aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet')
-rw-r--r--src/vnet/CMakeLists.txt114
-rw-r--r--src/vnet/adj/adj.c2
-rw-r--r--src/vnet/adj/adj_bfd.c2
-rw-r--r--src/vnet/adj/adj_dp.h42
-rw-r--r--src/vnet/adj/adj_glean.c63
-rw-r--r--src/vnet/adj/adj_mcast.c4
-rw-r--r--src/vnet/adj/adj_midchain_delegate.c5
-rw-r--r--src/vnet/adj/adj_nsh.c2
-rw-r--r--src/vnet/adj/rewrite.h4
-rw-r--r--src/vnet/api_errno.h10
-rw-r--r--src/vnet/arp/arp.c39
-rw-r--r--src/vnet/arp/arp_proxy.c6
-rw-r--r--src/vnet/bfd/bfd.api51
-rw-r--r--src/vnet/bfd/bfd_api.c8
-rw-r--r--src/vnet/bfd/bfd_cli.c134
-rw-r--r--src/vnet/bfd/bfd_main.c171
-rw-r--r--src/vnet/bfd/bfd_main.h13
-rw-r--r--src/vnet/bfd/bfd_protocol.h10
-rw-r--r--src/vnet/bfd/bfd_udp.c104
-rw-r--r--src/vnet/bfd/bfd_udp.h4
-rw-r--r--src/vnet/bier/bier_update.c9
-rw-r--r--src/vnet/bonding/bond_api.c16
-rw-r--r--src/vnet/bonding/cli.c18
-rw-r--r--src/vnet/bonding/device.c13
-rw-r--r--src/vnet/bonding/node.c2
-rw-r--r--src/vnet/buffer.h2
-rw-r--r--src/vnet/classify/classify_api.c13
-rw-r--r--src/vnet/classify/flow_classify.c4
-rw-r--r--src/vnet/classify/flow_classify_node.c4
-rw-r--r--src/vnet/classify/in_out_acl.c6
-rw-r--r--src/vnet/classify/ip_classify.c4
-rw-r--r--src/vnet/classify/pcap_classify.h6
-rw-r--r--src/vnet/classify/policer_classify.c4
-rw-r--r--src/vnet/classify/trace_classify.h5
-rw-r--r--src/vnet/classify/vnet_classify.c48
-rw-r--r--src/vnet/classify/vnet_classify.h16
-rw-r--r--src/vnet/crypto/cli.c96
-rw-r--r--src/vnet/crypto/crypto.api21
-rw-r--r--src/vnet/crypto/crypto.c158
-rw-r--r--src/vnet/crypto/crypto.h69
-rw-r--r--src/vnet/crypto/crypto_api.c14
-rw-r--r--src/vnet/crypto/node.c26
-rw-r--r--src/vnet/dev/api.c275
-rw-r--r--src/vnet/dev/api.h68
-rw-r--r--src/vnet/dev/args.c237
-rw-r--r--src/vnet/dev/args.h74
-rw-r--r--src/vnet/dev/cli.c331
-rw-r--r--src/vnet/dev/config.c196
-rw-r--r--src/vnet/dev/counters.c132
-rw-r--r--src/vnet/dev/counters.h128
-rw-r--r--src/vnet/dev/dev.api86
-rw-r--r--src/vnet/dev/dev.c461
-rw-r--r--src/vnet/dev/dev.h753
-rw-r--r--src/vnet/dev/dev_api.c192
-rw-r--r--src/vnet/dev/dev_funcs.h332
-rw-r--r--src/vnet/dev/error.c54
-rw-r--r--src/vnet/dev/errors.h46
-rw-r--r--src/vnet/dev/format.c507
-rw-r--r--src/vnet/dev/handlers.c256
-rw-r--r--src/vnet/dev/log.h22
-rw-r--r--src/vnet/dev/mgmt.h10
-rw-r--r--src/vnet/dev/pci.c458
-rw-r--r--src/vnet/dev/pci.h80
-rw-r--r--src/vnet/dev/port.c748
-rw-r--r--src/vnet/dev/process.c474
-rw-r--r--src/vnet/dev/process.h10
-rw-r--r--src/vnet/dev/queue.c227
-rw-r--r--src/vnet/dev/runtime.c180
-rw-r--r--src/vnet/dev/types.h66
-rw-r--r--src/vnet/devices/af_packet/FEATURE.yaml16
-rw-r--r--src/vnet/devices/af_packet/af_packet.api191
-rw-r--r--src/vnet/devices/af_packet/af_packet.c849
-rw-r--r--src/vnet/devices/af_packet/af_packet.h168
-rw-r--r--src/vnet/devices/af_packet/af_packet_api.c248
-rw-r--r--src/vnet/devices/af_packet/cli.c293
-rw-r--r--src/vnet/devices/af_packet/device.c690
-rw-r--r--src/vnet/devices/af_packet/dir.dox29
-rw-r--r--src/vnet/devices/af_packet/node.c574
-rw-r--r--src/vnet/devices/devices.c25
-rw-r--r--src/vnet/devices/devices.h2
-rw-r--r--src/vnet/devices/netlink.c5
-rw-r--r--src/vnet/devices/pipe/pipe.c19
-rw-r--r--src/vnet/devices/pipe/pipe_api.c2
-rw-r--r--src/vnet/devices/tap/FEATURE.yaml2
-rw-r--r--src/vnet/devices/tap/cli.c14
-rw-r--r--src/vnet/devices/tap/tap.c4
-rw-r--r--src/vnet/devices/tap/tapv2.api4
-rw-r--r--src/vnet/devices/virtio/FEATURE.yaml4
-rw-r--r--src/vnet/devices/virtio/cli.c20
-rw-r--r--src/vnet/devices/virtio/device.c31
-rw-r--r--src/vnet/devices/virtio/node.c53
-rw-r--r--src/vnet/devices/virtio/pci.c75
-rw-r--r--src/vnet/devices/virtio/pci.h11
-rw-r--r--src/vnet/devices/virtio/vhost_user.api201
-rw-r--r--src/vnet/devices/virtio/vhost_user.c2613
-rw-r--r--src/vnet/devices/virtio/vhost_user.h388
-rw-r--r--src/vnet/devices/virtio/vhost_user_api.c352
-rw-r--r--src/vnet/devices/virtio/vhost_user_inline.h496
-rw-r--r--src/vnet/devices/virtio/vhost_user_input.c1474
-rw-r--r--src/vnet/devices/virtio/vhost_user_output.c1145
-rw-r--r--src/vnet/devices/virtio/virtio.api2
-rw-r--r--src/vnet/devices/virtio/virtio.c5
-rw-r--r--src/vnet/devices/virtio/virtio_pci_modern.c4
-rw-r--r--src/vnet/devices/virtio/virtio_pre_input.c3
-rw-r--r--src/vnet/devices/virtio/virtio_process.c2
-rw-r--r--src/vnet/devices/virtio/virtio_std.h2
-rw-r--r--src/vnet/dpo/dpo.c4
-rw-r--r--src/vnet/dpo/dvr_dpo.c9
-rw-r--r--src/vnet/dpo/interface_rx_dpo.c7
-rw-r--r--src/vnet/dpo/interface_tx_dpo.c5
-rw-r--r--src/vnet/dpo/ip6_ll_dpo.c2
-rw-r--r--src/vnet/dpo/l3_proxy_dpo.c5
-rw-r--r--src/vnet/dpo/load_balance.c36
-rw-r--r--src/vnet/dpo/load_balance.h18
-rw-r--r--src/vnet/dpo/mpls_label_dpo.c36
-rw-r--r--src/vnet/dpo/receive_dpo.c7
-rw-r--r--src/vnet/dpo/replicate_dpo.c16
-rw-r--r--src/vnet/dpo/replicate_dpo.h8
-rw-r--r--src/vnet/error.h5
-rw-r--r--src/vnet/ethernet/arp_packet.h2
-rw-r--r--src/vnet/ethernet/init.c4
-rw-r--r--src/vnet/ethernet/interface.c34
-rw-r--r--src/vnet/ethernet/mac_address.c2
-rw-r--r--src/vnet/ethernet/node.c38
-rw-r--r--src/vnet/ethernet/p2p_ethernet.c16
-rw-r--r--src/vnet/ethernet/p2p_ethernet_api.c2
-rw-r--r--src/vnet/ethernet/p2p_ethernet_input.c2
-rw-r--r--src/vnet/ethernet/packet.h2
-rw-r--r--src/vnet/feature/feature.c4
-rw-r--r--src/vnet/feature/feature.h111
-rw-r--r--src/vnet/feature/registration.c2
-rw-r--r--src/vnet/fib/fib.c2
-rw-r--r--src/vnet/fib/fib_api.c7
-rw-r--r--src/vnet/fib/fib_api.h2
-rw-r--r--src/vnet/fib/fib_attached_export.c1
-rw-r--r--src/vnet/fib/fib_bfd.c2
-rw-r--r--src/vnet/fib/fib_entry.h11
-rw-r--r--src/vnet/fib/fib_entry_src.c20
-rw-r--r--src/vnet/fib/fib_entry_src_interface.c81
-rw-r--r--src/vnet/fib/fib_node.c2
-rw-r--r--src/vnet/fib/fib_path.c29
-rw-r--r--src/vnet/fib/fib_table.c45
-rw-r--r--src/vnet/fib/fib_table.h9
-rw-r--r--src/vnet/fib/fib_types.c21
-rw-r--r--src/vnet/fib/fib_types.h7
-rw-r--r--src/vnet/fib/fib_urpf_list.c2
-rw-r--r--src/vnet/fib/fib_walk.c2
-rw-r--r--src/vnet/fib/ip4_fib.c23
-rw-r--r--src/vnet/fib/ip6_fib.c6
-rw-r--r--src/vnet/fib/mpls_fib.c21
-rw-r--r--src/vnet/flow/flow.api6
-rw-r--r--src/vnet/flow/flow.c2
-rw-r--r--src/vnet/flow/flow.h48
-rw-r--r--src/vnet/flow/flow_api.c4
-rw-r--r--src/vnet/flow/flow_cli.c134
-rw-r--r--src/vnet/gre/FEATURE.yaml13
-rw-r--r--src/vnet/gre/error.def23
-rw-r--r--src/vnet/gre/gre.api110
-rw-r--r--src/vnet/gre/gre.c867
-rw-r--r--src/vnet/gre/gre.h443
-rw-r--r--src/vnet/gre/gre_api.c220
-rw-r--r--src/vnet/gre/interface.c845
-rw-r--r--src/vnet/gre/node.c598
-rw-r--r--src/vnet/gre/packet.h2
-rw-r--r--src/vnet/gre/pg.c86
-rw-r--r--src/vnet/gso/FEATURE.yaml2
-rw-r--r--src/vnet/gso/cli.c2
-rw-r--r--src/vnet/gso/gro_func.h5
-rw-r--r--src/vnet/gso/gso.h120
-rw-r--r--src/vnet/gso/gso.rst154
-rw-r--r--src/vnet/gso/hdr_offset_parser.h5
-rw-r--r--src/vnet/gso/node.c162
-rw-r--r--src/vnet/handoff.c4
-rw-r--r--src/vnet/hash/FEATURE.yaml2
-rw-r--r--src/vnet/hash/hash.rst90
-rw-r--r--src/vnet/hdlc/hdlc.c2
-rw-r--r--src/vnet/hdlc/node.c2
-rw-r--r--src/vnet/interface.api55
-rw-r--r--src/vnet/interface.c73
-rw-r--r--src/vnet/interface.h5
-rw-r--r--src/vnet/interface/runtime.c5
-rw-r--r--src/vnet/interface/rx_queue.c18
-rw-r--r--src/vnet/interface/stats.c9
-rw-r--r--src/vnet/interface/tx_queue.rst159
-rw-r--r--src/vnet/interface_api.c150
-rw-r--r--src/vnet/interface_cli.c161
-rw-r--r--src/vnet/interface_format.c6
-rw-r--r--src/vnet/interface_funcs.h4
-rw-r--r--src/vnet/interface_output.c9
-rw-r--r--src/vnet/interface_stats.c2
-rw-r--r--src/vnet/interface_test.c24
-rw-r--r--src/vnet/ip-neighbor/ip4_neighbor.c14
-rw-r--r--src/vnet/ip-neighbor/ip6_neighbor.c9
-rw-r--r--src/vnet/ip-neighbor/ip_neighbor.api36
-rw-r--r--src/vnet/ip-neighbor/ip_neighbor.c87
-rw-r--r--src/vnet/ip-neighbor/ip_neighbor.h2
-rw-r--r--src/vnet/ip-neighbor/ip_neighbor_api.c28
-rw-r--r--src/vnet/ip-neighbor/ip_neighbor_watch.c10
-rw-r--r--src/vnet/ip/icmp4.c11
-rw-r--r--src/vnet/ip/icmp46_packet.h4
-rw-r--r--src/vnet/ip/icmp6.c9
-rw-r--r--src/vnet/ip/ip.api46
-rw-r--r--src/vnet/ip/ip.c5
-rw-r--r--src/vnet/ip/ip4.h2
-rw-r--r--src/vnet/ip/ip46_address.h2
-rw-r--r--src/vnet/ip/ip46_cli.c10
-rw-r--r--src/vnet/ip/ip4_forward.c58
-rw-r--r--src/vnet/ip/ip4_inlines.h14
-rw-r--r--src/vnet/ip/ip4_input.c2
-rw-r--r--src/vnet/ip/ip4_input.h8
-rw-r--r--src/vnet/ip/ip4_options.c2
-rw-r--r--src/vnet/ip/ip4_packet.h8
-rw-r--r--src/vnet/ip/ip4_punt_drop.c14
-rw-r--r--src/vnet/ip/ip4_source_and_port_range_check.c10
-rw-r--r--src/vnet/ip/ip4_to_ip6.h2
-rw-r--r--src/vnet/ip/ip6.h2
-rw-r--r--src/vnet/ip/ip6_forward.c49
-rw-r--r--src/vnet/ip/ip6_hop_by_hop.c18
-rw-r--r--src/vnet/ip/ip6_inlines.h39
-rw-r--r--src/vnet/ip/ip6_input.c2
-rw-r--r--src/vnet/ip/ip6_link.c25
-rw-r--r--src/vnet/ip/ip6_ll_table.c16
-rw-r--r--src/vnet/ip/ip6_ll_types.c6
-rw-r--r--src/vnet/ip/ip6_packet.h7
-rw-r--r--src/vnet/ip/ip6_punt_drop.c14
-rw-r--r--src/vnet/ip/ip6_to_ip4.h2
-rw-r--r--src/vnet/ip/ip_api.c54
-rw-r--r--src/vnet/ip/ip_checksum.c2
-rw-r--r--src/vnet/ip/ip_container_proxy.c6
-rw-r--r--src/vnet/ip/ip_flow_hash.h12
-rw-r--r--src/vnet/ip/ip_frag.c4
-rw-r--r--src/vnet/ip/ip_in_out_acl.c19
-rw-r--r--src/vnet/ip/ip_init.c2
-rw-r--r--src/vnet/ip/ip_interface.c12
-rw-r--r--src/vnet/ip/ip_interface.h2
-rw-r--r--src/vnet/ip/ip_psh_cksum.h6
-rw-r--r--src/vnet/ip/ip_punt_drop.c5
-rw-r--r--src/vnet/ip/ip_test.c6
-rw-r--r--src/vnet/ip/ip_types.c19
-rw-r--r--src/vnet/ip/ip_types.h4
-rw-r--r--src/vnet/ip/lookup.c55
-rw-r--r--src/vnet/ip/lookup.h3
-rw-r--r--src/vnet/ip/punt.c55
-rw-r--r--src/vnet/ip/punt.h6
-rw-r--r--src/vnet/ip/punt_api.c2
-rw-r--r--src/vnet/ip/punt_node.c79
-rw-r--r--src/vnet/ip/reass/ip4_full_reass.c378
-rw-r--r--src/vnet/ip/reass/ip4_sv_reass.c202
-rw-r--r--src/vnet/ip/reass/ip4_sv_reass.h1
-rw-r--r--src/vnet/ip/reass/ip6_full_reass.c16
-rw-r--r--src/vnet/ip/reass/ip6_sv_reass.c173
-rw-r--r--src/vnet/ip/reass/ip6_sv_reass.h1
-rw-r--r--src/vnet/ip/vtep.h4
-rw-r--r--src/vnet/ip6-nd/ip6_mld.c14
-rw-r--r--src/vnet/ip6-nd/ip6_nd.api130
-rw-r--r--src/vnet/ip6-nd/ip6_nd.c6
-rw-r--r--src/vnet/ip6-nd/ip6_nd_api.c173
-rw-r--r--src/vnet/ip6-nd/ip6_nd_inline.h8
-rw-r--r--src/vnet/ip6-nd/ip6_nd_proxy.c4
-rw-r--r--src/vnet/ip6-nd/ip6_nd_test.c57
-rw-r--r--src/vnet/ip6-nd/ip6_ra.c148
-rw-r--r--src/vnet/ip6-nd/ip6_ra.h111
-rw-r--r--src/vnet/ip6-nd/rd_cp.c16
-rw-r--r--src/vnet/ipfix-export/flow_report.c6
-rw-r--r--src/vnet/ipfix-export/flow_report_classify.c7
-rw-r--r--src/vnet/ipip/ipip.c50
-rw-r--r--src/vnet/ipip/ipip_api.c52
-rw-r--r--src/vnet/ipip/ipip_cli.c12
-rw-r--r--src/vnet/ipip/node.c2
-rw-r--r--src/vnet/ipip/sixrd.c6
-rw-r--r--src/vnet/ipsec/ah.h57
-rw-r--r--src/vnet/ipsec/ah_decrypt.c93
-rw-r--r--src/vnet/ipsec/ah_encrypt.c21
-rw-r--r--src/vnet/ipsec/esp.h133
-rw-r--r--src/vnet/ipsec/esp_decrypt.c298
-rw-r--r--src/vnet/ipsec/esp_encrypt.c242
-rw-r--r--src/vnet/ipsec/ipsec.api121
-rw-r--r--src/vnet/ipsec/ipsec.c54
-rw-r--r--src/vnet/ipsec/ipsec.h22
-rw-r--r--src/vnet/ipsec/ipsec_api.c346
-rw-r--r--src/vnet/ipsec/ipsec_cli.c122
-rw-r--r--src/vnet/ipsec/ipsec_format.c174
-rw-r--r--src/vnet/ipsec/ipsec_handoff.c2
-rw-r--r--src/vnet/ipsec/ipsec_input.c410
-rw-r--r--src/vnet/ipsec/ipsec_itf.c19
-rw-r--r--src/vnet/ipsec/ipsec_output.c2
-rw-r--r--src/vnet/ipsec/ipsec_sa.c366
-rw-r--r--src/vnet/ipsec/ipsec_sa.h520
-rw-r--r--src/vnet/ipsec/ipsec_spd.c6
-rw-r--r--src/vnet/ipsec/ipsec_spd.h2
-rw-r--r--src/vnet/ipsec/ipsec_spd_fp_lookup.h60
-rw-r--r--src/vnet/ipsec/ipsec_spd_policy.c242
-rw-r--r--src/vnet/ipsec/ipsec_test.c52
-rw-r--r--src/vnet/ipsec/ipsec_tun.c14
-rw-r--r--src/vnet/ipsec/ipsec_tun_in.c5
-rw-r--r--src/vnet/ipsec/ipsec_types.api47
-rw-r--r--src/vnet/l2/feat_bitmap.c2
-rw-r--r--src/vnet/l2/l2.api49
-rw-r--r--src/vnet/l2/l2_api.c51
-rw-r--r--src/vnet/l2/l2_arp_term.c2
-rw-r--r--src/vnet/l2/l2_bd.c65
-rw-r--r--src/vnet/l2/l2_bd.h3
-rw-r--r--src/vnet/l2/l2_bvi.c6
-rw-r--r--src/vnet/l2/l2_classify.h1
-rw-r--r--src/vnet/l2/l2_efp_filter.c4
-rw-r--r--src/vnet/l2/l2_fib.c23
-rw-r--r--src/vnet/l2/l2_flood.c4
-rw-r--r--src/vnet/l2/l2_fwd.c6
-rw-r--r--src/vnet/l2/l2_in_out_acl.c2
-rw-r--r--src/vnet/l2/l2_in_out_feat_arc.c4
-rw-r--r--src/vnet/l2/l2_input.c10
-rw-r--r--src/vnet/l2/l2_input.h6
-rw-r--r--src/vnet/l2/l2_input_classify.c5
-rw-r--r--src/vnet/l2/l2_input_node.c12
-rw-r--r--src/vnet/l2/l2_input_vtr.c2
-rw-r--r--src/vnet/l2/l2_learn.c4
-rw-r--r--src/vnet/l2/l2_output.c4
-rw-r--r--src/vnet/l2/l2_output_classify.c4
-rw-r--r--src/vnet/l2/l2_patch.c10
-rw-r--r--src/vnet/l2/l2_rw.c82
-rw-r--r--src/vnet/l2/l2_rw.h4
-rw-r--r--src/vnet/l2/l2_test.c13
-rw-r--r--src/vnet/l2/l2_uu_fwd.c2
-rw-r--r--src/vnet/l2/l2_vtr.c4
-rw-r--r--src/vnet/l2/l2_xcrw.c10
-rw-r--r--src/vnet/lawful-intercept/lawful_intercept.c124
-rw-r--r--src/vnet/lawful-intercept/lawful_intercept.h56
-rw-r--r--src/vnet/lawful-intercept/node.c288
-rw-r--r--src/vnet/llc/llc.c2
-rw-r--r--src/vnet/llc/node.c2
-rw-r--r--src/vnet/mfib/mfib_forward.c2
-rw-r--r--src/vnet/mfib/mfib_itf.c6
-rw-r--r--src/vnet/mfib/mfib_types.c4
-rw-r--r--src/vnet/misc.c9
-rw-r--r--src/vnet/mpls/interface.c25
-rw-r--r--src/vnet/mpls/mpls.api20
-rw-r--r--src/vnet/mpls/mpls.c10
-rw-r--r--src/vnet/mpls/mpls.h17
-rw-r--r--src/vnet/mpls/mpls_api.c54
-rw-r--r--src/vnet/mpls/mpls_input.c2
-rw-r--r--src/vnet/mpls/mpls_lookup.c235
-rw-r--r--src/vnet/mpls/mpls_tunnel.c7
-rw-r--r--src/vnet/osi/node.c2
-rw-r--r--src/vnet/pg/cli.c26
-rw-r--r--src/vnet/pg/input.c22
-rw-r--r--src/vnet/pg/pg.api4
-rw-r--r--src/vnet/pg/pg.h4
-rw-r--r--src/vnet/pg/pg_api.c2
-rw-r--r--src/vnet/pg/stream.c15
-rw-r--r--src/vnet/policer/node_funcs.c6
-rw-r--r--src/vnet/policer/police.h8
-rw-r--r--src/vnet/policer/police_inlines.h2
-rw-r--r--src/vnet/policer/policer.api90
-rw-r--r--src/vnet/policer/policer.c530
-rw-r--r--src/vnet/policer/policer.h17
-rw-r--r--src/vnet/policer/policer.rst217
-rw-r--r--src/vnet/policer/policer_api.c399
-rw-r--r--src/vnet/policer/policer_types.api28
-rw-r--r--src/vnet/policer/xlate.c2
-rw-r--r--src/vnet/policer/xlate.h2
-rw-r--r--src/vnet/ppp/node.c2
-rw-r--r--src/vnet/ppp/ppp.c2
-rw-r--r--src/vnet/qos/qos_egress_map.c10
-rw-r--r--src/vnet/qos/qos_mark.c4
-rw-r--r--src/vnet/qos/qos_mark_node.c2
-rw-r--r--src/vnet/qos/qos_record.c4
-rw-r--r--src/vnet/qos/qos_record_node.c2
-rw-r--r--src/vnet/qos/qos_store.c4
-rw-r--r--src/vnet/qos/qos_store_node.c2
-rw-r--r--src/vnet/session/application.c216
-rw-r--r--src/vnet/session/application.h74
-rw-r--r--src/vnet/session/application_interface.c26
-rw-r--r--src/vnet/session/application_interface.h104
-rw-r--r--src/vnet/session/application_local.c248
-rw-r--r--src/vnet/session/application_local.h3
-rw-r--r--src/vnet/session/application_namespace.c43
-rw-r--r--src/vnet/session/application_namespace.h9
-rw-r--r--src/vnet/session/application_worker.c489
-rw-r--r--src/vnet/session/mma_template.h2
-rw-r--r--src/vnet/session/segment_manager.c26
-rw-r--r--src/vnet/session/segment_manager.h4
-rw-r--r--src/vnet/session/session.api75
-rw-r--r--src/vnet/session/session.c753
-rw-r--r--src/vnet/session/session.h184
-rw-r--r--src/vnet/session/session_api.c292
-rw-r--r--src/vnet/session/session_cli.c79
-rw-r--r--src/vnet/session/session_debug.c111
-rw-r--r--src/vnet/session/session_debug.h203
-rw-r--r--src/vnet/session/session_input.c343
-rw-r--r--src/vnet/session/session_lookup.c154
-rw-r--r--src/vnet/session/session_lookup.h7
-rw-r--r--src/vnet/session/session_node.c221
-rw-r--r--src/vnet/session/session_rules_table.c16
-rw-r--r--src/vnet/session/session_rules_table.h8
-rw-r--r--src/vnet/session/session_table.c61
-rw-r--r--src/vnet/session/session_table.h4
-rw-r--r--src/vnet/session/session_test.c14
-rw-r--r--src/vnet/session/session_types.h128
-rw-r--r--src/vnet/session/transport.c246
-rw-r--r--src/vnet/session/transport.h13
-rw-r--r--src/vnet/session/transport_types.h46
-rw-r--r--src/vnet/snap/node.c2
-rw-r--r--src/vnet/snap/snap.h2
-rw-r--r--src/vnet/span/node.c4
-rw-r--r--src/vnet/span/span.c9
-rw-r--r--src/vnet/span/span_api.c2
-rw-r--r--src/vnet/srmpls/sr_mpls_api.c58
-rw-r--r--src/vnet/srmpls/sr_mpls_policy.c18
-rw-r--r--src/vnet/srmpls/sr_mpls_steering.c8
-rw-r--r--src/vnet/srv6/sr.api116
-rw-r--r--src/vnet/srv6/sr.h15
-rw-r--r--src/vnet/srv6/sr_api.c254
-rw-r--r--src/vnet/srv6/sr_localsid.c22
-rw-r--r--src/vnet/srv6/sr_policy_rewrite.c101
-rw-r--r--src/vnet/srv6/sr_pt.api59
-rw-r--r--src/vnet/srv6/sr_pt.c3
-rw-r--r--src/vnet/srv6/sr_pt.h21
-rw-r--r--src/vnet/srv6/sr_pt_api.c97
-rw-r--r--src/vnet/srv6/sr_pt_node.c175
-rw-r--r--src/vnet/srv6/sr_steering.c10
-rw-r--r--src/vnet/srv6/sr_test.c23
-rw-r--r--src/vnet/syslog/syslog.c2
-rw-r--r--src/vnet/syslog/syslog_api.c4
-rw-r--r--src/vnet/tcp/tcp.c112
-rw-r--r--src/vnet/tcp/tcp.h17
-rw-r--r--src/vnet/tcp/tcp_bt.c2
-rw-r--r--src/vnet/tcp/tcp_cli.c12
-rw-r--r--src/vnet/tcp/tcp_debug.c2
-rw-r--r--src/vnet/tcp/tcp_debug.h62
-rw-r--r--src/vnet/tcp/tcp_error.def1
-rw-r--r--src/vnet/tcp/tcp_format.c89
-rw-r--r--src/vnet/tcp/tcp_inlines.h31
-rw-r--r--src/vnet/tcp/tcp_input.c453
-rw-r--r--src/vnet/tcp/tcp_output.c76
-rw-r--r--src/vnet/tcp/tcp_pg.c181
-rw-r--r--src/vnet/tcp/tcp_syn_filter4.c6
-rw-r--r--src/vnet/tcp/tcp_timer.h15
-rw-r--r--src/vnet/tcp/tcp_types.h2
-rw-r--r--src/vnet/teib/teib.c85
-rw-r--r--src/vnet/teib/teib_cli.c6
-rw-r--r--src/vnet/tls/tls.c329
-rw-r--r--src/vnet/tls/tls.h50
-rw-r--r--src/vnet/udp/udp.api2
-rw-r--r--src/vnet/udp/udp.c374
-rw-r--r--src/vnet/udp/udp.h63
-rw-r--r--src/vnet/udp/udp_api.c23
-rw-r--r--src/vnet/udp/udp_cli.c145
-rw-r--r--src/vnet/udp/udp_encap.c46
-rw-r--r--src/vnet/udp/udp_encap.h1
-rw-r--r--src/vnet/udp/udp_encap_node.c49
-rw-r--r--src/vnet/udp/udp_error.def3
-rw-r--r--src/vnet/udp/udp_inlines.h79
-rw-r--r--src/vnet/udp/udp_input.c63
-rw-r--r--src/vnet/udp/udp_local.c117
-rw-r--r--src/vnet/udp/udp_output.c254
-rw-r--r--src/vnet/unix/gdb_funcs.c2
-rw-r--r--src/vnet/unix/tuntap.c12
-rw-r--r--src/vnet/util/throttle.c5
-rw-r--r--src/vnet/util/throttle.h19
-rw-r--r--src/vnet/vnet.h1
-rw-r--r--src/vnet/vxlan-gpe/decap.c8
-rw-r--r--src/vnet/vxlan-gpe/encap.c8
-rw-r--r--src/vnet/vxlan-gpe/vxlan_gpe.c27
-rw-r--r--src/vnet/vxlan-gpe/vxlan_gpe.h8
-rw-r--r--src/vnet/vxlan-gpe/vxlan_gpe_api.c4
-rw-r--r--src/vnet/vxlan/FEATURE.yaml14
-rw-r--r--src/vnet/vxlan/decap.c1330
-rw-r--r--src/vnet/vxlan/dir.dox24
-rw-r--r--src/vnet/vxlan/encap.c540
-rw-r--r--src/vnet/vxlan/vxlan.api198
-rw-r--r--src/vnet/vxlan/vxlan.c1350
-rw-r--r--src/vnet/vxlan/vxlan.h242
-rw-r--r--src/vnet/vxlan/vxlan_api.c376
-rw-r--r--src/vnet/vxlan/vxlan_error.def17
-rw-r--r--src/vnet/vxlan/vxlan_packet.h80
475 files changed, 19271 insertions, 24611 deletions
diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt
index ef187dc2f43..fb8d294009d 100644
--- a/src/vnet/CMakeLists.txt
+++ b/src/vnet/CMakeLists.txt
@@ -26,6 +26,21 @@ list(APPEND VNET_SOURCES
config.c
devices/devices.c
devices/netlink.c
+ dev/api.c
+ dev/args.c
+ dev/cli.c
+ dev/config.c
+ dev/counters.c
+ dev/dev.c
+ dev/dev_api.c
+ dev/error.c
+ dev/format.c
+ dev/handlers.c
+ dev/pci.c
+ dev/port.c
+ dev/process.c
+ dev/queue.c
+ dev/runtime.c
error.c
flow/flow.c
flow/flow_cli.c
@@ -59,6 +74,7 @@ list(APPEND VNET_HEADERS
config.h
devices/devices.h
devices/netlink.h
+ dev/dev.h
flow/flow.h
global_funcs.h
interface/rx_queue_funcs.h
@@ -83,6 +99,7 @@ list(APPEND VNET_HEADERS
)
list(APPEND VNET_API_FILES
+ dev/dev.api
interface.api
interface_types.api
ip/ip_types.api
@@ -297,30 +314,6 @@ list(APPEND VNET_HEADERS
)
##############################################################################
-# Layer 2 / vxlan
-##############################################################################
-list(APPEND VNET_SOURCES
- vxlan/vxlan.c
- vxlan/encap.c
- vxlan/decap.c
- vxlan/vxlan_api.c
-)
-
-list(APPEND VNET_MULTIARCH_SOURCES
- vxlan/encap.c
-)
-
-list(APPEND VNET_HEADERS
- vxlan/vxlan.h
- vxlan/vxlan_packet.h
- vxlan/vxlan_error.def
-)
-
-list(APPEND VNET_MULTIARCH_SOURCES vxlan/decap.c)
-
-list(APPEND VNET_API_FILES vxlan/vxlan.api)
-
-##############################################################################
# Layer 2 / Bonding
##############################################################################
list(APPEND VNET_SOURCES
@@ -670,6 +663,7 @@ list(APPEND VNET_SOURCES
udp/udp_encap.c
udp/udp_decap.c
udp/udp_api.c
+ udp/udp_output.c
)
list(APPEND VNET_MULTIARCH_SOURCES
@@ -691,27 +685,10 @@ list(APPEND VNET_API_FILES udp/udp.api)
##############################################################################
# Tunnel protocol: gre
##############################################################################
-list(APPEND VNET_SOURCES
- gre/gre.c
- gre/node.c
- gre/interface.c
- gre/pg.c
- gre/gre_api.c
-)
-
-list(APPEND VNET_MULTIARCH_SOURCES
- gre/node.c
- gre/gre.c
-)
-
list(APPEND VNET_HEADERS
- gre/gre.h
gre/packet.h
- gre/error.def
)
-list(APPEND VNET_API_FILES gre/gre.api)
-
##############################################################################
# Tunnel protocol: ipip
##############################################################################
@@ -819,6 +796,8 @@ list(APPEND VNET_SOURCES
srv6/sr_steering.c
srv6/sr_api.c
srv6/sr_pt.c
+ srv6/sr_pt_node.c
+ srv6/sr_pt_api.c
)
list(APPEND VNET_HEADERS
@@ -830,6 +809,7 @@ list(APPEND VNET_HEADERS
list(APPEND VNET_API_FILES
srv6/sr.api
srv6/sr_types.api
+ srv6/sr_pt.api
)
##############################################################################
@@ -915,23 +895,6 @@ list(APPEND VNET_HEADERS
)
##############################################################################
-# lawful intercept
-##############################################################################
-
-list(APPEND VNET_SOURCES
- lawful-intercept/lawful_intercept.c
- lawful-intercept/node.c
-)
-
-list(APPEND VNET_MULTIARCH_SOURCES
- lawful-intercept/node.c
-)
-
-list(APPEND VNET_HEADERS
- lawful-intercept/lawful_intercept.h
-)
-
-##############################################################################
# SPAN (port mirroring)
##############################################################################
@@ -982,10 +945,6 @@ list(APPEND VNET_SOURCES
devices/virtio/format.c
devices/virtio/node.c
devices/virtio/pci.c
- devices/virtio/vhost_user.c
- devices/virtio/vhost_user_input.c
- devices/virtio/vhost_user_output.c
- devices/virtio/vhost_user_api.c
devices/virtio/virtio.c
devices/virtio/virtio_api.c
devices/virtio/virtio_pci_legacy.c
@@ -1002,20 +961,15 @@ list(APPEND VNET_HEADERS
devices/virtio/virtio_pci_legacy.h
devices/virtio/virtio_pci_modern.h
devices/virtio/vhost_std.h
- devices/virtio/vhost_user.h
devices/virtio/virtio_types_api.h
)
list(APPEND VNET_MULTIARCH_SOURCES
- devices/virtio/vhost_user_input.c
- devices/virtio/vhost_user_output.c
devices/virtio/node.c
- devices/af_packet/node.c
devices/virtio/device.c
)
list(APPEND VNET_API_FILES
- devices/virtio/vhost_user.api
devices/virtio/virtio.api
devices/virtio/virtio_types.api
)
@@ -1024,6 +978,7 @@ list(APPEND VNET_API_FILES
# tap interface (with virtio backend)
##############################################################################
+if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
list(APPEND VNET_SOURCES
devices/tap/cli.c
devices/tap/tap.c
@@ -1037,6 +992,7 @@ list(APPEND VNET_HEADERS
list(APPEND VNET_API_FILES
devices/tap/tapv2.api
)
+endif()
##############################################################################
# tap interface (with virtio backend)
@@ -1064,6 +1020,7 @@ list(APPEND VNET_SOURCES
session/session_rules_table.c
session/session_lookup.c
session/session_node.c
+ session/session_input.c
session/transport.c
session/application.c
session/application_worker.c
@@ -1110,27 +1067,6 @@ list(APPEND VNET_HEADERS
tls/tls_test.h
)
-##############################################################################
-# Linux packet interface
-##############################################################################
-
-list(APPEND VNET_SOURCES
- devices/af_packet/af_packet.c
- devices/af_packet/device.c
- devices/af_packet/node.c
- devices/af_packet/cli.c
- devices/af_packet/af_packet_api.c
-)
-
-list(APPEND VNET_MULTIARCH_SOURCES
- devices/af_packet/device.c
-)
-
-list(APPEND VNET_HEADERS
- devices/af_packet/af_packet.h
-)
-
-list(APPEND VNET_API_FILES devices/af_packet/af_packet.api)
##############################################################################
# Driver feature graph arc support
@@ -1154,6 +1090,7 @@ list(APPEND VNET_API_FILES feature/feature.api)
# FIXME: unix/hgshm.c
+if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
list(APPEND VNET_SOURCES
unix/gdb_funcs.c
unix/tuntap.c
@@ -1162,6 +1099,7 @@ list(APPEND VNET_SOURCES
list(APPEND VNET_HEADERS
unix/tuntap.h
)
+endif()
##############################################################################
# FIB
diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c
index 2cb9ec43c00..201561fe485 100644
--- a/src/vnet/adj/adj.c
+++ b/src/vnet/adj/adj.c
@@ -704,7 +704,6 @@ adj_show (vlib_main_t * vm,
}
else
{
- /* *INDENT-OFF* */
pool_foreach_index (ai, adj_pool)
{
if (~0 != sw_if_index &&
@@ -719,7 +718,6 @@ adj_show (vlib_main_t * vm,
FORMAT_IP_ADJACENCY_NONE);
}
}
- /* *INDENT-ON* */
}
}
return 0;
diff --git a/src/vnet/adj/adj_bfd.c b/src/vnet/adj/adj_bfd.c
index c1f02dd9073..e54ba6d74ae 100644
--- a/src/vnet/adj/adj_bfd.c
+++ b/src/vnet/adj/adj_bfd.c
@@ -280,9 +280,7 @@ adj_bfd_main_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (adj_bfd_main_init)=
{
.runs_after = VLIB_INITS("bfd_main_init"),
};
-/* *INDENT-ON* */
diff --git a/src/vnet/adj/adj_dp.h b/src/vnet/adj/adj_dp.h
index aff1a2b1f43..186044b90ad 100644
--- a/src/vnet/adj/adj_dp.h
+++ b/src/vnet/adj/adj_dp.h
@@ -36,22 +36,36 @@ adj_midchain_ipip44_fixup (vlib_main_t * vm,
ip4->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
if (PREDICT_TRUE(TUNNEL_ENCAP_DECAP_FLAG_NONE == flags))
- {
- ip_csum_t sum;
- u16 old,new;
-
- old = 0;
- new = ip4->length;
-
- sum = ip4->checksum;
- sum = ip_csum_update (sum, old, new, ip4_header_t, length);
- ip4->checksum = ip_csum_fold (sum);
- }
+ {
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP |
+ VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM);
+ }
+ else
+ {
+ ip_csum_t sum;
+ u16 old,new;
+ old = 0;
+ new = ip4->length;
+ sum = ip4->checksum;
+ sum = ip_csum_update (sum, old, new, ip4_header_t, length);
+ ip4->checksum = ip_csum_fold (sum);
+ }
+ }
else
- {
+ {
tunnel_encap_fixup_4o4 (flags, ip4 + 1, ip4);
- ip4->checksum = ip4_header_checksum (ip4);
- }
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP |
+ VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM);
+ }
+ else
+ ip4->checksum = ip4_header_checksum (ip4);
+ }
}
static_always_inline void
diff --git a/src/vnet/adj/adj_glean.c b/src/vnet/adj/adj_glean.c
index 45477649c1a..ceece0d74ed 100644
--- a/src/vnet/adj/adj_glean.c
+++ b/src/vnet/adj/adj_glean.c
@@ -45,7 +45,7 @@ adj_glean_db_lookup (fib_protocol_t proto,
{
uword *p;
- if (vec_len(adj_gleans[proto]) <= sw_if_index)
+ if ((proto >= FIB_PROTOCOL_IP_MAX) || vec_len(adj_gleans[proto]) <= sw_if_index)
return (ADJ_INDEX_INVALID);
p = hash_get_mem (adj_gleans[proto][sw_if_index], nh_addr);
@@ -66,6 +66,7 @@ adj_glean_db_insert (fib_protocol_t proto,
vlib_worker_thread_barrier_sync(vm);
+ ASSERT(proto < FIB_PROTOCOL_IP_MAX);
vec_validate(adj_gleans[proto], sw_if_index);
if (NULL == adj_gleans[proto][sw_if_index])
@@ -186,6 +187,38 @@ adj_glean_update_rewrite_walk (adj_index_t ai,
return (ADJ_WALK_RC_CONTINUE);
}
+static void
+adj_glean_walk_proto (fib_protocol_t proto,
+ u32 sw_if_index,
+ adj_walk_cb_t cb,
+ void *data)
+{
+ adj_index_t ai, *aip, *ais = NULL;
+ ip46_address_t *conn;
+
+ ASSERT(proto < FIB_PROTOCOL_IP_MAX);
+ if (vec_len(adj_gleans[proto]) <= sw_if_index ||
+ NULL == adj_gleans[proto][sw_if_index])
+ return;
+
+ /*
+ * Walk first to collect the indices
+ * then walk the collection. This is safe
+ * to modifications of the hash table
+ */
+ hash_foreach_mem(conn, ai, adj_gleans[proto][sw_if_index],
+ ({
+ vec_add1(ais, ai);
+ }));
+
+ vec_foreach(aip, ais)
+ {
+ if (ADJ_WALK_RC_STOP == cb(*aip, data))
+ break;
+ }
+ vec_free(ais);
+}
+
void
adj_glean_walk (u32 sw_if_index,
adj_walk_cb_t cb,
@@ -195,29 +228,7 @@ adj_glean_walk (u32 sw_if_index,
FOR_EACH_FIB_IP_PROTOCOL(proto)
{
- adj_index_t ai, *aip, *ais = NULL;
- ip46_address_t *conn;
-
- if (vec_len(adj_gleans[proto]) <= sw_if_index ||
- NULL == adj_gleans[proto][sw_if_index])
- continue;
-
- /*
- * Walk first to collect the indices
- * then walk the collection. This is safe
- * to modifications of the hash table
- */
- hash_foreach_mem(conn, ai, adj_gleans[proto][sw_if_index],
- ({
- vec_add1(ais, ai);
- }));
-
- vec_foreach(aip, ais)
- {
- if (ADJ_WALK_RC_STOP == cb(*aip, data))
- break;
- }
- vec_free(ais);
+ adj_glean_walk_proto (proto, sw_if_index, cb, data);
}
}
@@ -235,6 +246,7 @@ adj_glean_get (fib_protocol_t proto,
ip46_address_t *conn;
adj_index_t ai;
+ ASSERT(proto < FIB_PROTOCOL_IP_MAX);
if (vec_len(adj_gleans[proto]) <= sw_if_index ||
NULL == adj_gleans[proto][sw_if_index])
return (ADJ_INDEX_INVALID);
@@ -256,6 +268,7 @@ adj_glean_get_src (fib_protocol_t proto,
const ip_adjacency_t *adj;
adj_index_t ai;
+ ASSERT(proto < FIB_PROTOCOL_IP_MAX);
if (vec_len(adj_gleans[proto]) <= sw_if_index ||
NULL == adj_gleans[proto][sw_if_index])
return (NULL);
@@ -445,7 +458,7 @@ adj_glean_table_bind (fib_protocol_t fproto,
},
};
- adj_glean_walk (sw_if_index, adj_glean_start_backwalk, &bw_ctx);
+ adj_glean_walk_proto (fproto, sw_if_index, adj_glean_start_backwalk, &bw_ctx);
}
diff --git a/src/vnet/adj/adj_mcast.c b/src/vnet/adj/adj_mcast.c
index a20f61f6f6b..573105b7228 100644
--- a/src/vnet/adj/adj_mcast.c
+++ b/src/vnet/adj/adj_mcast.c
@@ -82,6 +82,8 @@ adj_mcast_add_or_lock (fib_protocol_t proto,
*/
vnet_update_adjacency_for_sw_interface(vnm, sw_if_index,
adj_get_index(adj));
+
+ adj_delegate_adj_created(adj);
}
else
{
@@ -89,8 +91,6 @@ adj_mcast_add_or_lock (fib_protocol_t proto,
adj_lock(adj_get_index(adj));
}
- adj_delegate_adj_created(adj);
-
return (adj_get_index(adj));
}
diff --git a/src/vnet/adj/adj_midchain_delegate.c b/src/vnet/adj/adj_midchain_delegate.c
index de57442ac9b..16129ff86ac 100644
--- a/src/vnet/adj/adj_midchain_delegate.c
+++ b/src/vnet/adj/adj_midchain_delegate.c
@@ -148,12 +148,11 @@ adj_midchain_delegate_remove (adj_index_t ai)
{
adj_nbr_midchain_unstack(ai);
- adj_delegate_remove (ai, ADJ_DELEGATE_MIDCHAIN);
-
amd = pool_elt_at_index(amd_pool, ad->ad_index);
fib_entry_untrack(amd->amd_fei, amd->amd_sibling);
-
pool_put(amd_pool, amd);
+
+ adj_delegate_remove (ai, ADJ_DELEGATE_MIDCHAIN);
}
}
diff --git a/src/vnet/adj/adj_nsh.c b/src/vnet/adj/adj_nsh.c
index 00d945729d8..1b4fa6c15b9 100644
--- a/src/vnet/adj/adj_nsh.c
+++ b/src/vnet/adj/adj_nsh.c
@@ -190,7 +190,6 @@ VLIB_REGISTER_NODE (adj_nsh_midchain_node) = {
};
/* Built-in ip4 tx feature path definition */
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (nsh_output, static) =
{
.arc_name = "nsh-output",
@@ -204,4 +203,3 @@ VNET_FEATURE_INIT (nsh_tx_drop, static) =
.node_name = "error-drop",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
diff --git a/src/vnet/adj/rewrite.h b/src/vnet/adj/rewrite.h
index 5cb90e47318..06b1b00882e 100644
--- a/src/vnet/adj/rewrite.h
+++ b/src/vnet/adj/rewrite.h
@@ -147,8 +147,8 @@ vnet_rewrite_set_data_internal (vnet_rewrite_header_t * rw,
int max_size, void *data, int data_bytes)
{
/* Sanity check values carefully for this clib_memset operation */
- ASSERT ((max_size > 0) && (max_size < VNET_REWRITE_TOTAL_BYTES));
- ASSERT ((data_bytes >= 0) && (data_bytes < max_size));
+ ASSERT ((max_size > 0) && (max_size <= VNET_REWRITE_TOTAL_BYTES));
+ ASSERT ((data_bytes >= 0) && (data_bytes <= max_size));
rw->data_bytes = data_bytes;
clib_memcpy_fast (rw->data, data, data_bytes);
diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h
index 4e91e132b89..52f201c081b 100644
--- a/src/vnet/api_errno.h
+++ b/src/vnet/api_errno.h
@@ -35,11 +35,21 @@ format_function_t format_vnet_api_errno;
static_always_inline vnet_api_error_t
vnet_api_error (clib_error_t *err)
{
+ if (err == 0)
+ return 0;
if (err->code >= 0)
return VNET_API_ERROR_BUG;
return err->code;
}
+static_always_inline vnet_api_error_t
+vnet_get_api_error_and_free (clib_error_t *err)
+{
+ vnet_api_error_t rv = vnet_api_error (err);
+ clib_error_free (err);
+ return rv;
+}
+
#endif /* included_vnet_api_errno_h */
/*
diff --git a/src/vnet/arp/arp.c b/src/vnet/arp/arp.c
index d39d48e2c77..43b2a93a7b3 100644
--- a/src/vnet/arp/arp.c
+++ b/src/vnet/arp/arp.c
@@ -191,7 +191,6 @@ always_inline u32
arp_learn (u32 sw_if_index,
const ethernet_arp_ip4_over_ethernet_address_t * addr)
{
- /* *INDENT-OFF* */
ip_neighbor_learn_t l = {
.ip = {
.ip.ip4 = addr->ip4,
@@ -200,7 +199,6 @@ arp_learn (u32 sw_if_index,
.mac = addr->mac,
.sw_if_index = sw_if_index,
};
- /* *INDENT-ON* */
ip_neighbor_learn_dp (&l);
@@ -354,7 +352,6 @@ arp_dst_fib_check (const fib_node_index_t fei, fib_entry_flag_t * flags)
const fib_entry_t *entry = fib_entry_get (fei);
const fib_entry_src_t *entry_src;
fib_source_t src;
- /* *INDENT-OFF* */
FOR_EACH_SRC_ADDED(entry, entry_src, src,
({
*flags = fib_entry_get_flags_for_source (fei, src);
@@ -363,7 +360,6 @@ arp_dst_fib_check (const fib_node_index_t fei, fib_entry_flag_t * flags)
else if (FIB_ENTRY_FLAG_CONNECTED & *flags)
return ARP_DST_FIB_CONN;
}))
- /* *INDENT-ON* */
return ARP_DST_FIB_NONE;
}
@@ -427,6 +423,10 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
}
+ dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
+ &arp0->ip4_over_ethernet[1].ip4, 32);
+ conn_sw_if_index0 = fib_entry_get_any_resolving_interface (dst_fei);
+
{
/*
* we're looking for FIB entries that indicate the source
@@ -459,7 +459,6 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
* flags we need, or the flags we must not have,
* is not the best source, so check then all.
*/
- /* *INDENT-OFF* */
FOR_EACH_SRC_ADDED(src_fib_entry, src, source,
({
src_flags = fib_entry_get_flags_for_source (src_fei, source);
@@ -497,7 +496,6 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
* nor is it a already learned host resp.
*/
}));
- /* *INDENT-ON* */
/*
* shorter mask lookup for the next iteration.
@@ -515,24 +513,20 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
while (!attached &&
!fib_entry_is_sourced (src_fei, FIB_SOURCE_DEFAULT_ROUTE));
- if (!attached)
+ if (!attached &&
+ !arp_unnumbered (p0, sw_if_index0, conn_sw_if_index0))
{
/*
- * the matching route is a not attached, i.e. it was
- * added as a result of routing, rather than interface/ARP
- * configuration. If the matching route is not a host route
- * (i.e. a /32)
+ * the matching route is a not attached and not unnumbered,
+ * i.e. it was added as a result of routing, rather than
+ * interface/ARP configuration. If the matching route is not
+ * a host route (i.e. a /32)
*/
error0 = ARP_ERROR_L3_SRC_ADDRESS_NOT_LOCAL;
goto drop;
}
}
- dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
- &arp0->ip4_over_ethernet[1].ip4,
- 32);
- conn_sw_if_index0 = fib_entry_get_any_resolving_interface (dst_fei);
-
switch (arp_dst_fib_check (dst_fei, &dst_flags))
{
case ARP_DST_FIB_ADJ:
@@ -625,9 +619,9 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
sw_if_index0 != fib_entry_get_resolving_interface (src_fei))
{
/*
- * The interface the ARP is sent to or was received on is not the
- * interface on which the covering prefix is configured.
- * Maybe this is a case for unnumbered.
+ * The interface the ARP is sent to or was received on is
+ * not the interface on which the covering prefix is
+ * configured. Maybe this is a case for unnumbered.
*/
if (!arp_unnumbered (p0, sw_if_index0, conn_sw_if_index0))
{
@@ -642,8 +636,7 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
goto drop;
}
- next0 = arp_mk_reply (vnm, p0, sw_if_index0,
- if_addr0, arp0, eth_rx);
+ next0 = arp_mk_reply (vnm, p0, sw_if_index0, if_addr0, arp0, eth_rx);
/* We are going to reply to this request, so, in the absence of
errors, learn the sender */
@@ -677,7 +670,6 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (arp_input_node, static) =
{
@@ -764,7 +756,6 @@ VNET_FEATURE_INIT (arp_drop_feat_node, static) =
.runs_before = 0, /* last feature */
};
-/* *INDENT-ON* */
typedef struct
{
@@ -936,13 +927,11 @@ ethernet_arp_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ethernet_arp_init) =
{
.runs_after = VLIB_INITS("ethernet_init",
"ip_neighbor_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/arp/arp_proxy.c b/src/vnet/arp/arp_proxy.c
index 184edbf8be8..39f624d5a1d 100644
--- a/src/vnet/arp/arp_proxy.c
+++ b/src/vnet/arp/arp_proxy.c
@@ -223,7 +223,6 @@ set_arp_proxy (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
/*?
* Enable proxy-arp on an interface. The vpp stack will answer ARP
* requests for the indicated address range. Multiple proxy-arp
@@ -249,15 +248,12 @@ VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = {
"set interface proxy-arp <intfc> [enable|disable]",
.function = set_int_proxy_arp_command_fn,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_arp_proxy_command, static) = {
.path = "set arp proxy",
.short_help = "set arp proxy [del] table-ID <table-ID> start <start-address> end <end-addres>",
.function = set_arp_proxy,
};
-/* *INDENT-ON* */
typedef struct
{
@@ -435,13 +431,11 @@ show_ip4_arp (vlib_main_t * vm,
* Fib_index 0 6.0.0.1 - 6.0.0.11
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip4_arp_command, static) = {
.path = "show arp proxy",
.function = show_ip4_arp,
.short_help = "show ip arp",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/bfd/bfd.api b/src/vnet/bfd/bfd.api
index f573bc5309a..d3b3ed21a26 100644
--- a/src/vnet/bfd/bfd.api
+++ b/src/vnet/bfd/bfd.api
@@ -359,6 +359,7 @@ autoreply define bfd_udp_auth_deactivate
bool is_delayed;
};
+/* must be compatible with bfd_error_t */
counters bfd_udp {
none {
severity info;
@@ -366,17 +367,23 @@ counters bfd_udp {
units "packets";
description "OK";
};
- no_session {
+ bad {
severity error;
type counter64;
units "packets";
- description "no-session";
+ description "bad packet";
};
- bad {
+ disabled {
severity error;
type counter64;
units "packets";
- description "bad packet";
+ description "bfd packets received on disabled interfaces";
+ };
+ version {
+ severity error;
+ type counter64;
+ units "packets";
+ description "version";
};
length {
severity error;
@@ -384,6 +391,42 @@ counters bfd_udp {
units "packets";
description "too short";
};
+ detect_multi {
+ severity error;
+ type counter64;
+ units "packets";
+ description "detect-multi";
+ };
+ multi_point {
+ severity error;
+ type counter64;
+ units "packets";
+ description "multi-point";
+ };
+ my_disc {
+ severity error;
+ type counter64;
+ units "packets";
+ description "my-disc";
+ };
+ your_disc {
+ severity error;
+ type counter64;
+ units "packets";
+ description "your-disc";
+ };
+ admin_down {
+ severity error;
+ type counter64;
+ units "packets";
+ description "session admin-down";
+ };
+ no_session {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no-session";
+ };
failed_verification {
severity error;
type counter64;
diff --git a/src/vnet/bfd/bfd_api.c b/src/vnet/bfd/bfd_api.c
index 4d76f71fd9f..816e71081ff 100644
--- a/src/vnet/bfd/bfd_api.c
+++ b/src/vnet/bfd/bfd_api.c
@@ -217,7 +217,6 @@ bfd_event (bfd_main_t * bm, bfd_session_t * bs)
vpe_api_main_t *vam = &vpe_api_main;
vpe_client_registration_t *reg;
vl_api_registration_t *vl_reg;
- /* *INDENT-OFF* */
pool_foreach (reg, vam->bfd_events_registrations) {
vl_reg = vl_api_client_index_to_registration (reg->client_index);
if (vl_reg)
@@ -231,7 +230,6 @@ bfd_event (bfd_main_t * bm, bfd_session_t * bs)
}
}
}
- /* *INDENT-ON* */
}
static void
@@ -244,13 +242,11 @@ vl_api_bfd_udp_session_dump_t_handler (vl_api_bfd_udp_session_dump_t * mp)
return;
bfd_session_t *bs = NULL;
- /* *INDENT-OFF* */
pool_foreach (bs, bfd_main.sessions) {
if (bs->transport == BFD_TRANSPORT_UDP4 ||
bs->transport == BFD_TRANSPORT_UDP6)
send_bfd_udp_session_details (reg, mp->context, bs);
}
- /* *INDENT-ON* */
}
static void
@@ -301,7 +297,6 @@ vl_api_bfd_auth_keys_dump_t_handler (vl_api_bfd_auth_keys_dump_t * mp)
bfd_auth_key_t *key = NULL;
vl_api_bfd_auth_keys_details_t *rmp = NULL;
- /* *INDENT-OFF* */
pool_foreach (key, bfd_main.auth_keys) {
rmp = vl_msg_api_alloc (sizeof (*rmp));
clib_memset (rmp, 0, sizeof (*rmp));
@@ -312,7 +307,6 @@ vl_api_bfd_auth_keys_dump_t_handler (vl_api_bfd_auth_keys_dump_t * mp)
rmp->use_count = clib_host_to_net_u32 (key->use_count);
vl_api_send_msg (reg, (u8 *)rmp);
}
- /* *INDENT-ON* */
}
static void
@@ -394,7 +388,6 @@ vl_api_bfd_udp_get_echo_source_t_handler (vl_api_bfd_udp_get_echo_source_t *
bfd_udp_get_echo_source (&is_set, &sw_if_index, &have_usable_ip4, &ip4,
&have_usable_ip6, &ip6);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_BFD_UDP_GET_ECHO_SOURCE_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
@@ -428,7 +421,6 @@ vl_api_bfd_udp_get_echo_source_t_handler (vl_api_bfd_udp_get_echo_source_t *
rmp->have_usable_ip6 = false;
}
}))
- /* *INDENT-ON* */
}
#include <vnet/bfd/bfd.api.c>
diff --git a/src/vnet/bfd/bfd_cli.c b/src/vnet/bfd/bfd_cli.c
index 1d100b077eb..33942bb89e6 100644
--- a/src/vnet/bfd/bfd_cli.c
+++ b/src/vnet/bfd/bfd_cli.c
@@ -134,12 +134,10 @@ show_bfd (vlib_main_t * vm, unformat_input_t * input,
bfd_auth_key_t *key = NULL;
u8 *s = format (NULL, "%=10s %=25s %=10s\n", "Configuration Key ID",
"Type", "Use Count");
- /* *INDENT-OFF* */
pool_foreach (key, bm->auth_keys) {
s = format (s, "%10u %-25s %10u\n", key->conf_key_id,
bfd_auth_type_str (key->auth_type), key->use_count);
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "%v\n", s);
vec_free (s);
vlib_cli_output (vm, "Number of configured BFD keys: %lu\n",
@@ -149,11 +147,9 @@ show_bfd (vlib_main_t * vm, unformat_input_t * input,
{
u8 *s = format (NULL, "%=10s %=32s %=20s %=20s\n", "Index", "Property",
"Local value", "Remote value");
- /* *INDENT-OFF* */
pool_foreach (bs, bm->sessions) {
s = format (s, "%U", format_bfd_session_cli, vm, bs);
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "%v", s);
vec_free (s);
vlib_cli_output (vm, "Number of configured BFD sessions: %lu\n",
@@ -212,13 +208,11 @@ show_bfd (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_bfd_command, static) = {
.path = "show bfd",
.short_help = "show bfd [keys|sessions|echo-source]",
.function = show_bfd,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_key_add (vlib_main_t * vm, unformat_input_t * input,
@@ -310,7 +304,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_key_add_command, static) = {
.path = "bfd key set",
.short_help = "bfd key set"
@@ -319,7 +312,6 @@ VLIB_CLI_COMMAND (bfd_cli_key_add_command, static) = {
" secret <secret>",
.function = bfd_cli_key_add,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_key_del (vlib_main_t * vm, unformat_input_t * input,
@@ -355,13 +347,11 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_key_del_command, static) = {
.path = "bfd key del",
.short_help = "bfd key del conf-key-id <id>",
.function = bfd_cli_key_del,
};
-/* *INDENT-ON* */
#define INTERFACE_STR "interface"
#define LOCAL_ADDR_STR "local-addr"
@@ -397,23 +387,30 @@ WARN_OFF(tautological-compare) \
goto out; \
}
+static uword
+bfd_cli_unformat_ip46_address (unformat_input_t *input, va_list *args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ return unformat_user (input, unformat_ip46_address, ip46, IP46_TYPE_ANY);
+}
+
static clib_error_t *
bfd_cli_udp_session_add (vlib_main_t * vm, unformat_input_t * input,
CLIB_UNUSED (vlib_cli_command_t * lmd))
{
clib_error_t *ret = NULL;
unformat_input_t _line_input, *line_input = &_line_input;
-#define foreach_bfd_cli_udp_session_add_cli_param(F) \
- F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
- unformat_vnet_sw_interface, &vnet_main) \
- F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \
- F (u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \
- F (u32, detect_mult, DETECT_MULT_STR, mandatory, "%u") \
- F (u32, conf_key_id, CONF_KEY_ID_STR, optional, "%u") \
+#define foreach_bfd_cli_udp_session_add_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \
+ F (u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \
+ F (u32, detect_mult, DETECT_MULT_STR, mandatory, "%u") \
+ F (u32, conf_key_id, CONF_KEY_ID_STR, optional, "%u") \
F (u32, bfd_key_id, BFD_KEY_ID_STR, optional, "%u")
foreach_bfd_cli_udp_session_add_cli_param (DECLARE);
@@ -477,7 +474,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_session_add_command, static) = {
.path = "bfd udp session add",
.short_help = "bfd udp session add"
@@ -493,7 +489,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_add_command, static) = {
"]",
.function = bfd_cli_udp_session_add,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_udp_session_mod (vlib_main_t * vm, unformat_input_t * input,
@@ -501,15 +496,15 @@ bfd_cli_udp_session_mod (vlib_main_t * vm, unformat_input_t * input,
{
clib_error_t *ret = NULL;
unformat_input_t _line_input, *line_input = &_line_input;
-#define foreach_bfd_cli_udp_session_mod_cli_param(F) \
- F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
- unformat_vnet_sw_interface, &vnet_main) \
- F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \
- F (u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \
+#define foreach_bfd_cli_udp_session_mod_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \
+ F (u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \
F (u32, detect_mult, DETECT_MULT_STR, mandatory, "%u")
foreach_bfd_cli_udp_session_mod_cli_param (DECLARE);
@@ -556,7 +551,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_session_mod_command, static) = {
.path = "bfd udp session mod",
.short_help = "bfd udp session mod interface"
@@ -568,7 +562,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_mod_command, static) = {
" <detect multiplier> ",
.function = bfd_cli_udp_session_mod,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_udp_session_del (vlib_main_t * vm, unformat_input_t * input,
@@ -576,13 +569,13 @@ bfd_cli_udp_session_del (vlib_main_t * vm, unformat_input_t * input,
{
clib_error_t *ret = NULL;
unformat_input_t _line_input, *line_input = &_line_input;
-#define foreach_bfd_cli_udp_session_del_cli_param(F) \
- F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
- unformat_vnet_sw_interface, &vnet_main) \
- F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address)
+#define foreach_bfd_cli_udp_session_del_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address)
foreach_bfd_cli_udp_session_del_cli_param (DECLARE);
@@ -620,7 +613,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_session_del_command, static) = {
.path = "bfd udp session del",
.short_help = "bfd udp session del interface"
@@ -629,7 +621,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_del_command, static) = {
"<peer-address> ",
.function = bfd_cli_udp_session_del,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_udp_session_set_flags (vlib_main_t * vm, unformat_input_t * input,
@@ -637,14 +628,14 @@ bfd_cli_udp_session_set_flags (vlib_main_t * vm, unformat_input_t * input,
{
clib_error_t *ret = NULL;
unformat_input_t _line_input, *line_input = &_line_input;
-#define foreach_bfd_cli_udp_session_set_flags_cli_param(F) \
- F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
- unformat_vnet_sw_interface, &vnet_main) \
- F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (u8 *, admin_up_down_token, ADMIN_STR, mandatory, "%v", \
+#define foreach_bfd_cli_udp_session_set_flags_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (u8 *, admin_up_down_token, ADMIN_STR, mandatory, "%v", \
&admin_up_down_token)
foreach_bfd_cli_udp_session_set_flags_cli_param (DECLARE);
@@ -702,7 +693,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_session_set_flags_command, static) = {
.path = "bfd udp session set-flags",
.short_help = "bfd udp session set-flags"
@@ -712,7 +702,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_set_flags_command, static) = {
" admin <up|down>",
.function = bfd_cli_udp_session_set_flags,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_udp_session_auth_activate (vlib_main_t * vm,
@@ -721,15 +710,15 @@ bfd_cli_udp_session_auth_activate (vlib_main_t * vm,
{
clib_error_t *ret = NULL;
unformat_input_t _line_input, *line_input = &_line_input;
-#define foreach_bfd_cli_udp_session_auth_activate_cli_param(F) \
- F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
- unformat_vnet_sw_interface, &vnet_main) \
- F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (u8 *, delayed_token, DELAYED_STR, optional, "%v") \
- F (u32, conf_key_id, CONF_KEY_ID_STR, mandatory, "%u") \
+#define foreach_bfd_cli_udp_session_auth_activate_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (u8 *, delayed_token, DELAYED_STR, optional, "%v") \
+ F (u32, conf_key_id, CONF_KEY_ID_STR, mandatory, "%u") \
F (u32, bfd_key_id, BFD_KEY_ID_STR, mandatory, "%u")
foreach_bfd_cli_udp_session_auth_activate_cli_param (DECLARE);
@@ -799,7 +788,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_activate_command, static) = {
.path = "bfd udp session auth activate",
.short_help = "bfd udp session auth activate"
@@ -818,13 +806,13 @@ bfd_cli_udp_session_auth_deactivate (vlib_main_t *vm, unformat_input_t *input,
{
clib_error_t *ret = NULL;
unformat_input_t _line_input, *line_input = &_line_input;
-#define foreach_bfd_cli_udp_session_auth_deactivate_cli_param(F) \
- F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
- unformat_vnet_sw_interface, &vnet_main) \
- F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
+#define foreach_bfd_cli_udp_session_auth_deactivate_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
F (u8 *, delayed_token, DELAYED_STR, optional, "%v")
foreach_bfd_cli_udp_session_auth_deactivate_cli_param (DECLARE);
@@ -884,7 +872,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_deactivate_command, static) = {
.path = "bfd udp session auth deactivate",
.short_help = "bfd udp session auth deactivate"
@@ -894,7 +881,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_deactivate_command, static) = {
"[ delayed <yes|no> ]",
.function = bfd_cli_udp_session_auth_deactivate,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_udp_set_echo_source (vlib_main_t * vm, unformat_input_t * input,
@@ -941,13 +927,11 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_set_echo_source_cmd, static) = {
.path = "bfd udp echo-source set",
.short_help = "bfd udp echo-source set interface <interface>",
.function = bfd_cli_udp_set_echo_source,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_udp_del_echo_source (vlib_main_t * vm, unformat_input_t * input,
@@ -964,13 +948,11 @@ bfd_cli_udp_del_echo_source (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_del_echo_source_cmd, static) = {
.path = "bfd udp echo-source del",
.short_help = "bfd udp echo-source del",
.function = bfd_cli_udp_del_echo_source,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c
index 1ca1d7ec0ab..1423da91158 100644
--- a/src/vnet/bfd/bfd_main.c
+++ b/src/vnet/bfd/bfd_main.c
@@ -500,30 +500,29 @@ bfd_session_set_flags (vlib_main_t * vm, bfd_session_t * bs, u8 admin_up_down)
}
u8 *
-bfd_input_format_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- const bfd_input_trace_t *t = va_arg (*args, bfd_input_trace_t *);
- const bfd_pkt_t *pkt = (bfd_pkt_t *) t->data;
- if (t->len > STRUCT_SIZE_OF (bfd_pkt_t, head))
- {
- s = format (s, "BFD v%u, diag=%u(%s), state=%u(%s),\n"
- " flags=(P:%u, F:%u, C:%u, A:%u, D:%u, M:%u), "
- "detect_mult=%u, length=%u\n",
- bfd_pkt_get_version (pkt), bfd_pkt_get_diag_code (pkt),
- bfd_diag_code_string (bfd_pkt_get_diag_code (pkt)),
- bfd_pkt_get_state (pkt),
- bfd_state_string (bfd_pkt_get_state (pkt)),
- bfd_pkt_get_poll (pkt), bfd_pkt_get_final (pkt),
- bfd_pkt_get_control_plane_independent (pkt),
- bfd_pkt_get_auth_present (pkt), bfd_pkt_get_demand (pkt),
- bfd_pkt_get_multipoint (pkt), pkt->head.detect_mult,
- pkt->head.length);
- if (t->len >= sizeof (bfd_pkt_t) &&
- pkt->head.length >= sizeof (bfd_pkt_t))
+format_bfd_pkt (u8 *s, va_list *args)
+{
+ u32 len = va_arg (*args, u32);
+ u8 *data = va_arg (*args, u8 *);
+
+ const bfd_pkt_t *pkt = (bfd_pkt_t *) data;
+ if (len > STRUCT_SIZE_OF (bfd_pkt_t, head))
+ {
+ s = format (
+ s,
+ "BFD v%u, diag=%u(%s), state=%u(%s),\n"
+ " flags=(P:%u, F:%u, C:%u, A:%u, D:%u, M:%u), "
+ "detect_mult=%u, length=%u",
+ bfd_pkt_get_version (pkt), bfd_pkt_get_diag_code (pkt),
+ bfd_diag_code_string (bfd_pkt_get_diag_code (pkt)),
+ bfd_pkt_get_state (pkt), bfd_state_string (bfd_pkt_get_state (pkt)),
+ bfd_pkt_get_poll (pkt), bfd_pkt_get_final (pkt),
+ bfd_pkt_get_control_plane_independent (pkt),
+ bfd_pkt_get_auth_present (pkt), bfd_pkt_get_demand (pkt),
+ bfd_pkt_get_multipoint (pkt), pkt->head.detect_mult, pkt->head.length);
+ if (len >= sizeof (bfd_pkt_t) && pkt->head.length >= sizeof (bfd_pkt_t))
{
- s = format (s, " my discriminator: %u\n",
+ s = format (s, "\n my discriminator: %u\n",
clib_net_to_host_u32 (pkt->my_disc));
s = format (s, " your discriminator: %u\n",
clib_net_to_host_u32 (pkt->your_disc));
@@ -534,16 +533,16 @@ bfd_input_format_trace (u8 * s, va_list * args)
s = format (s, " required min echo rx interval: %u",
clib_net_to_host_u32 (pkt->req_min_echo_rx));
}
- if (t->len >= sizeof (bfd_pkt_with_common_auth_t) &&
+ if (len >= sizeof (bfd_pkt_with_common_auth_t) &&
pkt->head.length >= sizeof (bfd_pkt_with_common_auth_t) &&
bfd_pkt_get_auth_present (pkt))
{
const bfd_pkt_with_common_auth_t *with_auth = (void *) pkt;
const bfd_auth_common_t *common = &with_auth->common_auth;
s = format (s, "\n auth len: %u\n", common->len);
- s = format (s, " auth type: %u:%s\n", common->type,
+ s = format (s, " auth type: %u:%s", common->type,
bfd_auth_type_str (common->type));
- if (t->len >= sizeof (bfd_pkt_with_sha1_auth_t) &&
+ if (len >= sizeof (bfd_pkt_with_sha1_auth_t) &&
pkt->head.length >= sizeof (bfd_pkt_with_sha1_auth_t) &&
(BFD_AUTH_TYPE_keyed_sha1 == common->type ||
BFD_AUTH_TYPE_meticulous_keyed_sha1 == common->type))
@@ -557,15 +556,23 @@ bfd_input_format_trace (u8 * s, va_list * args)
sizeof (sha1->hash));
}
}
- else
- {
- s = format (s, "\n");
- }
}
return s;
}
+u8 *
+bfd_input_format_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ const bfd_input_trace_t *t = va_arg (*args, bfd_input_trace_t *);
+
+ s = format (s, "%U", format_bfd_pkt, t->len, t->data);
+
+ return s;
+}
+
typedef struct
{
u32 bs_idx;
@@ -739,17 +746,18 @@ bfd_add_transport_layer (vlib_main_t * vm, u32 bi, bfd_session_t * bs)
}
static int
-bfd_transport_control_frame (vlib_main_t * vm, u32 bi, bfd_session_t * bs)
+bfd_transport_control_frame (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi,
+ bfd_session_t *bs)
{
switch (bs->transport)
{
case BFD_TRANSPORT_UDP4:
BFD_DBG ("Transport bfd via udp4, bs_idx=%u", bs->bs_idx);
- return bfd_transport_udp4 (vm, bi, bs, 0 /* is_echo */);
+ return bfd_transport_udp4 (vm, rt, bi, bs, 0 /* is_echo */);
break;
case BFD_TRANSPORT_UDP6:
BFD_DBG ("Transport bfd via udp6, bs_idx=%u", bs->bs_idx);
- return bfd_transport_udp6 (vm, bi, bs, 0 /* is_echo */);
+ return bfd_transport_udp6 (vm, rt, bi, bs, 0 /* is_echo */);
break;
}
return 0;
@@ -773,17 +781,18 @@ bfd_echo_add_transport_layer (vlib_main_t * vm, u32 bi, bfd_session_t * bs)
}
static int
-bfd_transport_echo (vlib_main_t * vm, u32 bi, bfd_session_t * bs)
+bfd_transport_echo (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi,
+ bfd_session_t *bs)
{
switch (bs->transport)
{
case BFD_TRANSPORT_UDP4:
BFD_DBG ("Transport bfd echo via udp4, bs_idx=%u", bs->bs_idx);
- return bfd_transport_udp4 (vm, bi, bs, 1 /* is_echo */);
+ return bfd_transport_udp4 (vm, rt, bi, bs, 1 /* is_echo */);
break;
case BFD_TRANSPORT_UDP6:
BFD_DBG ("Transport bfd echo via udp6, bs_idx=%u", bs->bs_idx);
- return bfd_transport_udp6 (vm, bi, bs, 1 /* is_echo */);
+ return bfd_transport_udp6 (vm, rt, bi, bs, 1 /* is_echo */);
break;
}
return 0;
@@ -902,8 +911,39 @@ bfd_init_control_frame (bfd_session_t *bs, vlib_buffer_t *b)
b->current_length = bfd_length;
}
+typedef struct
+{
+ u32 bs_idx;
+ u32 len;
+ u8 data[400];
+} bfd_process_trace_t;
+
static void
-bfd_send_echo (vlib_main_t *vm, bfd_main_t *bm, bfd_session_t *bs, u64 now)
+bfd_process_trace_buf (vlib_main_t *vm, vlib_node_runtime_t *rt,
+ vlib_buffer_t *b, bfd_session_t *bs)
+{
+ u32 n_trace = vlib_get_trace_count (vm, rt);
+ if (n_trace > 0)
+ {
+ bfd_process_trace_t *tr;
+ if (vlib_trace_buffer (vm, rt, 0, b, 0))
+ {
+ tr = vlib_add_trace (vm, rt, b, sizeof (*tr));
+ tr->bs_idx = bs->bs_idx;
+ u64 len = (b->current_length < sizeof (tr->data)) ?
+ b->current_length :
+ sizeof (tr->data);
+ tr->len = len;
+ clib_memcpy_fast (tr->data, vlib_buffer_get_current (b), len);
+ --n_trace;
+ vlib_set_trace_count (vm, rt, n_trace);
+ }
+ }
+}
+
+static void
+bfd_send_echo (vlib_main_t *vm, vlib_node_runtime_t *rt, bfd_main_t *bm,
+ bfd_session_t *bs, u64 now)
{
if (!bfd_is_echo_possible (bs))
{
@@ -931,6 +971,7 @@ bfd_send_echo (vlib_main_t *vm, bfd_main_t *bm, bfd_session_t *bs, u64 now)
bfd_calc_echo_checksum (bs->local_discr, pkt->expire_time_nsec,
bs->echo_secret);
b->current_length = sizeof (*pkt);
+ bfd_process_trace_buf (vm, rt, b, bs);
if (!bfd_echo_add_transport_layer (vm, bi, bs))
{
BFD_ERR ("cannot send echo packet out, turning echo off");
@@ -938,7 +979,7 @@ bfd_send_echo (vlib_main_t *vm, bfd_main_t *bm, bfd_session_t *bs, u64 now)
vlib_buffer_free_one (vm, bi);
return;
}
- if (!bfd_transport_echo (vm, bi, bs))
+ if (!bfd_transport_echo (vm, rt, bi, bs))
{
BFD_ERR ("cannot send echo packet out, turning echo off");
bs->echo = 0;
@@ -957,7 +998,8 @@ bfd_send_echo (vlib_main_t *vm, bfd_main_t *bm, bfd_session_t *bs, u64 now)
}
static void
-bfd_send_periodic (vlib_main_t *vm, bfd_main_t *bm, bfd_session_t *bs, u64 now)
+bfd_send_periodic (vlib_main_t *vm, vlib_node_runtime_t *rt, bfd_main_t *bm,
+ bfd_session_t *bs, u64 now)
{
if (!bs->remote_min_rx_usec && BFD_POLL_NOT_NEEDED == bs->poll_state)
{
@@ -1014,8 +1056,9 @@ bfd_send_periodic (vlib_main_t *vm, bfd_main_t *bm, bfd_session_t *bs, u64 now)
break;
}
bfd_add_auth_section (vm, b, bs);
+ bfd_process_trace_buf (vm, rt, b, bs);
bfd_add_transport_layer (vm, bi, bs);
- if (!bfd_transport_control_frame (vm, bi, bs))
+ if (!bfd_transport_control_frame (vm, rt, bi, bs))
{
vlib_buffer_free_one (vm, bi);
}
@@ -1090,7 +1133,8 @@ bfd_check_rx_timeout (vlib_main_t * vm, bfd_main_t * bm, bfd_session_t * bs,
}
void
-bfd_on_timeout (vlib_main_t *vm, bfd_main_t *bm, bfd_session_t *bs, u64 now)
+bfd_on_timeout (vlib_main_t *vm, vlib_node_runtime_t *rt, bfd_main_t *bm,
+ bfd_session_t *bs, u64 now)
{
BFD_DBG ("Timeout for bs_idx=%lu", bs->bs_idx);
switch (bs->local_state)
@@ -1098,11 +1142,11 @@ bfd_on_timeout (vlib_main_t *vm, bfd_main_t *bm, bfd_session_t *bs, u64 now)
case BFD_STATE_admin_down:
/* fallthrough */
case BFD_STATE_down:
- bfd_send_periodic (vm, bm, bs, now);
+ bfd_send_periodic (vm, rt, bm, bs, now);
break;
case BFD_STATE_init:
bfd_check_rx_timeout (vm, bm, bs, now, 1);
- bfd_send_periodic (vm, bm, bs, now);
+ bfd_send_periodic (vm, rt, bm, bs, now);
break;
case BFD_STATE_up:
bfd_check_rx_timeout (vm, bm, bs, now, 1);
@@ -1119,20 +1163,33 @@ bfd_on_timeout (vlib_main_t *vm, bfd_main_t *bm, bfd_session_t *bs, u64 now)
bs->config_required_min_rx_nsec));
bfd_set_poll_state (bs, BFD_POLL_NEEDED);
}
- bfd_send_periodic (vm, bm, bs, now);
+ bfd_send_periodic (vm, rt, bm, bs, now);
if (bs->echo)
{
- bfd_send_echo (vm, bm, bs, now);
+ bfd_send_echo (vm, rt, bm, bs, now);
}
break;
}
}
+u8 *
+format_bfd_process_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ bfd_process_trace_t *t = va_arg (*args, bfd_process_trace_t *);
+
+ s =
+ format (s, "bs_idx=%u => %U", t->bs_idx, format_bfd_pkt, t->len, t->data);
+
+ return s;
+}
+
/*
* bfd process node function
*/
static uword
-bfd_process (vlib_main_t *vm, CLIB_UNUSED (vlib_node_runtime_t *rt),
+bfd_process (vlib_main_t *vm, vlib_node_runtime_t *rt,
CLIB_UNUSED (vlib_frame_t *f))
{
bfd_main_t *bm = &bfd_main;
@@ -1213,7 +1270,7 @@ bfd_process (vlib_main_t *vm, CLIB_UNUSED (vlib_node_runtime_t *rt),
{
bfd_session_t *bs =
pool_elt_at_index (bm->sessions, *session_index);
- bfd_send_periodic (vm, bm, bs, now);
+ bfd_send_periodic (vm, rt, bm, bs, now);
bfd_set_timer (bm, bs, now, 1);
}
else
@@ -1259,7 +1316,7 @@ bfd_process (vlib_main_t *vm, CLIB_UNUSED (vlib_node_runtime_t *rt),
{
bfd_session_t *bs = pool_elt_at_index (bm->sessions, bs_idx);
bs->tw_id = 0; /* timer is gone because it expired */
- bfd_on_timeout (vm, bm, bs, now);
+ bfd_on_timeout (vm, rt, bm, bs, now);
bfd_set_timer (bm, bs, now, 1);
}
}
@@ -1280,13 +1337,25 @@ bfd_process (vlib_main_t *vm, CLIB_UNUSED (vlib_node_runtime_t *rt),
/*
* bfd process node declaration
*/
-VLIB_REGISTER_NODE (bfd_process_node, static) = {
+// clang-format off
+VLIB_REGISTER_NODE (bfd_process_node, static) =
+{
.function = bfd_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "bfd-process",
- .n_next_nodes = 0,
- .next_nodes = {},
+ .flags = (VLIB_NODE_FLAG_TRACE_SUPPORTED),
+ .format_trace = format_bfd_process_trace,
+ .n_next_nodes = BFD_TX_N_NEXT,
+ .next_nodes = {
+ [BFD_TX_IP4_ARP] = "ip4-arp",
+ [BFD_TX_IP6_NDP] = "ip6-discover-neighbor",
+ [BFD_TX_IP4_REWRITE] = "ip4-rewrite",
+ [BFD_TX_IP6_REWRITE] = "ip6-rewrite",
+ [BFD_TX_IP4_MIDCHAIN] = "ip4-midchain",
+ [BFD_TX_IP6_MIDCHAIN] = "ip6-midchain",
+ }
};
+// clang-format on
static clib_error_t *
bfd_sw_interface_up_down (CLIB_UNUSED (vnet_main_t *vnm),
diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h
index 4fc4ef81260..1d4617e1d7c 100644
--- a/src/vnet/bfd/bfd_main.h
+++ b/src/vnet/bfd/bfd_main.h
@@ -366,7 +366,6 @@ typedef enum
BFD_EVENT_CONFIG_CHANGED,
} bfd_process_event_e;
-/* *INDENT-OFF* */
/** echo packet structure */
typedef CLIB_PACKED (struct {
/** local discriminator */
@@ -376,7 +375,6 @@ typedef CLIB_PACKED (struct {
/** checksum - based on discriminator, local secret and expire time */
u64 checksum;
}) bfd_echo_pkt_t;
-/* *INDENT-ON* */
static inline void
bfd_lock (bfd_main_t * bm)
@@ -476,6 +474,17 @@ const char *bfd_poll_state_string (bfd_poll_state_e state);
*/
void bfd_register_listener (bfd_notify_fn_t fn);
+typedef enum
+{
+ BFD_TX_IP4_ARP,
+ BFD_TX_IP6_NDP,
+ BFD_TX_IP4_REWRITE,
+ BFD_TX_IP6_REWRITE,
+ BFD_TX_IP4_MIDCHAIN,
+ BFD_TX_IP6_MIDCHAIN,
+ BFD_TX_N_NEXT,
+} bfd_tx_next_t;
+
#endif /* __included_bfd_main_h__ */
/*
diff --git a/src/vnet/bfd/bfd_protocol.h b/src/vnet/bfd/bfd_protocol.h
index 210c561b430..16ee3231ef0 100644
--- a/src/vnet/bfd/bfd_protocol.h
+++ b/src/vnet/bfd/bfd_protocol.h
@@ -46,14 +46,11 @@ typedef enum
u32 bfd_max_key_len_for_auth_type (bfd_auth_type_e auth_type);
const char *bfd_auth_type_str (bfd_auth_type_e auth_type);
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 type;
u8 len;
}) bfd_auth_common_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
/*
* 4.4. Keyed SHA1 and Meticulous Keyed SHA1 Authentication Section Format
@@ -88,9 +85,7 @@ typedef CLIB_PACKED (struct {
*/
u8 hash[20];
}) bfd_auth_sha1_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
/*
* The Mandatory Section of a BFD Control packet has the following
@@ -125,21 +120,16 @@ typedef CLIB_PACKED (struct {
u32 req_min_rx;
u32 req_min_echo_rx;
}) bfd_pkt_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
bfd_pkt_t pkt;
bfd_auth_common_t common_auth;
}) bfd_pkt_with_common_auth_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
bfd_pkt_t pkt;
bfd_auth_sha1_t sha1_auth;
}) bfd_pkt_with_sha1_auth_t;
-/* *INDENT-ON* */
u8 bfd_pkt_get_version (const bfd_pkt_t * pkt);
void bfd_pkt_set_version (bfd_pkt_t * pkt, int version);
diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c
index 36ecdf1dbc9..ec42cda1bc4 100644
--- a/src/vnet/bfd/bfd_udp.c
+++ b/src/vnet/bfd/bfd_udp.c
@@ -42,6 +42,14 @@
#include <vnet/bfd/bfd_api.h>
#include <vnet/bfd/bfd.api_enum.h>
+#define F(sym, str) \
+ STATIC_ASSERT ((int) BFD_ERROR_##sym == (int) BFD_UDP_ERROR_##sym, \
+ "BFD error enums mismatch");
+foreach_bfd_error (F)
+#undef F
+ STATIC_ASSERT ((int) BFD_N_ERROR <= (int) BFD_UDP_N_ERROR,
+ "BFD error enum sizes mismatch");
+
typedef struct
{
bfd_main_t *bfd_main;
@@ -54,18 +62,6 @@ typedef struct
int echo_source_is_set;
/* loopback interface used to get echo source ip */
u32 echo_source_sw_if_index;
- /* node index of "ip4-arp" node */
- u32 ip4_arp_idx;
- /* node index of "ip6-discover-neighbor" node */
- u32 ip6_ndp_idx;
- /* node index of "ip4-rewrite" node */
- u32 ip4_rewrite_idx;
- /* node index of "ip6-rewrite" node */
- u32 ip6_rewrite_idx;
- /* node index of "ip4-midchain" node */
- u32 ip4_midchain_idx;
- /* node index of "ip6-midchain" node */
- u32 ip6_midchain_idx;
/* log class */
vlib_log_class_t log_class;
/* number of active udp4 sessions */
@@ -135,7 +131,6 @@ bfd_udp_is_echo_available (bfd_transport_e transport)
{
ip4_main_t *im = &ip4_main;
ip_interface_address_t *ia = NULL;
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im->lookup_main, ia,
bfd_udp_main.echo_source_sw_if_index,
0 /* honor unnumbered */, ({
@@ -144,13 +139,11 @@ bfd_udp_is_echo_available (bfd_transport_e transport)
return 1;
}
}));
- /* *INDENT-ON* */
}
else if (BFD_TRANSPORT_UDP6 == transport)
{
ip6_main_t *im = &ip6_main;
ip_interface_address_t *ia = NULL;
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im->lookup_main, ia,
bfd_udp_main.echo_source_sw_if_index,
0 /* honor unnumbered */, ({
@@ -159,7 +152,6 @@ bfd_udp_is_echo_available (bfd_transport_e transport)
return 1;
}
}));
- /* *INDENT-ON* */
}
}
BFD_DBG ("No usable IP address for UDP echo - echo not available");
@@ -191,7 +183,6 @@ bfd_udp_get_echo_src_ip4 (ip4_address_t * addr)
ip_interface_address_t *ia = NULL;
ip4_main_t *im = &ip4_main;
- /* *INDENT-OFF* */
foreach_ip_interface_address (
&im->lookup_main, ia, bfd_udp_main.echo_source_sw_if_index,
0 /* honor unnumbered */, ({
@@ -209,7 +200,6 @@ bfd_udp_get_echo_src_ip4 (ip4_address_t * addr)
return 1;
}
}));
- /* *INDENT-ON* */
BFD_ERR ("cannot find ip4 address, no usable address found");
return 0;
}
@@ -225,7 +215,6 @@ bfd_udp_get_echo_src_ip6 (ip6_address_t * addr)
ip_interface_address_t *ia = NULL;
ip6_main_t *im = &ip6_main;
- /* *INDENT-OFF* */
foreach_ip_interface_address (
&im->lookup_main, ia, bfd_udp_main.echo_source_sw_if_index,
0 /* honor unnumbered */, ({
@@ -238,7 +227,6 @@ bfd_udp_get_echo_src_ip6 (ip6_address_t * addr)
return 1;
}
}));
- /* *INDENT-ON* */
BFD_ERR ("cannot find ip6 address, no usable address found");
return 0;
}
@@ -384,16 +372,23 @@ bfd_add_udp6_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs,
}
static void
-bfd_create_frame_to_next_node (vlib_main_t *vm, bfd_main_t *bm,
- const bfd_session_t *bs, u32 bi, u32 next_node,
+bfd_create_frame_to_next_node (vlib_main_t *vm, vlib_node_runtime_t *rt,
+ u32 bi, const bfd_session_t *bs, u32 next,
vlib_combined_counter_main_t *tx_counter)
{
- vlib_frame_t *f = vlib_get_frame_to_node (vm, next_node);
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ vlib_node_t *from_node = vlib_get_node (vm, rt->node_index);
+ ASSERT (next < vec_len (from_node->next_nodes));
+ u32 to_node_index = from_node->next_nodes[next];
+ vlib_frame_t *f = vlib_get_frame_to_node (vm, to_node_index);
u32 *to_next = vlib_frame_vector_args (f);
to_next[0] = bi;
f->n_vectors = 1;
- vlib_put_frame_to_node (vm, next_node, f);
- vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ if (b->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ f->frame_flags |= VLIB_NODE_FLAG_TRACE;
+ }
+ vlib_put_frame_to_node (vm, to_node_index, f);
vlib_increment_combined_counter (tx_counter, vm->thread_index, bs->bs_idx, 1,
vlib_buffer_length_in_chain (vm, b));
}
@@ -415,10 +410,10 @@ bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node)
switch (bs->transport)
{
case BFD_TRANSPORT_UDP4:
- *next_node = bfd_udp_main.ip4_arp_idx;
+ *next_node = BFD_TX_IP4_ARP;
return 1;
case BFD_TRANSPORT_UDP6:
- *next_node = bfd_udp_main.ip6_ndp_idx;
+ *next_node = BFD_TX_IP6_NDP;
return 1;
}
break;
@@ -426,10 +421,10 @@ bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node)
switch (bs->transport)
{
case BFD_TRANSPORT_UDP4:
- *next_node = bfd_udp_main.ip4_rewrite_idx;
+ *next_node = BFD_TX_IP4_REWRITE;
return 1;
case BFD_TRANSPORT_UDP6:
- *next_node = bfd_udp_main.ip6_rewrite_idx;
+ *next_node = BFD_TX_IP6_REWRITE;
return 1;
}
break;
@@ -437,10 +432,10 @@ bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node)
switch (bs->transport)
{
case BFD_TRANSPORT_UDP4:
- *next_node = bfd_udp_main.ip4_midchain_idx;
+ *next_node = BFD_TX_IP4_MIDCHAIN;
return 1;
case BFD_TRANSPORT_UDP6:
- *next_node = bfd_udp_main.ip6_midchain_idx;
+ *next_node = BFD_TX_IP6_MIDCHAIN;
return 1;
}
break;
@@ -452,35 +447,35 @@ bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node)
}
int
-bfd_transport_udp4 (vlib_main_t *vm, u32 bi, const struct bfd_session_s *bs,
- int is_echo)
+bfd_transport_udp4 (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi,
+ const struct bfd_session_s *bs, int is_echo)
{
u32 next_node;
int rv = bfd_udp_calc_next_node (bs, &next_node);
bfd_main_t *bm = bfd_udp_main.bfd_main;
if (rv)
{
- bfd_create_frame_to_next_node (vm, bm, bs, bi, next_node,
+ bfd_create_frame_to_next_node (vm, rt, bi, bs, next_node,
is_echo ? &bm->tx_echo_counter :
- &bm->tx_counter);
+ &bm->tx_counter);
}
return rv;
}
int
-bfd_transport_udp6 (vlib_main_t *vm, u32 bi, const struct bfd_session_s *bs,
- int is_echo)
+bfd_transport_udp6 (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi,
+ const struct bfd_session_s *bs, int is_echo)
{
u32 next_node;
int rv = bfd_udp_calc_next_node (bs, &next_node);
bfd_main_t *bm = bfd_udp_main.bfd_main;
if (rv)
{
- bfd_create_frame_to_next_node (
- vm, bfd_udp_main.bfd_main, bs, bi, next_node,
- is_echo ? &bm->tx_echo_counter : &bm->tx_counter);
+ bfd_create_frame_to_next_node (vm, rt, bi, bs, next_node,
+ is_echo ? &bm->tx_echo_counter :
+ &bm->tx_counter);
}
- return 1;
+ return rv;
}
static bfd_session_t *
@@ -1354,7 +1349,6 @@ bfd_udp4_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
/*
* bfd input graph node declaration
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bfd_udp4_input_node, static) = {
.function = bfd_udp4_input,
.name = "bfd-udp4-input",
@@ -1375,7 +1369,6 @@ VLIB_REGISTER_NODE (bfd_udp4_input_node, static) = {
[BFD_UDP_INPUT_NEXT_REPLY_MIDCHAIN] = "ip4-midchain",
},
};
-/* *INDENT-ON* */
static uword
bfd_udp6_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
@@ -1383,7 +1376,6 @@ bfd_udp6_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
return bfd_udp_input (vm, rt, f, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bfd_udp6_input_node, static) = {
.function = bfd_udp6_input,
.name = "bfd-udp6-input",
@@ -1404,7 +1396,6 @@ VLIB_REGISTER_NODE (bfd_udp6_input_node, static) = {
[BFD_UDP_INPUT_NEXT_REPLY_MIDCHAIN] = "ip6-midchain",
},
};
-/* *INDENT-ON* */
/*
* Process a frame of bfd echo packets
@@ -1509,7 +1500,6 @@ bfd_echo_input_format_trace (u8 * s, va_list * args)
/*
* bfd input graph node declaration
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bfd_udp_echo4_input_node, static) = {
.function = bfd_udp_echo4_input,
.name = "bfd-udp-echo4-input",
@@ -1529,7 +1519,6 @@ VLIB_REGISTER_NODE (bfd_udp_echo4_input_node, static) = {
[BFD_UDP_ECHO_INPUT_NEXT_REPLY_REWRITE] = "ip4-lookup",
},
};
-/* *INDENT-ON* */
static uword
bfd_udp_echo6_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
@@ -1538,7 +1527,6 @@ bfd_udp_echo6_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
return bfd_udp_echo_input (vm, rt, f, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bfd_udp_echo6_input_node, static) = {
.function = bfd_udp_echo6_input,
.name = "bfd-udp-echo6-input",
@@ -1559,7 +1547,6 @@ VLIB_REGISTER_NODE (bfd_udp_echo6_input_node, static) = {
},
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_udp_sw_if_add_del (CLIB_UNUSED (vnet_main_t *vnm), u32 sw_if_index,
@@ -1639,25 +1626,6 @@ bfd_udp_init (vlib_main_t * vm)
sizeof (bfd_udp_key_t));
bfd_udp_main.bfd_main = &bfd_main;
bfd_udp_main.vnet_main = vnet_get_main ();
- vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "ip4-arp");
- ASSERT (node);
- bfd_udp_main.ip4_arp_idx = node->index;
- node = vlib_get_node_by_name (vm, (u8 *) "ip6-discover-neighbor");
- ASSERT (node);
- bfd_udp_main.ip6_ndp_idx = node->index;
- node = vlib_get_node_by_name (vm, (u8 *) "ip4-rewrite");
- ASSERT (node);
- bfd_udp_main.ip4_rewrite_idx = node->index;
- node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite");
- ASSERT (node);
- bfd_udp_main.ip6_rewrite_idx = node->index;
- node = vlib_get_node_by_name (vm, (u8 *) "ip4-midchain");
- ASSERT (node);
- bfd_udp_main.ip4_midchain_idx = node->index;
- node = vlib_get_node_by_name (vm, (u8 *) "ip6-midchain");
- ASSERT (node);
- bfd_udp_main.ip6_midchain_idx = node->index;
-
bfd_udp_stats_init (&bfd_udp_main);
bfd_udp_main.log_class = vlib_log_register_class ("bfd", "udp");
diff --git a/src/vnet/bfd/bfd_udp.h b/src/vnet/bfd/bfd_udp.h
index 866b5868b00..8f4bfee2bd7 100644
--- a/src/vnet/bfd/bfd_udp.h
+++ b/src/vnet/bfd/bfd_udp.h
@@ -82,7 +82,7 @@ int bfd_add_udp6_transport (vlib_main_t * vm, u32 bi,
*
* @return 1 on success, 0 on failure
*/
-int bfd_transport_udp4 (vlib_main_t *vm, u32 bi,
+int bfd_transport_udp4 (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi,
const struct bfd_session_s *bs, int is_echo);
/**
@@ -90,7 +90,7 @@ int bfd_transport_udp4 (vlib_main_t *vm, u32 bi,
*
* @return 1 on success, 0 on failure
*/
-int bfd_transport_udp6 (vlib_main_t *vm, u32 bi,
+int bfd_transport_udp6 (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi,
const struct bfd_session_s *bs, int is_echo);
/**
diff --git a/src/vnet/bier/bier_update.c b/src/vnet/bier/bier_update.c
index 4108d09f51e..fdb7c5c0865 100644
--- a/src/vnet/bier/bier_update.c
+++ b/src/vnet/bier/bier_update.c
@@ -129,7 +129,14 @@ done:
VLIB_CLI_COMMAND (bier_route_command) = {
.path = "bier route",
- .short_help = "bier route [add|del] sd <sud-domain> set <set> bsl <bit-string-length> bp <bit-position> via [next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4 <interface>] [out-labels <value value value>]",
+ .short_help =
+ "bier route [add|del] sd <sud-domain> set <set> bsl <bit-string-length> "
+ "bp <bit-position> via [next-hop-address] [next-hop-interface] "
+ "[next-hop-table <value>] [weight <value>] [preference <value>] "
+ "[udp-encap-id <value>] [ip4-lookup-in-table <value>] "
+ "[ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] "
+ "[resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 <interface>] "
+ "[out-labels <value value value>]",
.function = vnet_bier_route_cmd,
};
diff --git a/src/vnet/bonding/bond_api.c b/src/vnet/bonding/bond_api.c
index 3fd73d7995f..d9287a8e23d 100644
--- a/src/vnet/bonding/bond_api.c
+++ b/src/vnet/bonding/bond_api.c
@@ -43,8 +43,11 @@ vl_api_bond_delete_t_handler (vl_api_bond_delete_t * mp)
vl_api_bond_delete_reply_t *rmp;
u32 sw_if_index = ntohl (mp->sw_if_index);
+ VALIDATE_SW_IF_INDEX (mp);
+
rv = bond_delete_if (vm, sw_if_index);
+ BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_BOND_DELETE_REPLY);
}
@@ -72,12 +75,10 @@ vl_api_bond_create_t_handler (vl_api_bond_create_t * mp)
int rv = ap->rv;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_BOND_CREATE_REPLY,
({
rmp->sw_if_index = ntohl (ap->sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -105,12 +106,10 @@ vl_api_bond_create2_t_handler (vl_api_bond_create2_t * mp)
int rv = ap->rv;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_BOND_CREATE2_REPLY,
({
rmp->sw_if_index = ntohl (ap->sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -168,6 +167,8 @@ static void
vl_api_sw_interface_set_bond_weight_reply_t *rmp;
int rv = 0;
+ VALIDATE_SW_IF_INDEX (mp);
+
clib_memset (ap, 0, sizeof (*ap));
ap->sw_if_index = ntohl (mp->sw_if_index);
@@ -176,6 +177,7 @@ static void
bond_set_intf_weight (vm, ap);
rv = ap->rv;
+ BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_SW_INTERFACE_SET_BOND_WEIGHT_REPLY);
}
@@ -187,12 +189,15 @@ vl_api_bond_detach_slave_t_handler (vl_api_bond_detach_slave_t * mp)
bond_detach_member_args_t _a, *ap = &_a;
int rv = 0;
+ VALIDATE_SW_IF_INDEX (mp);
+
clib_memset (ap, 0, sizeof (*ap));
ap->member = ntohl (mp->sw_if_index);
bond_detach_member (vm, ap);
rv = ap->rv;
+ BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_BOND_DETACH_SLAVE_REPLY);
}
@@ -204,12 +209,15 @@ vl_api_bond_detach_member_t_handler (vl_api_bond_detach_member_t * mp)
bond_detach_member_args_t _a, *ap = &_a;
int rv = 0;
+ VALIDATE_SW_IF_INDEX (mp);
+
clib_memset (ap, 0, sizeof (*ap));
ap->member = ntohl (mp->sw_if_index);
bond_detach_member (vm, ap);
rv = ap->rv;
+ BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_BOND_DETACH_MEMBER_REPLY);
}
diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c
index b0ded4734dd..cdc935ff10f 100644
--- a/src/vnet/bonding/cli.c
+++ b/src/vnet/bonding/cli.c
@@ -183,7 +183,6 @@ bond_dump_ifs (bond_interface_details_t ** out_bondifs)
bond_interface_details_t *r_bondifs = NULL;
bond_interface_details_t *bondif = NULL;
- /* *INDENT-OFF* */
pool_foreach (bif, bm->interfaces) {
vec_add2(r_bondifs, bondif, 1);
clib_memset (bondif, 0, sizeof (*bondif));
@@ -201,7 +200,6 @@ bond_dump_ifs (bond_interface_details_t ** out_bondifs)
bondif->active_members = vec_len (bif->active_members);
bondif->members = vec_len (bif->members);
}
- /* *INDENT-ON* */
*out_bondifs = r_bondifs;
@@ -547,7 +545,6 @@ bond_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bond_create_command, static) = {
.path = "create bond",
.short_help = "create bond mode {round-robin | active-backup | broadcast | "
@@ -555,7 +552,6 @@ VLIB_CLI_COMMAND (bond_create_command, static) = {
"[hw-addr <mac-address>] [id <if-id>] [gso]",
.function = bond_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
bond_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -596,14 +592,12 @@ bond_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bond_delete__command, static) =
{
.path = "delete bond",
.short_help = "delete bond {<interface> | sw_if_index <sw_idx>}",
.function = bond_delete_command_fn,
};
-/* *INDENT-ON* */
void
bond_add_member (vlib_main_t * vm, bond_add_member_args_t * args)
@@ -823,14 +817,12 @@ add_member_interface_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (add_member_interface_command, static) = {
.path = "bond add",
.short_help = "bond add <BondEthernetx> <member-interface> "
"[passive] [long-timeout]",
.function = add_member_interface_command_fn,
};
-/* *INDENT-ON* */
void
bond_detach_member (vlib_main_t * vm, bond_detach_member_args_t * args)
@@ -887,13 +879,11 @@ detach_interface_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (detach_interface_command, static) = {
.path = "bond del",
.short_help = "bond del <member-interface>",
.function = detach_interface_command_fn,
};
-/* *INDENT-ON* */
static void
show_bond (vlib_main_t * vm)
@@ -905,7 +895,6 @@ show_bond (vlib_main_t * vm)
"interface name", "sw_if_index", "mode",
"load balance", "active members", "members");
- /* *INDENT-OFF* */
pool_foreach (bif, bm->interfaces)
{
vlib_cli_output (vm, "%-16U %-12d %-13U %-13U %-14u %u",
@@ -914,7 +903,6 @@ show_bond (vlib_main_t * vm)
format_bond_load_balance, bif->lb,
vec_len (bif->active_members), vec_len (bif->members));
}
- /* *INDENT-ON* */
}
static void
@@ -924,7 +912,6 @@ show_bond_details (vlib_main_t * vm)
bond_if_t *bif;
u32 *sw_if_index;
- /* *INDENT-OFF* */
pool_foreach (bif, bm->interfaces)
{
vlib_cli_output (vm, "%U", format_bond_interface_name, bif->dev_instance);
@@ -963,7 +950,6 @@ show_bond_details (vlib_main_t * vm)
vlib_cli_output (vm, " sw_if_index: %d", bif->sw_if_index);
vlib_cli_output (vm, " hw_if_index: %d", bif->hw_if_index);
}
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -991,13 +977,11 @@ show_bond_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_bond_command, static) = {
.path = "show bond",
.short_help = "show bond [details]",
.function = show_bond_fn,
};
-/* *INDENT-ON* */
void
bond_set_intf_weight (vlib_main_t * vm, bond_set_intf_weight_args_t * args)
@@ -1097,14 +1081,12 @@ bond_set_intf_cmd (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(set_interface_bond_cmd, static) = {
.path = "set interface bond",
.short_help = "set interface bond <interface> | sw_if_index <idx>"
" weight <value>",
.function = bond_set_intf_cmd,
};
-/* *INDENT-ON* */
clib_error_t *
bond_cli_init (vlib_main_t * vm)
diff --git a/src/vnet/bonding/device.c b/src/vnet/bonding/device.c
index ca48585fa0a..a0b93fccde1 100644
--- a/src/vnet/bonding/device.c
+++ b/src/vnet/bonding/device.c
@@ -111,14 +111,6 @@ bond_set_l2_mode_function (vnet_main_t * vnm,
return 0;
}
-static __clib_unused clib_error_t *
-bond_subif_add_del_function (vnet_main_t * vnm, u32 hw_if_index,
- struct vnet_sw_interface_t *st, int is_add)
-{
- /* Nothing for now */
- return 0;
-}
-
static clib_error_t *
bond_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
{
@@ -616,16 +608,13 @@ bond_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bond_process_node) = {
.function = bond_process,
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "bond-process",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (bond_dev_class) = {
.name = "bond",
.tx_function_n_errors = BOND_TX_N_ERROR,
@@ -633,12 +622,10 @@ VNET_DEVICE_CLASS (bond_dev_class) = {
.format_device_name = format_bond_interface_name,
.set_l2_mode_function = bond_set_l2_mode_function,
.admin_up_down_function = bond_interface_admin_up_down,
- .subif_add_del_function = bond_subif_add_del_function,
.format_tx_trace = format_bond_tx_trace,
.mac_addr_add_del_function = bond_add_del_mac_address,
};
-/* *INDENT-ON* */
static clib_error_t *
bond_member_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
diff --git a/src/vnet/bonding/node.c b/src/vnet/bonding/node.c
index 21a968177fe..66de1e4dd80 100644
--- a/src/vnet/bonding/node.c
+++ b/src/vnet/bonding/node.c
@@ -397,7 +397,6 @@ bond_input_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bond_input_node) = {
.name = "bond-input",
.vector_size = sizeof (u32),
@@ -421,7 +420,6 @@ VNET_FEATURE_INIT (bond_input, static) =
.node_name = "bond-input",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
-/* *INDENT-ON* */
static clib_error_t *
bond_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h
index 144f62ac17a..2f34aa4b5fc 100644
--- a/src/vnet/buffer.h
+++ b/src/vnet/buffer.h
@@ -467,7 +467,7 @@ typedef struct
} qos;
u8 loop_counter;
- u8 __unused[5];
+ u8 pad[5]; /* unused */
/**
* The L4 payload size set on input on GSO enabled interfaces
diff --git a/src/vnet/classify/classify_api.c b/src/vnet/classify/classify_api.c
index 9353a647277..fc57b006d37 100644
--- a/src/vnet/classify/classify_api.c
+++ b/src/vnet/classify/classify_api.c
@@ -115,9 +115,8 @@ static void vl_api_classify_pcap_set_table_t_handler
u32 table_index = ntohl (mp->table_index);
u32 sw_if_index = ntohl (mp->sw_if_index);
- if (sw_if_index == ~0
- || sw_if_index >= vec_len (cm->classify_table_index_by_sw_if_index)
- || (table_index != ~0 && pool_is_free_index (cm->tables, table_index)))
+ if (sw_if_index == ~0 ||
+ (table_index != ~0 && pool_is_free_index (cm->tables, table_index)))
{
rv = VNET_API_ERROR_INVALID_VALUE;
goto out;
@@ -380,7 +379,6 @@ static void vl_api_classify_add_del_table_t_handler
current_data_flag, current_data_offset, mp->is_add, mp->del_chain);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_CLASSIFY_ADD_DEL_TABLE_REPLY,
({
if (rv == 0 && mp->is_add)
@@ -397,7 +395,6 @@ out:
rmp->new_table_index = ~0;
}
}));
- /* *INDENT-ON* */
}
static void vl_api_classify_add_del_session_t_handler
@@ -534,12 +531,10 @@ vl_api_classify_table_ids_t_handler (vl_api_classify_table_ids_t * mp)
u32 *table_ids = 0;
u32 count;
- /* *INDENT-OFF* */
pool_foreach (t, cm->tables)
{
vec_add1 (table_ids, ntohl(t - cm->tables));
}
- /* *INDENT-ON* */
count = vec_len (table_ids);
vl_api_classify_table_ids_reply_t *rmp;
@@ -596,7 +591,6 @@ static void
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_CLASSIFY_TABLE_BY_INTERFACE_REPLY,
({
rmp->sw_if_index = ntohl(sw_if_index);
@@ -604,7 +598,6 @@ static void
rmp->ip4_table_id = ntohl(acl[IN_OUT_ACL_TABLE_IP4]);
rmp->ip6_table_id = ntohl(acl[IN_OUT_ACL_TABLE_IP6]);
}));
- /* *INDENT-ON* */
vec_free (acl);
}
@@ -695,7 +688,6 @@ vl_api_classify_session_dump_t_handler (vl_api_classify_session_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (t, cm->tables)
{
if (table_id == t - cm->tables)
@@ -729,7 +721,6 @@ vl_api_classify_session_dump_t_handler (vl_api_classify_session_dump_t * mp)
break;
}
}
- /* *INDENT-ON* */
}
static void
diff --git a/src/vnet/classify/flow_classify.c b/src/vnet/classify/flow_classify.c
index afdadc66235..7197558a77a 100644
--- a/src/vnet/classify/flow_classify.c
+++ b/src/vnet/classify/flow_classify.c
@@ -150,7 +150,6 @@ set_flow_classify_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_input_acl_command, static) = {
.path = "set flow classify",
.short_help =
@@ -158,7 +157,6 @@ VLIB_CLI_COMMAND (set_input_acl_command, static) = {
" [ip6-table <index>] [del]",
.function = set_flow_classify_command_fn,
};
-/* *INDENT-ON* */
static uword
unformat_table_type (unformat_input_t * input, va_list * va)
@@ -215,13 +213,11 @@ show_flow_classify_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_flow_classify_command, static) = {
.path = "show classify flow",
.short_help = "show classify flow type [ip4|ip6]",
.function = show_flow_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/classify/flow_classify_node.c b/src/vnet/classify/flow_classify_node.c
index c0a29992fb4..a34bab6190b 100644
--- a/src/vnet/classify/flow_classify_node.c
+++ b/src/vnet/classify/flow_classify_node.c
@@ -279,7 +279,6 @@ VLIB_NODE_FN (ip4_flow_classify_node) (vlib_main_t * vm,
return flow_classify_inline (vm, node, frame, FLOW_CLASSIFY_TABLE_IP4);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_flow_classify_node) = {
.name = "ip4-flow-classify",
.vector_size = sizeof (u32),
@@ -291,7 +290,6 @@ VLIB_REGISTER_NODE (ip4_flow_classify_node) = {
[FLOW_CLASSIFY_NEXT_INDEX_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_flow_classify_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -300,7 +298,6 @@ VLIB_NODE_FN (ip6_flow_classify_node) (vlib_main_t * vm,
return flow_classify_inline (vm, node, frame, FLOW_CLASSIFY_TABLE_IP6);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_flow_classify_node) = {
.name = "ip6-flow-classify",
.vector_size = sizeof (u32),
@@ -313,7 +310,6 @@ VLIB_REGISTER_NODE (ip6_flow_classify_node) = {
},
};
-/* *INDENT-ON* */
static clib_error_t *
diff --git a/src/vnet/classify/in_out_acl.c b/src/vnet/classify/in_out_acl.c
index 752305e1cc2..af765139332 100644
--- a/src/vnet/classify/in_out_acl.c
+++ b/src/vnet/classify/in_out_acl.c
@@ -255,7 +255,6 @@ set_output_acl_command_fn (vlib_main_t * vm,
* Note: Only one table index per API call is allowed.
*
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_input_acl_command, static) = {
.path = "set interface input acl",
.short_help =
@@ -271,7 +270,6 @@ VLIB_CLI_COMMAND (set_output_acl_command, static) = {
" [ip6-table <index>] [l2-table <index>] [del]",
.function = set_output_acl_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
in_out_acl_init (vlib_main_t * vm)
@@ -284,12 +282,10 @@ in_out_acl_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (in_out_acl_init) =
{
.runs_after = VLIB_INITS("ip_in_out_acl_init"),
};
-/* *INDENT-ON* */
uword
unformat_acl_type (unformat_input_t * input, va_list * args)
@@ -392,7 +388,6 @@ show_outacl_command_fn (vlib_main_t * vm,
IN_OUT_ACL_OUTPUT_TABLE_GROUP);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_inacl_command, static) = {
.path = "show inacl",
.short_help = "show inacl type [ip4|ip6|l2]",
@@ -403,7 +398,6 @@ VLIB_CLI_COMMAND (show_outacl_command, static) = {
.short_help = "show outacl type [ip4|ip6|l2]",
.function = show_outacl_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/classify/ip_classify.c b/src/vnet/classify/ip_classify.c
index 9454ae91937..e8562c6912c 100644
--- a/src/vnet/classify/ip_classify.c
+++ b/src/vnet/classify/ip_classify.c
@@ -309,7 +309,6 @@ VLIB_NODE_FN (ip4_classify_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_classify_node) = {
.name = "ip4-classify",
.vector_size = sizeof (u32),
@@ -320,7 +319,6 @@ VLIB_REGISTER_NODE (ip4_classify_node) = {
.n_next_nodes = 0,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_classify_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -330,7 +328,6 @@ VLIB_NODE_FN (ip6_classify_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_classify_node) = {
.name = "ip6-classify",
.vector_size = sizeof (u32),
@@ -341,7 +338,6 @@ VLIB_REGISTER_NODE (ip6_classify_node) = {
.n_next_nodes = 0,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
static clib_error_t *
diff --git a/src/vnet/classify/pcap_classify.h b/src/vnet/classify/pcap_classify.h
index e079816f62c..a4ebcd1241c 100644
--- a/src/vnet/classify/pcap_classify.h
+++ b/src/vnet/classify/pcap_classify.h
@@ -47,11 +47,11 @@ vnet_is_packet_pcaped (vnet_pcap_t *pp, vlib_buffer_t *b, u32 sw_if_index)
return 0; /* wrong error */
if (filter_classify_table_index != ~0 &&
- vnet_is_packet_traced_inline (b, filter_classify_table_index,
- 0 /* full classify */) != 1)
+ pp->current_filter_function (b, filter_classify_table_index,
+ 0 /* full classify */) != 1)
return 0; /* not matching the filter, skip */
- return 1; /* success */
+ return 1;
}
/*
diff --git a/src/vnet/classify/policer_classify.c b/src/vnet/classify/policer_classify.c
index 4cf12a24e9e..814adefc987 100644
--- a/src/vnet/classify/policer_classify.c
+++ b/src/vnet/classify/policer_classify.c
@@ -164,7 +164,6 @@ set_policer_classify_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_policer_classify_command, static) = {
.path = "set policer classify",
.short_help =
@@ -172,7 +171,6 @@ VLIB_CLI_COMMAND (set_policer_classify_command, static) = {
" [ip6-table <index>] [l2-table <index>] [del]",
.function = set_policer_classify_command_fn,
};
-/* *INDENT-ON* */
static uword
unformat_table_type (unformat_input_t * input, va_list * va)
@@ -231,13 +229,11 @@ show_policer_classify_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_policer_classify_command, static) = {
.path = "show classify policer",
.short_help = "show classify policer type [ip4|ip6|l2]",
.function = show_policer_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/classify/trace_classify.h b/src/vnet/classify/trace_classify.h
index bc25ecd0ff7..03421210d03 100644
--- a/src/vnet/classify/trace_classify.h
+++ b/src/vnet/classify/trace_classify.h
@@ -29,6 +29,8 @@
* @param u32 classify_table_index - classifier table index
* @return 0 => no trace, 1 => trace, -1 => error
*/
+int vnet_is_packet_traced (vlib_buffer_t *b, u32 classify_table_index,
+ int func);
static inline int
vnet_is_packet_traced_inline (vlib_buffer_t * b,
@@ -43,6 +45,9 @@ vnet_is_packet_traced_inline (vlib_buffer_t * b,
if (func != 0)
return -1;
+ if (classify_table_index == ~0)
+ return -1;
+
/* This will happen... */
if (pool_is_free_index (vcm->tables, classify_table_index))
return -1;
diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c
index 305521be267..77c1c81f9c4 100644
--- a/src/vnet/classify/vnet_classify.c
+++ b/src/vnet/classify/vnet_classify.c
@@ -640,12 +640,10 @@ unlock:
return rv;
}
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct {
ethernet_header_t eh;
ip4_header_t ip;
}) classify_data_or_mask_t;
-/* *INDENT-ON* */
u32
vnet_classify_hash_packet (const vnet_classify_table_t *t, u8 *h)
@@ -777,8 +775,10 @@ vnet_classify_add_del_table (vnet_classify_main_t *cm, const u8 *mask,
else /* update */
{
vnet_classify_main_t *cm = &vnet_classify_main;
- t = pool_elt_at_index (cm->tables, *table_index);
+ if (pool_is_free_index (cm->tables, *table_index))
+ return VNET_API_ERROR_CLASSIFY_TABLE_NOT_FOUND;
+ t = pool_elt_at_index (cm->tables, *table_index);
t->next_table_index = next_table_index;
}
return 0;
@@ -1331,12 +1331,11 @@ unformat_classify_mask (unformat_input_t * input, va_list * args)
return 0;
}
-#define foreach_l2_input_next \
-_(drop, DROP) \
-_(ethernet, ETHERNET_INPUT) \
-_(ip4, IP4_INPUT) \
-_(ip6, IP6_INPUT) \
-_(li, LI)
+#define foreach_l2_input_next \
+ _ (drop, DROP) \
+ _ (ethernet, ETHERNET_INPUT) \
+ _ (ip4, IP4_INPUT) \
+ _ (ip6, IP6_INPUT)
uword
unformat_l2_input_next_index (unformat_input_t * input, va_list * args)
@@ -1636,7 +1635,6 @@ classify_table_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (classify_table, static) =
{
.path = "classify table",
@@ -1648,7 +1646,6 @@ VLIB_CLI_COMMAND (classify_table, static) =
"\n [del] [del-chain]",
.function = classify_table_command_fn,
};
-/* *INDENT-ON* */
static int
filter_table_mask_compare (void *a1, void *a2)
@@ -2052,7 +2049,7 @@ vlib_enable_disable_pkt_trace_filter (int enable)
/*?
* Construct an arbitrary set of packet classifier tables for use with
- * "pcap rx | tx trace," and with the vpp packet tracer
+ * "pcap trace rx | tx," and with the vpp packet tracer
*
* Packets which match a rule in the classifier table chain
* will be traced. The tables are automatically ordered so that
@@ -2095,10 +2092,10 @@ vlib_enable_disable_pkt_trace_filter (int enable)
* @cliexpar
* Configuring the classify filter
*
- * Configure a simple classify filter, and configure pcap rx trace to use it:
+ * Configure a simple classify filter, and configure pcap trace rx to use it:
*
* @cliexcmd{classify filter rx mask l3 ip4 src match l3 ip4 src 192.168.1.11}
- * <b><em>pcap rx trace on max 100 filter</em></b>
+ * <b><em>pcap trace rx max 100 filter</em></b>
*
* Configure another fairly simple filter
*
@@ -2124,7 +2121,6 @@ vlib_enable_disable_pkt_trace_filter (int enable)
* The verbose form displays all of the match rules, with hit-counters
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (classify_filter, static) =
{
.path = "classify filter",
@@ -2134,7 +2130,6 @@ VLIB_CLI_COMMAND (classify_filter, static) =
" [buckets <nn>] [memory-size <n>]",
.function = classify_filter_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_classify_filter_command_fn (vlib_main_t * vm,
@@ -2214,14 +2209,12 @@ show_classify_filter_command_fn (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_classify_filter, static) =
{
.path = "show classify filter",
.short_help = "show classify filter [verbose [nn]]",
.function = show_classify_filter_command_fn,
};
-/* *INDENT-ON* */
u8 *
format_vnet_classify_table (u8 *s, va_list *args)
@@ -2284,13 +2277,11 @@ show_classify_tables_command_fn (vlib_main_t * vm,
break;
}
- /* *INDENT-OFF* */
pool_foreach (t, cm->tables)
{
if (match_index == ~0 || (match_index == t - cm->tables))
vec_add1 (indices, t - cm->tables);
}
- /* *INDENT-ON* */
if (vec_len (indices))
{
@@ -2310,13 +2301,11 @@ show_classify_tables_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_classify_table_command, static) = {
.path = "show classify tables",
.short_help = "show classify tables [index <nn>]",
.function = show_classify_tables_command_fn,
};
-/* *INDENT-ON* */
uword
unformat_l4_match (unformat_input_t * input, va_list * args)
@@ -2783,9 +2772,9 @@ unformat_classify_match (unformat_input_t * input, va_list * args)
int
vnet_classify_add_del_session (vnet_classify_main_t *cm, u32 table_index,
- const u8 *match, u32 hit_next_index,
+ const u8 *match, u16 hit_next_index,
u32 opaque_index, i32 advance, u8 action,
- u16 metadata, int is_add)
+ u32 metadata, int is_add)
{
vnet_classify_table_t *t;
vnet_classify_entry_5_t _max_e __attribute__ ((aligned (16)));
@@ -2929,7 +2918,6 @@ classify_session_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (classify_session_command, static) = {
.path = "classify session",
.short_help =
@@ -2939,7 +2927,6 @@ VLIB_CLI_COMMAND (classify_session_command, static) = {
"\n [action set-ip4-fib-id|set-ip6-fib-id|set-sr-policy-index <n>] [del]",
.function = classify_session_command_fn,
};
-/* *INDENT-ON* */
static uword
unformat_opaque_sw_if_index (unformat_input_t * input, va_list * args)
@@ -3083,7 +3070,12 @@ vnet_is_packet_traced (vlib_buffer_t * b, u32 classify_table_index, int func)
{
return vnet_is_packet_traced_inline (b, classify_table_index, func);
}
-
+VLIB_REGISTER_TRACE_FILTER_FUNCTION (vnet_is_packet_traced_fn, static) = {
+ .name = "vnet_is_packet_traced",
+ .description = "classifier based filter",
+ .priority = 50,
+ .function = vnet_is_packet_traced
+};
#define TEST_CODE 0
@@ -3352,7 +3344,6 @@ test_classify_command_fn (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_classify_command, static) = {
.path = "test classify",
.short_help =
@@ -3361,7 +3352,6 @@ VLIB_CLI_COMMAND (test_classify_command, static) = {
" [churn-test]",
.function = test_classify_command_fn,
};
-/* *INDENT-ON* */
#endif /* TEST_CODE */
/*
diff --git a/src/vnet/classify/vnet_classify.h b/src/vnet/classify/vnet_classify.h
index 143833dfb20..768593c45af 100644
--- a/src/vnet/classify/vnet_classify.h
+++ b/src/vnet/classify/vnet_classify.h
@@ -89,15 +89,17 @@ typedef struct _vnet_classify_entry
/* last heard time */
f64 last_heard;
+ u32 metadata;
+
+ /* Graph node next index */
+ u16 next_index;
+
+ vnet_classify_action_t action;
+
/* Really only need 1 bit */
u8 flags;
#define VNET_CLASSIFY_ENTRY_FREE (1<<0)
- vnet_classify_action_t action;
- u16 metadata;
- /* Graph node next index */
- u32 next_index;
-
/* Must be aligned to a 16-octet boundary */
u32x4 key[0];
} vnet_classify_entry_t;
@@ -586,9 +588,9 @@ vnet_classify_table_t *vnet_classify_new_table (vnet_classify_main_t *cm,
u32 match_n_vectors);
int vnet_classify_add_del_session (vnet_classify_main_t *cm, u32 table_index,
- const u8 *match, u32 hit_next_index,
+ const u8 *match, u16 hit_next_index,
u32 opaque_index, i32 advance, u8 action,
- u16 metadata, int is_add);
+ u32 metadata, int is_add);
int vnet_classify_add_del_table (vnet_classify_main_t *cm, const u8 *mask,
u32 nbuckets, u32 memory_size, u32 skip,
diff --git a/src/vnet/crypto/cli.c b/src/vnet/crypto/cli.c
index 4ee14ac1100..2ca66f228c3 100644
--- a/src/vnet/crypto/cli.c
+++ b/src/vnet/crypto/cli.c
@@ -36,16 +36,13 @@ show_crypto_engines_command_fn (vlib_main_t * vm,
}
vlib_cli_output (vm, "%-20s%-8s%s", "Name", "Prio", "Description");
- /* *INDENT-OFF* */
vec_foreach (p, cm->engines)
{
vlib_cli_output (vm, "%-20s%-8u%s", p->name, p->priority, p->desc);
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_crypto_engines_command, static) =
{
.path = "show crypto engines",
@@ -145,20 +142,18 @@ show_crypto_handlers_command_fn (vlib_main_t * vm,
"Chained");
for (i = 0; i < VNET_CRYPTO_N_ALGS; i++)
- vlib_cli_output (vm, "%-16U%U", format_vnet_crypto_alg, i,
+ vlib_cli_output (vm, "%-20U%U", format_vnet_crypto_alg, i,
format_vnet_crypto_handlers, i);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_crypto_handlers_command, static) =
{
.path = "show crypto handlers",
.short_help = "show crypto handlers",
.function = show_crypto_handlers_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
set_crypto_handler_command_fn (vlib_main_t * vm,
@@ -209,13 +204,11 @@ set_crypto_handler_command_fn (vlib_main_t * vm,
char *key;
u8 *value;
- /* *INDENT-OFF* */
hash_foreach_mem (key, value, cm->alg_index_by_name,
({
(void) value;
rc += vnet_crypto_set_handler2 (key, engine, oct);
}));
- /* *INDENT-ON* */
if (rc)
vlib_cli_output (vm, "failed to set crypto engine!");
@@ -241,7 +234,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_crypto_handler_command, static) =
{
.path = "set crypto handler",
@@ -249,7 +241,6 @@ VLIB_CLI_COMMAND (set_crypto_handler_command, static) =
" [simple|chained]",
.function = set_crypto_handler_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_vnet_crypto_async_handlers (u8 * s, va_list * args)
@@ -300,14 +291,12 @@ show_crypto_async_handlers_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_crypto_async_handlers_command, static) =
{
.path = "show crypto async handlers",
.short_help = "show crypto async handlers",
.function = show_crypto_async_handlers_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -316,7 +305,6 @@ show_crypto_async_status_command_fn (vlib_main_t * vm,
vlib_cli_command_t * cmd)
{
vnet_crypto_main_t *cm = &crypto_main;
- u32 skip_master = vlib_num_workers () > 0;
vlib_thread_main_t *tm = vlib_get_thread_main ();
unformat_input_t _line_input, *line_input = &_line_input;
int i;
@@ -324,12 +312,7 @@ show_crypto_async_status_command_fn (vlib_main_t * vm,
if (unformat_user (input, unformat_line_input, line_input))
unformat_free (line_input);
- vlib_cli_output (vm, "Crypto async dispatch mode: %s",
- cm->dispatch_mode ==
- VNET_CRYPTO_ASYNC_DISPATCH_POLLING ? "POLLING" :
- "INTERRUPT");
-
- for (i = skip_master; i < tm->n_vlib_mains; i++)
+ for (i = 0; i < tm->n_vlib_mains; i++)
{
vlib_node_state_t state = vlib_node_get_state (
vlib_get_main_by_index (i), cm->crypto_node_index);
@@ -343,14 +326,12 @@ show_crypto_async_status_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_crypto_async_status_command, static) =
{
.path = "show crypto async status",
.short_help = "show crypto async status",
.function = show_crypto_async_status_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
set_crypto_async_handler_command_fn (vlib_main_t * vm,
@@ -394,13 +375,11 @@ set_crypto_async_handler_command_fn (vlib_main_t * vm,
char *key;
u8 *value;
- /* *INDENT-OFF* */
hash_foreach_mem (key, value, cm->async_alg_index_by_name,
({
(void) value;
rc += vnet_crypto_set_async_handler2 (key, engine);
}));
- /* *INDENT-ON* */
if (rc)
vlib_cli_output (vm, "failed to set crypto engine!");
@@ -426,57 +405,52 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_crypto_async_handler_command, static) =
{
.path = "set crypto async handler",
.short_help = "set crypto async handler type [type2 type3 ...] engine",
.function = set_crypto_async_handler_command_fn,
};
-/* *INDENT-ON* */
-
-static inline void
-print_crypto_async_dispatch_warning ()
-{
- clib_warning ("Switching dispatch mode might not work is some situations.");
- clib_warning
- ("Use 'show crypto async status' to verify that the nodes' states were set");
- clib_warning ("and if not, set 'crypto async dispatch' mode again.");
-}
static clib_error_t *
-set_crypto_async_dispatch_polling_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
+set_crypto_async_dispatch_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
{
- print_crypto_async_dispatch_warning ();
- vnet_crypto_set_async_dispatch_mode (VNET_CRYPTO_ASYNC_DISPATCH_POLLING);
- return 0;
-}
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ u8 adaptive = 0;
+ u8 mode = VLIB_NODE_STATE_INTERRUPT;
-static clib_error_t *
-set_crypto_async_dispatch_interrupt_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- print_crypto_async_dispatch_warning ();
- vnet_crypto_set_async_dispatch_mode (VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT);
- return 0;
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "polling"))
+ mode = VLIB_NODE_STATE_POLLING;
+ else if (unformat (line_input, "interrupt"))
+ mode = VLIB_NODE_STATE_INTERRUPT;
+ else if (unformat (line_input, "adaptive"))
+ adaptive = 1;
+ else
+ {
+ error = clib_error_return (0, "invalid params");
+ goto done;
+ }
+ }
+
+ vnet_crypto_set_async_dispatch (mode, adaptive);
+done:
+ unformat_free (line_input);
+ return error;
}
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_crypto_async_dispatch_polling_command, static) =
-{
- .path = "set crypto async dispatch polling",
- .short_help = "set crypto async dispatch polling|interrupt",
- .function = set_crypto_async_dispatch_polling_command_fn,
-};
-VLIB_CLI_COMMAND (set_crypto_async_dispatch_interrupt_command, static) =
-{
- .path = "set crypto async dispatch interrupt",
- .short_help = "set crypto async dispatch polling|interrupt",
- .function = set_crypto_async_dispatch_interrupt_command_fn,
+VLIB_CLI_COMMAND (set_crypto_async_dispatch_mode_command, static) = {
+ .path = "set crypto async dispatch mode",
+ .short_help = "set crypto async dispatch mode <polling|interrupt|adaptive>",
+ .function = set_crypto_async_dispatch_command_fn,
};
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/crypto/crypto.api b/src/vnet/crypto/crypto.api
index 6eccd8524ba..8fec805dcfc 100644
--- a/src/vnet/crypto/crypto.api
+++ b/src/vnet/crypto/crypto.api
@@ -28,7 +28,8 @@ enum crypto_op_class_type:u8
CRYPTO_API_OP_BOTH,
};
- /** \brief crypto: use polling or interrupt dispatch
+ /** \brief crypto: Use polling or interrupt dispatch.
+ Always unset the adaptive flag (that is why it is deprecated).
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param mode - dispatch mode
@@ -36,11 +37,29 @@ enum crypto_op_class_type:u8
autoreply define crypto_set_async_dispatch
{
+ option deprecated;
+ option replaced_by="crypto_set_async_dispatch_v2";
u32 client_index;
u32 context;
vl_api_crypto_dispatch_mode_t mode;
};
+ /** \brief crypto: Change the way crypto operations are dispatched.
+ Use adaptive (or not) mode, starting in polling or interrupt state.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param mode - dispatch initial state
+ @param adaptive - whether on not the state shall change depending on load
+*/
+
+autoreply define crypto_set_async_dispatch_v2
+{
+ u32 client_index;
+ u32 context;
+ vl_api_crypto_dispatch_mode_t mode;
+ bool adaptive;
+};
+
/** \brief crypto: set crypto handler
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/vnet/crypto/crypto.c b/src/vnet/crypto/crypto.c
index 1c724a346c2..c8e7ca90c9d 100644
--- a/src/vnet/crypto/crypto.c
+++ b/src/vnet/crypto/crypto.c
@@ -192,13 +192,16 @@ vnet_crypto_is_set_handler (vnet_crypto_alg_t alg)
vnet_crypto_op_id_t opt = 0;
int i;
- if (alg > vec_len (cm->algs))
+ if (alg >= vec_len (cm->algs))
return 0;
for (i = 0; i < VNET_CRYPTO_OP_N_TYPES; i++)
if ((opt = cm->algs[alg].op_by_type[i]) != 0)
break;
+ if (opt >= vec_len (cm->ops_handlers))
+ return 0;
+
return NULL != cm->ops_handlers[opt];
}
@@ -284,8 +287,6 @@ vnet_crypto_register_enqueue_handler (vlib_main_t *vm, u32 engine_index,
vnet_crypto_async_op_data_t *otd = cm->async_opt_data + opt;
vec_validate_aligned (cm->enqueue_handlers, VNET_CRYPTO_ASYNC_OP_N_IDS,
CLIB_CACHE_LINE_BYTES);
- vec_validate_aligned (cm->dequeue_handlers, VNET_CRYPTO_ASYNC_OP_N_IDS,
- CLIB_CACHE_LINE_BYTES);
if (!enqueue_hdl)
return;
@@ -370,6 +371,8 @@ vnet_crypto_register_dequeue_handler (vlib_main_t *vm, u32 engine_index,
e->dequeue_handler = deq_fn;
+ vnet_crypto_update_cm_dequeue_handlers ();
+
return;
}
@@ -446,11 +449,9 @@ vnet_crypto_key_add (vlib_main_t * vm, vnet_crypto_alg_t alg, u8 * data,
key->alg = alg;
vec_validate_aligned (key->data, length - 1, CLIB_CACHE_LINE_BYTES);
clib_memcpy (key->data, data, length);
- /* *INDENT-OFF* */
vec_foreach (engine, cm->engines)
if (engine->key_op_handler)
engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_ADD, index);
- /* *INDENT-ON* */
return index;
}
@@ -461,25 +462,34 @@ vnet_crypto_key_del (vlib_main_t * vm, vnet_crypto_key_index_t index)
vnet_crypto_engine_t *engine;
vnet_crypto_key_t *key = pool_elt_at_index (cm->keys, index);
- /* *INDENT-OFF* */
vec_foreach (engine, cm->engines)
if (engine->key_op_handler)
engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_DEL, index);
- /* *INDENT-ON* */
if (key->type == VNET_CRYPTO_KEY_TYPE_DATA)
{
- clib_memset (key->data, 0, vec_len (key->data));
+ clib_memset (key->data, 0xfe, vec_len (key->data));
vec_free (key->data);
}
else if (key->type == VNET_CRYPTO_KEY_TYPE_LINK)
{
- key->index_crypto = key->index_integ = 0;
+ key->index_crypto = key->index_integ = ~0;
}
pool_put (cm->keys, key);
}
+void
+vnet_crypto_key_update (vlib_main_t *vm, vnet_crypto_key_index_t index)
+{
+ vnet_crypto_main_t *cm = &crypto_main;
+ vnet_crypto_engine_t *engine;
+
+ vec_foreach (engine, cm->engines)
+ if (engine->key_op_handler)
+ engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_MODIFY, index);
+}
+
vnet_crypto_async_alg_t
vnet_crypto_link_algs (vnet_crypto_alg_t crypto_alg,
vnet_crypto_alg_t integ_alg)
@@ -518,50 +528,13 @@ vnet_crypto_key_add_linked (vlib_main_t * vm,
key->index_integ = index_integ;
key->async_alg = linked_alg;
- /* *INDENT-OFF* */
vec_foreach (engine, cm->engines)
if (engine->key_op_handler)
engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_ADD, index);
- /* *INDENT-ON* */
return index;
}
-clib_error_t *
-crypto_dispatch_enable_disable (int is_enable)
-{
- vnet_crypto_main_t *cm = &crypto_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- u32 skip_master = vlib_num_workers () > 0, i;
- vlib_node_state_t state = VLIB_NODE_STATE_DISABLED;
- u8 state_change = 0;
-
- CLIB_MEMORY_STORE_BARRIER ();
- if (is_enable && cm->async_refcnt > 0)
- {
- state_change = 1;
- state =
- cm->dispatch_mode ==
- VNET_CRYPTO_ASYNC_DISPATCH_POLLING ? VLIB_NODE_STATE_POLLING :
- VLIB_NODE_STATE_INTERRUPT;
- }
-
- if (!is_enable && cm->async_refcnt == 0)
- {
- state_change = 1;
- state = VLIB_NODE_STATE_DISABLED;
- }
-
- if (state_change)
- for (i = skip_master; i < tm->n_vlib_mains; i++)
- {
- vlib_main_t *ovm = vlib_get_main_by_index (i);
- if (state != vlib_node_get_state (ovm, cm->crypto_node_index))
- vlib_node_set_state (ovm, cm->crypto_node_index, state);
- }
- return 0;
-}
-
static_always_inline void
crypto_set_active_async_engine (vnet_crypto_async_op_data_t * od,
vnet_crypto_async_op_id_t id, u32 ei)
@@ -573,7 +546,6 @@ crypto_set_active_async_engine (vnet_crypto_async_op_data_t * od,
{
od->active_engine_index_async = ei;
cm->enqueue_handlers[id] = ce->enqueue_handlers[id];
- cm->dequeue_handlers[id] = ce->dequeue_handler;
}
}
@@ -585,9 +557,6 @@ vnet_crypto_set_async_handler2 (char *alg_name, char *engine)
vnet_crypto_async_alg_data_t *ad;
int i;
- if (cm->async_refcnt)
- return -EBUSY;
-
p = hash_get_mem (cm->async_alg_index_by_name, alg_name);
if (!p)
return -1;
@@ -626,13 +595,11 @@ vnet_crypto_register_post_node (vlib_main_t * vm, char *post_node_name)
if (!pn)
return ~0;
- /* *INDENT-OFF* */
- vec_foreach (cm->next_nodes, nn)
- {
- if (nn->node_idx == pn->index)
- return nn->next_idx;
- }
- /* *INDENT-ON* */
+ vec_foreach (nn, cm->next_nodes)
+ {
+ if (nn->node_idx == pn->index)
+ return nn->next_idx;
+ }
vec_validate (cm->next_nodes, index);
nn = vec_elt_at_index (cm->next_nodes, index);
@@ -645,76 +612,19 @@ vnet_crypto_register_post_node (vlib_main_t * vm, char *post_node_name)
}
void
-vnet_crypto_request_async_mode (int is_enable)
-{
- vnet_crypto_main_t *cm = &crypto_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- u32 skip_master = vlib_num_workers () > 0, i;
- vlib_node_state_t state = VLIB_NODE_STATE_DISABLED;
- u8 state_change = 0;
-
- CLIB_MEMORY_STORE_BARRIER ();
- if (is_enable && cm->async_refcnt == 0)
- {
- state_change = 1;
- state =
- cm->dispatch_mode == VNET_CRYPTO_ASYNC_DISPATCH_POLLING ?
- VLIB_NODE_STATE_POLLING : VLIB_NODE_STATE_INTERRUPT;
- }
- if (!is_enable && cm->async_refcnt == 1)
- {
- state_change = 1;
- state = VLIB_NODE_STATE_DISABLED;
- }
-
- if (state_change)
- {
-
- for (i = skip_master; i < tm->n_vlib_mains; i++)
- {
- vlib_main_t *ovm = vlib_get_main_by_index (i);
- if (state != vlib_node_get_state (ovm, cm->crypto_node_index))
- vlib_node_set_state (ovm, cm->crypto_node_index, state);
- }
-
- if (is_enable)
- vnet_crypto_update_cm_dequeue_handlers ();
- }
-
- if (is_enable)
- cm->async_refcnt += 1;
- else if (cm->async_refcnt > 0)
- cm->async_refcnt -= 1;
-}
-
-void
-vnet_crypto_set_async_dispatch_mode (u8 mode)
+vnet_crypto_set_async_dispatch (u8 mode, u8 adaptive)
{
- vnet_crypto_main_t *cm = &crypto_main;
- u32 skip_master = vlib_num_workers () > 0, i;
vlib_thread_main_t *tm = vlib_get_thread_main ();
- vlib_node_state_t state = VLIB_NODE_STATE_DISABLED;
+ u32 i, node_index = crypto_main.crypto_node_index;
+ vlib_node_state_t state =
+ mode ? VLIB_NODE_STATE_INTERRUPT : VLIB_NODE_STATE_POLLING;
- CLIB_MEMORY_STORE_BARRIER ();
- cm->dispatch_mode = mode;
- if (mode == VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT)
- {
- state =
- cm->async_refcnt == 0 ?
- VLIB_NODE_STATE_DISABLED : VLIB_NODE_STATE_INTERRUPT;
- }
- else if (mode == VNET_CRYPTO_ASYNC_DISPATCH_POLLING)
- {
- state =
- cm->async_refcnt == 0 ?
- VLIB_NODE_STATE_DISABLED : VLIB_NODE_STATE_POLLING;
- }
-
- for (i = skip_master; i < tm->n_vlib_mains; i++)
+ for (i = vlib_num_workers () > 0; i < tm->n_vlib_mains; i++)
{
vlib_main_t *ovm = vlib_get_main_by_index (i);
- if (state != vlib_node_get_state (ovm, cm->crypto_node_index))
- vlib_node_set_state (ovm, cm->crypto_node_index, state);
+ vlib_node_set_state (ovm, node_index, state);
+ vlib_node_set_flag (ovm, node_index, VLIB_NODE_FLAG_ADAPTIVE_MODE,
+ adaptive);
}
}
@@ -813,15 +723,13 @@ vnet_crypto_init (vlib_main_t * vm)
vlib_thread_main_t *tm = vlib_get_thread_main ();
vnet_crypto_thread_t *ct = 0;
- cm->dispatch_mode = VNET_CRYPTO_ASYNC_DISPATCH_POLLING;
cm->engine_index_by_name = hash_create_string ( /* size */ 0,
sizeof (uword));
cm->alg_index_by_name = hash_create_string (0, sizeof (uword));
cm->async_alg_index_by_name = hash_create_string (0, sizeof (uword));
vec_validate_aligned (cm->threads, tm->n_vlib_mains, CLIB_CACHE_LINE_BYTES);
vec_foreach (ct, cm->threads)
- pool_alloc_aligned (ct->frame_pool, VNET_CRYPTO_FRAME_POOL_SIZE,
- CLIB_CACHE_LINE_BYTES);
+ pool_init_fixed (ct->frame_pool, VNET_CRYPTO_FRAME_POOL_SIZE);
vec_validate (cm->algs, VNET_CRYPTO_N_ALGS);
vec_validate (cm->async_algs, VNET_CRYPTO_N_ASYNC_ALGS);
diff --git a/src/vnet/crypto/crypto.h b/src/vnet/crypto/crypto.h
index e24ad1091f3..89cf70d19e3 100644
--- a/src/vnet/crypto/crypto.h
+++ b/src/vnet/crypto/crypto.h
@@ -33,11 +33,14 @@
_(AES_256_CTR, "aes-256-ctr", 32)
/* CRYPTO_ID, PRETTY_NAME, KEY_LENGTH_IN_BYTES */
-#define foreach_crypto_aead_alg \
- _(AES_128_GCM, "aes-128-gcm", 16) \
- _(AES_192_GCM, "aes-192-gcm", 24) \
- _(AES_256_GCM, "aes-256-gcm", 32) \
- _(CHACHA20_POLY1305, "chacha20-poly1305", 32)
+#define foreach_crypto_aead_alg \
+ _ (AES_128_GCM, "aes-128-gcm", 16) \
+ _ (AES_192_GCM, "aes-192-gcm", 24) \
+ _ (AES_256_GCM, "aes-256-gcm", 32) \
+ _ (AES_128_NULL_GMAC, "aes-128-null-gmac", 16) \
+ _ (AES_192_NULL_GMAC, "aes-192-null-gmac", 24) \
+ _ (AES_256_NULL_GMAC, "aes-256-null-gmac", 32) \
+ _ (CHACHA20_POLY1305, "chacha20-poly1305", 32)
#define foreach_crypto_hash_alg \
_ (SHA1, "sha-1") \
@@ -89,6 +92,12 @@ typedef enum
_ (AES_192_GCM, "aes-192-gcm-aad12", 24, 16, 12) \
_ (AES_256_GCM, "aes-256-gcm-aad8", 32, 16, 8) \
_ (AES_256_GCM, "aes-256-gcm-aad12", 32, 16, 12) \
+ _ (AES_128_NULL_GMAC, "aes-128-null-gmac-aad8", 16, 16, 8) \
+ _ (AES_128_NULL_GMAC, "aes-128-null-gmac-aad12", 16, 16, 12) \
+ _ (AES_192_NULL_GMAC, "aes-192-null-gmac-aad8", 24, 16, 8) \
+ _ (AES_192_NULL_GMAC, "aes-192-null-gmac-aad12", 24, 16, 12) \
+ _ (AES_256_NULL_GMAC, "aes-256-null-gmac-aad8", 32, 16, 8) \
+ _ (AES_256_NULL_GMAC, "aes-256-null-gmac-aad12", 32, 16, 12) \
_ (CHACHA20_POLY1305, "chacha20-poly1305-aad8", 32, 16, 8) \
_ (CHACHA20_POLY1305, "chacha20-poly1305-aad12", 32, 16, 12) \
_ (CHACHA20_POLY1305, "chacha20-poly1305", 32, 16, 0)
@@ -142,7 +151,6 @@ typedef enum
VNET_CRYPTO_OP_N_STATUS,
} vnet_crypto_op_status_t;
-/* *INDENT-OFF* */
typedef enum
{
VNET_CRYPTO_ALG_NONE = 0,
@@ -231,7 +239,6 @@ typedef enum
#undef _
VNET_CRYPTO_N_OP_IDS,
} vnet_crypto_op_id_t;
-/* *INDENT-ON* */
typedef enum
{
@@ -260,9 +267,8 @@ typedef struct
vnet_crypto_op_id_t op:16;
vnet_crypto_op_status_t status:8;
u8 flags;
-#define VNET_CRYPTO_OP_FLAG_INIT_IV (1 << 0)
-#define VNET_CRYPTO_OP_FLAG_HMAC_CHECK (1 << 1)
-#define VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS (1 << 2)
+#define VNET_CRYPTO_OP_FLAG_HMAC_CHECK (1 << 0)
+#define VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS (1 << 1)
union
{
@@ -338,7 +344,7 @@ typedef struct
i16 crypto_start_offset; /* first buffer offset */
i16 integ_start_offset;
/* adj total_length for integ, e.g.4 bytes for IPSec ESN */
- u16 integ_length_adj;
+ i16 integ_length_adj;
vnet_crypto_op_status_t status : 8;
u8 flags; /**< share same VNET_CRYPTO_OP_FLAG_* values */
} vnet_crypto_async_frame_elt_t;
@@ -468,12 +474,8 @@ typedef struct
uword *alg_index_by_name;
uword *async_alg_index_by_name;
vnet_crypto_async_alg_data_t *async_algs;
- u32 async_refcnt;
vnet_crypto_async_next_node_t *next_nodes;
u32 crypto_node_index;
-#define VNET_CRYPTO_ASYNC_DISPATCH_POLLING 0
-#define VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT 1
- u8 dispatch_mode;
} vnet_crypto_main_t;
extern vnet_crypto_main_t crypto_main;
@@ -484,7 +486,7 @@ u32 vnet_crypto_process_chained_ops (vlib_main_t * vm, vnet_crypto_op_t ops[],
u32 vnet_crypto_process_ops (vlib_main_t * vm, vnet_crypto_op_t ops[],
u32 n_ops);
-
+void vnet_crypto_set_async_dispatch (u8 mode, u8 adaptive);
int vnet_crypto_set_handler2 (char *ops_handler_name, char *engine,
crypto_op_class_type_t oct);
int vnet_crypto_is_set_handler (vnet_crypto_alg_t alg);
@@ -492,6 +494,7 @@ int vnet_crypto_is_set_handler (vnet_crypto_alg_t alg);
u32 vnet_crypto_key_add (vlib_main_t * vm, vnet_crypto_alg_t alg,
u8 * data, u16 length);
void vnet_crypto_key_del (vlib_main_t * vm, vnet_crypto_key_index_t index);
+void vnet_crypto_key_update (vlib_main_t *vm, vnet_crypto_key_index_t index);
/**
* Use 2 created keys to generate new key for linked algs (cipher + integ)
@@ -501,21 +504,13 @@ u32 vnet_crypto_key_add_linked (vlib_main_t * vm,
vnet_crypto_key_index_t index_crypto,
vnet_crypto_key_index_t index_integ);
-clib_error_t *crypto_dispatch_enable_disable (int is_enable);
-
int vnet_crypto_set_async_handler2 (char *alg_name, char *engine);
int vnet_crypto_is_set_async_handler (vnet_crypto_async_op_id_t opt);
-void vnet_crypto_request_async_mode (int is_enable);
-
-void vnet_crypto_set_async_dispatch_mode (u8 mode);
-
vnet_crypto_async_alg_t vnet_crypto_link_algs (vnet_crypto_alg_t crypto_alg,
vnet_crypto_alg_t integ_alg);
-clib_error_t *crypto_dispatch_enable_disable (int is_enable);
-
format_function_t format_vnet_crypto_alg;
format_function_t format_vnet_crypto_engine;
format_function_t format_vnet_crypto_op;
@@ -569,12 +564,16 @@ vnet_crypto_async_get_frame (vlib_main_t * vm, vnet_crypto_async_op_id_t opt)
vnet_crypto_thread_t *ct = cm->threads + vm->thread_index;
vnet_crypto_async_frame_t *f = NULL;
- pool_get_aligned (ct->frame_pool, f, CLIB_CACHE_LINE_BYTES);
- if (CLIB_DEBUG > 0)
- clib_memset (f, 0xfe, sizeof (*f));
- f->state = VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED;
- f->op = opt;
- f->n_elts = 0;
+ if (PREDICT_TRUE (pool_free_elts (ct->frame_pool)))
+ {
+ pool_get_aligned (ct->frame_pool, f, CLIB_CACHE_LINE_BYTES);
+#if CLIB_DEBUG > 0
+ clib_memset (f, 0xfe, sizeof (*f));
+#endif
+ f->state = VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED;
+ f->op = opt;
+ f->n_elts = 0;
+ }
return f;
}
@@ -594,7 +593,8 @@ vnet_crypto_async_submit_open_frame (vlib_main_t * vm,
{
vnet_crypto_main_t *cm = &crypto_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
- u32 i = vlib_num_workers () > 0;
+ u32 i;
+ vlib_node_t *n;
frame->state = VNET_CRYPTO_FRAME_STATE_PENDING;
frame->enqueue_thread_index = vm->thread_index;
@@ -609,9 +609,10 @@ vnet_crypto_async_submit_open_frame (vlib_main_t * vm,
if (PREDICT_TRUE (ret == 0))
{
- if (cm->dispatch_mode == VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT)
+ n = vlib_get_node (vm, cm->crypto_node_index);
+ if (n->state == VLIB_NODE_STATE_INTERRUPT)
{
- for (; i < tm->n_vlib_mains; i++)
+ for (i = 0; i < tm->n_vlib_mains; i++)
vlib_node_set_interrupt_pending (vlib_get_main_by_index (i),
cm->crypto_node_index);
}
@@ -628,7 +629,7 @@ static_always_inline void
vnet_crypto_async_add_to_frame (vlib_main_t *vm, vnet_crypto_async_frame_t *f,
u32 key_index, u32 crypto_len,
i16 integ_len_adj, i16 crypto_start_offset,
- u16 integ_start_offset, u32 buffer_index,
+ i16 integ_start_offset, u32 buffer_index,
u16 next_node, u8 *iv, u8 *tag, u8 *aad,
u8 flags)
{
diff --git a/src/vnet/crypto/crypto_api.c b/src/vnet/crypto/crypto_api.c
index 49b12a3d377..e701864a5ba 100644
--- a/src/vnet/crypto/crypto_api.c
+++ b/src/vnet/crypto/crypto_api.c
@@ -46,12 +46,24 @@ vl_api_crypto_set_async_dispatch_t_handler (vl_api_crypto_set_async_dispatch_t
vl_api_crypto_set_async_dispatch_reply_t *rmp;
int rv = 0;
- vnet_crypto_set_async_dispatch_mode ((u8) mp->mode);
+ vnet_crypto_set_async_dispatch ((u8) mp->mode, 0);
REPLY_MACRO (VL_API_CRYPTO_SET_ASYNC_DISPATCH_REPLY);
}
static void
+vl_api_crypto_set_async_dispatch_v2_t_handler (
+ vl_api_crypto_set_async_dispatch_v2_t *mp)
+{
+ vl_api_crypto_set_async_dispatch_v2_reply_t *rmp;
+ int rv = 0;
+
+ vnet_crypto_set_async_dispatch ((u8) mp->mode, mp->adaptive ? 1 : 0);
+
+ REPLY_MACRO (VL_API_CRYPTO_SET_ASYNC_DISPATCH_V2_REPLY);
+}
+
+static void
vl_api_crypto_set_handler_t_handler (vl_api_crypto_set_handler_t * mp)
{
vl_api_crypto_set_handler_reply_t *rmp;
diff --git a/src/vnet/crypto/node.c b/src/vnet/crypto/node.c
index 216b924f96e..ee7f344ce68 100644
--- a/src/vnet/crypto/node.c
+++ b/src/vnet/crypto/node.c
@@ -135,8 +135,11 @@ crypto_dequeue_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_crypto_async_free_frame (vm, cf);
}
/* signal enqueue-thread to dequeue the processed frame (n_elts>0) */
- if (cm->dispatch_mode == VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT
- && n_elts > 0)
+ if (n_elts > 0 &&
+ ((node->state == VLIB_NODE_STATE_POLLING &&
+ (node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE)) ||
+ node->state == VLIB_NODE_STATE_INTERRUPT))
{
vlib_node_set_interrupt_pending (
vlib_get_main_by_index (enqueue_thread_idx),
@@ -161,24 +164,32 @@ VLIB_NODE_FN (crypto_dispatch_node) (vlib_main_t * vm,
u32 n_dispatched = 0, n_cache = 0, index;
vec_foreach_index (index, cm->dequeue_handlers)
{
- if (PREDICT_FALSE (cm->dequeue_handlers[index] == 0))
- continue;
n_cache = crypto_dequeue_frame (
vm, node, ct, cm->dequeue_handlers[index], n_cache, &n_dispatched);
}
- /* *INDENT-ON* */
if (n_cache)
vlib_buffer_enqueue_to_next_vec (vm, node, &ct->buffer_indices, &ct->nexts,
n_cache);
+ /* if there are still pending tasks and node in interrupt mode,
+ sending current thread signal to dequeue next loop */
+ if (pool_elts (ct->frame_pool) > 0 &&
+ ((node->state == VLIB_NODE_STATE_POLLING &&
+ (node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE)) ||
+ node->state == VLIB_NODE_STATE_INTERRUPT))
+ {
+ vlib_node_set_interrupt_pending (vm, node->node_index);
+ }
+
return n_dispatched;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (crypto_dispatch_node) = {
.name = "crypto-dispatch",
.type = VLIB_NODE_TYPE_INPUT,
- .state = VLIB_NODE_STATE_DISABLED,
+ .flags = VLIB_NODE_FLAG_ADAPTIVE_MODE,
+ .state = VLIB_NODE_STATE_INTERRUPT,
.format_trace = format_crypto_dispatch_trace,
.n_errors = ARRAY_LEN(vnet_crypto_async_error_strings),
@@ -192,7 +203,6 @@ VLIB_REGISTER_NODE (crypto_dispatch_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/dev/api.c b/src/vnet/dev/api.c
new file mode 100644
index 00000000000..114b63d6662
--- /dev/null
+++ b/src/vnet/dev/api.c
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/pool.h"
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+#include <vnet/dev/api.h>
+
/* "dev/api" log class used by the log_debug()/log_err() macros below */
VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
  .class_name = "dev",
  .subclass_name = "api",
};
+
+static int
+_vnet_dev_queue_size_validate (u32 size, vnet_dev_queue_config_t c)
+{
+ if (size < c.min_size)
+ return 0;
+ if (size > c.max_size)
+ return 0;
+ if (c.size_is_power_of_two && count_set_bits (size) != 1)
+ return 0;
+ if (c.multiplier && size % c.multiplier)
+ return 0;
+
+ return 1;
+}
+
/* Attach a device to VPP.
 *
 * Resolves the bus from args->device_id, queries the bus for device info,
 * probes the registered drivers (optionally restricted to
 * args->driver_name) and, when one claims the device, allocates the
 * device object, copies the driver's argument templates, parses
 * args->args and runs the driver init op in the dev process.
 *
 * On success args->dev_index is filled in; on any failure the partially
 * constructed device is freed before returning the error code. */
vnet_dev_rv_t
vnet_dev_api_attach (vlib_main_t *vm, vnet_dev_api_attach_args_t *args)
{
  vnet_dev_main_t *dm = &vnet_dev_main;
  vnet_dev_t *dev = 0;
  vnet_dev_rv_t rv = VNET_DEV_OK;
  vnet_dev_bus_t *bus;
  vnet_dev_driver_t *driver;
  void *bus_dev_info = 0;
  u8 *dev_desc = 0;

  log_debug (0, "%s driver %s flags '%U' args '%v'", args->device_id,
	     args->driver_name, format_vnet_dev_flags, &args->flags,
	     args->args);

  /* a device id may be attached only once */
  if (vnet_dev_by_id (args->device_id))
    return VNET_DEV_ERR_ALREADY_IN_USE;

  bus = vnet_dev_find_device_bus (vm, args->device_id);
  if (!bus)
    {
      log_err (dev, "unknown bus");
      rv = VNET_DEV_ERR_INVALID_BUS;
      goto done;
    }

  /* bus-specific device info; released on the done: path */
  bus_dev_info = vnet_dev_get_device_info (vm, args->device_id);
  if (!bus_dev_info)
    {
      log_err (dev, "invalid or unsupported device id");
      rv = VNET_DEV_ERR_INVALID_DEVICE_ID;
      goto done;
    }

  /* first driver whose probe op recognizes the device wins */
  vec_foreach (driver, dm->drivers)
    {
      if (args->driver_name[0] &&
	  strcmp (args->driver_name, driver->registration->name))
	continue;
      if (driver->ops.probe &&
	  (dev_desc = driver->ops.probe (vm, bus->index, bus_dev_info)))
	break;
    }

  if (!dev_desc)
    {
      log_err (dev, "driver not available for %s", args->device_id);
      rv = VNET_DEV_ERR_DRIVER_NOT_AVAILABLE;
      goto done;
    }

  dev = vnet_dev_alloc (vm, args->device_id, driver);
  if (!dev)
    {
      log_err (dev, "dev alloc failed for %s", args->device_id);
      rv = VNET_DEV_ERR_BUG;
      goto done;
    }
  dev->description = dev_desc;

  /* copy the driver's argument templates so parsed values are per-device */
  if (driver->registration->args)
    for (vnet_dev_arg_t *a = driver->registration->args;
	 a->type != VNET_DEV_ARG_END; a++)
      vec_add1 (dev->args, *a);

  if (args->args)
    {
      if ((rv = vnet_dev_arg_parse (vm, dev, dev->args, args->args)) !=
	  VNET_DEV_OK)
	goto done;
    }

  if ((args->flags.e & VNET_DEV_F_NO_STATS) == 0)
    dev->poll_stats = 1;

  log_debug (0, "found '%v'", dev->description);

  /* driver init runs in the dev process context */
  rv = vnet_dev_process_call_op (vm, dev, vnet_dev_init);

done:
  if (bus_dev_info)
    bus->ops.free_device_info (vm, bus_dev_info);

  /* on failure, tear down whatever was allocated */
  if (rv != VNET_DEV_OK && dev)
    vnet_dev_process_call_op_no_rv (vm, dev, vnet_dev_free);
  else if (dev)
    args->dev_index = dev->index;

  return rv;
}
+
+vnet_dev_rv_t
+vnet_dev_api_detach (vlib_main_t *vm, vnet_dev_api_detach_args_t *args)
+{
+ vnet_dev_t *dev = vnet_dev_by_index (args->dev_index);
+
+ log_debug (dev, "detach");
+
+ if (dev)
+ return vnet_dev_process_call_op_no_rv (vm, dev, vnet_dev_detach);
+
+ return VNET_DEV_ERR_NOT_FOUND;
+}
+
+vnet_dev_rv_t
+vnet_dev_api_reset (vlib_main_t *vm, vnet_dev_api_reset_args_t *args)
+{
+ vnet_dev_t *dev = vnet_dev_by_id (args->device_id);
+
+ log_debug (dev, "detach");
+
+ if (!dev)
+ return VNET_DEV_ERR_NOT_FOUND;
+
+ if (dev->ops.reset)
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+
+ return vnet_dev_process_call_op (vm, dev, vnet_dev_reset);
+}
+
+vnet_dev_rv_t
+vnet_dev_api_create_port_if (vlib_main_t *vm,
+ vnet_dev_api_create_port_if_args_t *args)
+{
+ vnet_dev_t *dev = vnet_dev_by_index (args->dev_index);
+ vnet_dev_port_t *port = 0;
+ u16 n_threads = vlib_get_n_threads ();
+ int default_is_intr_mode;
+ vnet_dev_rv_t rv;
+
+ log_debug (dev,
+ "create_port_if: dev_index %u port %u intf_name '%s' num_rx_q %u "
+ "num_tx_q %u rx_q_sz %u tx_q_sz %u, flags '%U' args '%v'",
+ args->dev_index, args->port_id, args->intf_name,
+ args->num_rx_queues, args->num_tx_queues, args->rx_queue_size,
+ args->tx_queue_size, format_vnet_dev_port_flags, &args->flags,
+ args->args);
+
+ if (dev == 0)
+ return VNET_DEV_ERR_NOT_FOUND;
+
+ foreach_vnet_dev_port (p, dev)
+ if (p->port_id == args->port_id)
+ {
+ port = p;
+ break;
+ }
+
+ if (!port)
+ return VNET_DEV_ERR_INVALID_DEVICE_ID;
+
+ if (port->interface_created)
+ return VNET_DEV_ERR_ALREADY_EXISTS;
+
+ if (args->args)
+ {
+ rv = vnet_dev_arg_parse (vm, dev, port->args, args->args);
+ if (rv != VNET_DEV_OK)
+ return rv;
+ }
+
+ default_is_intr_mode = (args->flags.e & VNET_DEV_PORT_F_INTERRUPT_MODE) != 0;
+ if (default_is_intr_mode && port->attr.caps.interrupt_mode == 0)
+ {
+ log_err (dev, "interrupt mode requested and port doesn't support it");
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+
+ if (args->num_rx_queues)
+ {
+ if (args->num_rx_queues > port->attr.max_rx_queues)
+ return VNET_DEV_ERR_INVALID_NUM_RX_QUEUES;
+ port->intf.num_rx_queues = args->num_rx_queues;
+ }
+ else
+ port->intf.num_rx_queues = clib_min (port->attr.max_tx_queues, 1);
+
+ if (args->num_tx_queues)
+ {
+ if (args->num_tx_queues > port->attr.max_tx_queues)
+ return VNET_DEV_ERR_INVALID_NUM_TX_QUEUES;
+ port->intf.num_tx_queues = args->num_tx_queues;
+ }
+ else
+ port->intf.num_tx_queues = clib_min (port->attr.max_tx_queues, n_threads);
+
+ if (args->rx_queue_size)
+ {
+ if (!_vnet_dev_queue_size_validate (args->rx_queue_size,
+ port->rx_queue_config))
+ return VNET_DEV_ERR_INVALID_RX_QUEUE_SIZE;
+ port->intf.rxq_sz = args->rx_queue_size;
+ }
+ else
+ port->intf.rxq_sz = port->rx_queue_config.default_size;
+
+ if (args->tx_queue_size)
+ {
+ if (!_vnet_dev_queue_size_validate (args->tx_queue_size,
+ port->tx_queue_config))
+ return VNET_DEV_ERR_INVALID_TX_QUEUE_SIZE;
+ port->intf.txq_sz = args->tx_queue_size;
+ }
+ else
+ port->intf.txq_sz = port->tx_queue_config.default_size;
+
+ clib_memcpy (port->intf.name, args->intf_name, sizeof (port->intf.name));
+ port->intf.default_is_intr_mode = default_is_intr_mode;
+
+ rv = vnet_dev_process_call_port_op (vm, port, vnet_dev_port_if_create);
+ args->sw_if_index = (rv == VNET_DEV_OK) ? port->intf.sw_if_index : ~0;
+
+ return rv;
+}
+
+vnet_dev_rv_t
+vnet_dev_api_remove_port_if (vlib_main_t *vm,
+ vnet_dev_api_remove_port_if_args_t *args)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_sw_interface_t *si;
+ vnet_hw_interface_t *hi;
+ vnet_dev_port_t *port;
+
+ si = vnet_get_sw_interface_or_null (vnm, args->sw_if_index);
+ if (!si)
+ return VNET_DEV_ERR_UNKNOWN_INTERFACE;
+
+ hi = vnet_get_hw_interface_or_null (vnm, si->hw_if_index);
+ if (!hi)
+ return VNET_DEV_ERR_UNKNOWN_INTERFACE;
+
+ if (pool_is_free_index (dm->ports_by_dev_instance, hi->dev_instance))
+ return VNET_DEV_ERR_UNKNOWN_INTERFACE;
+
+ port = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+
+ if (port->intf.hw_if_index != si->hw_if_index)
+ return VNET_DEV_ERR_UNKNOWN_INTERFACE;
+
+ return vnet_dev_process_call_port_op (vm, port, vnet_dev_port_if_remove);
+}
diff --git a/src/vnet/dev/api.h b/src/vnet/dev/api.h
new file mode 100644
index 00000000000..1b7bf27d62a
--- /dev/null
+++ b/src/vnet/dev/api.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
#ifndef _VNET_DEV_API_H_
#define _VNET_DEV_API_H_

#include <vppinfra/clib.h>
#include <vnet/vnet.h>
#include <vnet/dev/types.h>

/* Arguments for vnet_dev_api_attach(). device_id and driver_name are
 * fixed-size strings; args is an optional driver-specific argument
 * string (u8 vector, owned by the caller). */
typedef struct
{
  vnet_dev_device_id_t device_id;
  vnet_dev_driver_name_t driver_name;
  vnet_dev_flags_t flags;
  u8 *args;

  /* return */
  u32 dev_index;
} vnet_dev_api_attach_args_t;

vnet_dev_rv_t vnet_dev_api_attach (vlib_main_t *,
				   vnet_dev_api_attach_args_t *);

/* Arguments for vnet_dev_api_detach(); dev_index as returned by attach. */
typedef struct
{
  u32 dev_index;
} vnet_dev_api_detach_args_t;
vnet_dev_rv_t vnet_dev_api_detach (vlib_main_t *,
				   vnet_dev_api_detach_args_t *);

/* Arguments for vnet_dev_api_reset(); device addressed by id string. */
typedef struct
{
  vnet_dev_device_id_t device_id;
} vnet_dev_api_reset_args_t;
vnet_dev_rv_t vnet_dev_api_reset (vlib_main_t *, vnet_dev_api_reset_args_t *);

/* Arguments for vnet_dev_api_create_port_if(). Zero queue counts/sizes
 * select per-port defaults; args is an optional port argument string
 * (u8 vector, owned by the caller). */
typedef struct
{
  u32 dev_index;
  vnet_dev_if_name_t intf_name;
  u16 num_rx_queues;
  u16 num_tx_queues;
  u16 rx_queue_size;
  u16 tx_queue_size;
  vnet_dev_port_id_t port_id;
  vnet_dev_port_flags_t flags;
  u8 *args;

  /* return */
  u32 sw_if_index;
} vnet_dev_api_create_port_if_args_t;

vnet_dev_rv_t
vnet_dev_api_create_port_if (vlib_main_t *,
			     vnet_dev_api_create_port_if_args_t *);

/* Arguments for vnet_dev_api_remove_port_if(). */
typedef struct
{
  u32 sw_if_index;
} vnet_dev_api_remove_port_if_args_t;

vnet_dev_rv_t
vnet_dev_api_remove_port_if (vlib_main_t *,
			     vnet_dev_api_remove_port_if_args_t *);

#endif /* _VNET_DEV_API_H_ */
diff --git a/src/vnet/dev/args.c b/src/vnet/dev/args.c
new file mode 100644
index 00000000000..e302517cc61
--- /dev/null
+++ b/src/vnet/dev/args.c
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/pool.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+#include <vnet/dev/types.h>
+#include <vppinfra/format_table.h>
+
/* "dev/args" log class used by the log_debug()/log_err() macros below */
VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
  .class_name = "dev",
  .subclass_name = "args",
};
+
+void
+vnet_dev_arg_clear_value (vnet_dev_arg_t *a)
+{
+ if (a->type == VNET_DEV_ARG_TYPE_STRING)
+ vec_free (a->val.string);
+ a->val = (typeof (a->val)){};
+ a->val_set = 0;
+}
+
+void
+vnet_dev_arg_free (vnet_dev_arg_t **vp)
+{
+ vnet_dev_arg_t *v;
+ vec_foreach (v, *vp)
+ vnet_dev_arg_clear_value (v);
+ vec_free (*vp);
+}
+
+vnet_dev_rv_t
+vnet_dev_arg_parse (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_arg_t *args,
+ u8 *str)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ unformat_input_t in;
+ u8 *name = 0;
+ u8 *err = 0;
+
+ log_debug (dev, "input '%v'", str);
+ if (args == 0)
+ return rv;
+
+ unformat_init_string (&in, (char *) str, vec_len (str));
+
+ while (unformat (&in, "%U=", unformat_token, "a-zA-Z0-9_", &name))
+ {
+ vnet_dev_arg_t *a = args;
+ vec_add1 (name, 0);
+ while (a < vec_end (args))
+ if (strcmp (a->name, (char *) name) == 0)
+ break;
+ else
+ a++;
+
+ if (a->type == VNET_DEV_ARG_TYPE_BOOL)
+ {
+
+ if (unformat (&in, "true") || unformat (&in, "1") ||
+ unformat (&in, "on") || unformat (&in, "yes"))
+ a->val.boolean = 1;
+ else if (unformat (&in, "false") || unformat (&in, "0") ||
+ unformat (&in, "off") || unformat (&in, "no"))
+ a->val.boolean = 0;
+ else
+ {
+ log_err (dev, "unable to parse args: %U", format_unformat_error,
+ &in);
+ err = format (
+ 0,
+ "boolean value expected ('yes', 'no', '0', '1', 'on', "
+ "'off', 'true' or 'false') for argument '%s', found '%U'",
+ a->name, format_unformat_error, &in);
+ goto done;
+ }
+ }
+ else if (a->type == VNET_DEV_ARG_TYPE_UINT32)
+ {
+ u32 val, min = 0, max = CLIB_U32_MAX;
+ if (!unformat (&in, "%u", &val))
+ {
+ err = format (0,
+ "unsigned integer in range %u - %u expected for "
+ "argument '%s', found '%U'",
+ min, max, a->name, format_unformat_error, &in);
+ goto done;
+ }
+
+ if (a->min || a->max)
+ {
+ min = a->min;
+ max = a->max;
+ }
+
+ if (val < min || val > max)
+ {
+ err = format (0,
+ "unsigned integer in range %u - %u expected for "
+ "argument '%s', found '%u'",
+ min, max, a->name, val);
+ goto done;
+ }
+ a->val.uint32 = val;
+ }
+ else if (a->type == VNET_DEV_ARG_TYPE_STRING)
+ {
+ if (!unformat (&in, "%U", unformat_double_quoted_string,
+ &a->val.string))
+ {
+ err = format (
+ 0,
+ "double quoted string expected for argument '%s', found '%U'",
+ a->name, format_unformat_error, &in);
+ goto done;
+ }
+
+ if (a->min && vec_len (a->val.string) < a->min)
+ {
+ err =
+ format (0, "string '%v' too short, must be at least %u chars",
+ a->val.string, a->min);
+ goto done;
+ }
+ if (a->max && vec_len (a->val.string) > a->max)
+ {
+ err = format (
+ 0, "string '%v' too long, must be no longer than %u chars",
+ a->val.string, a->max);
+ goto done;
+ }
+ }
+ else
+ {
+ err = format (0, "unknown argument '%s'", name);
+ goto done;
+ }
+
+ a->val_set = 1;
+ log_debug (dev, "name '%s' type %U value %U", name,
+ format_vnet_dev_arg_type, a->type, format_vnet_dev_arg_value,
+ a->type, &a->val);
+ vec_free (name);
+ unformat (&in, ",");
+ }
+
+ if (unformat_check_input (&in) != UNFORMAT_END_OF_INPUT)
+ err = format (0, "unable to parse argument name '%U'",
+ format_unformat_error, &in);
+
+done:
+ if (err)
+ {
+ vnet_dev_arg_t *a = 0;
+ log_err (dev, "%v", err);
+ vec_free (err);
+ vec_foreach (a, args)
+ vnet_dev_arg_clear_value (a);
+ rv = VNET_DEV_ERR_INVALID_ARG;
+ }
+
+ vec_free (name);
+ unformat_free (&in);
+ return rv;
+}
+
/* format callback: print an argument type as its enum suffix
 * (BOOL/UINT32/STRING); expanded from the foreach_vnet_dev_arg_type
 * x-macro */
u8 *
format_vnet_dev_arg_type (u8 *s, va_list *args)
{
  vnet_dev_arg_type_t t = va_arg (*args, u32);
  switch (t)
    {
#define _(n, f, val)                                                          \
  case VNET_DEV_ARG_TYPE_##n:                                                 \
    return format (s, #n);
      foreach_vnet_dev_arg_type
#undef _
	default : ASSERT (0);
      break;
    }
  return s;
}
+
/* format callback: print an argument value using the per-type format
 * string and union member from the foreach_vnet_dev_arg_type x-macro;
 * takes (vnet_dev_arg_type_t, vnet_dev_arg_value_t *) */
u8 *
format_vnet_dev_arg_value (u8 *s, va_list *args)
{
  vnet_dev_arg_type_t t = va_arg (*args, u32);
  vnet_dev_arg_value_t *v = va_arg (*args, vnet_dev_arg_value_t *);

  switch (t)
    {
#define _(n, f, value)                                                        \
  case VNET_DEV_ARG_TYPE_##n:                                                 \
    s = format (s, f, v->value);                                              \
    break;
      foreach_vnet_dev_arg_type
#undef _
	default : break;
    }
  return s;
}
+
+u8 *
+format_vnet_dev_args (u8 *s, va_list *va)
+{
+ vnet_dev_arg_t *a, *args = va_arg (*va, vnet_dev_arg_t *);
+ table_t t = { .no_ansi = 1 };
+
+ table_add_header_col (&t, 4, "Name", "Value", "Default", "Description");
+ table_set_cell_align (&t, -1, 0, TTAA_LEFT);
+ table_set_cell_align (&t, -1, 3, TTAA_LEFT);
+ vec_foreach (a, args)
+ {
+ int r = a - args;
+ table_format_cell (&t, r, 0, "%s", a->name);
+ if (a->val_set)
+ table_format_cell (&t, r, 1, "%U", format_vnet_dev_arg_value, a->type,
+ &a->val);
+ else
+ table_format_cell (&t, r, 1, "<not set>");
+
+ table_format_cell (&t, r, 2, "%U", format_vnet_dev_arg_value, a->type,
+ &a->default_val);
+ table_format_cell (&t, r, 3, "%s", a->desc);
+ table_set_cell_align (&t, r, 0, TTAA_LEFT);
+ table_set_cell_align (&t, r, 3, TTAA_LEFT);
+ }
+
+ s = format (s, "%U", format_table, &t);
+
+ table_free (&t);
+ return s;
+}
diff --git a/src/vnet/dev/args.h b/src/vnet/dev/args.h
new file mode 100644
index 00000000000..a256cfe8e0e
--- /dev/null
+++ b/src/vnet/dev/args.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
#ifndef _VNET_DEV_ARGS_H_
#define _VNET_DEV_ARGS_H_

#include <vppinfra/clib.h>
#include <vnet/dev/errors.h>

/* x-macro list of value types: _(enum suffix, format string, union member) */
#define foreach_vnet_dev_arg_type                                             \
  _ (BOOL, "%u", boolean)                                                     \
  _ (UINT32, "%u", uint32)                                                    \
  _ (STRING, "\'%v\'", string)

/* VNET_DEV_ARG_END (value 0) doubles as the end-of-list sentinel used by
 * driver registrations; note the function-like VNET_DEV_ARG_END() macro
 * below expands to an initializer with this type */
typedef enum
{
  VNET_DEV_ARG_END,
#define _(n, f, v) VNET_DEV_ARG_TYPE_##n,
  foreach_vnet_dev_arg_type
#undef _
} __clib_packed vnet_dev_arg_type_t;

typedef union
{
  u8 boolean;
  u32 uint32;
  u8 *string; /* u8 vector, owned by the arg (freed on clear) */
} vnet_dev_arg_value_t;

/* One argument descriptor. Drivers declare these as templates; parsing
 * fills in 'val' and 'val_set'. */
typedef struct
{
  char *name;
  char *desc;
  vnet_dev_arg_type_t type;
  u8 val_set;
  u32 min; /* UINT32: value range; STRING: length range (0 = no limit) */
  u32 max;
  u64 id; /* caller-defined identifier (the 'ud' macro parameter) */
  vnet_dev_arg_value_t val;
  vnet_dev_arg_value_t default_val;
} vnet_dev_arg_t;

/* convenience initializers for driver argument tables */
#define VNET_DEV_ARG_BOOL(ud, n, d, ...)                                      \
  {                                                                           \
    .type = VNET_DEV_ARG_TYPE_BOOL, .id = ud, .name = n, .desc = d,           \
    __VA_ARGS__                                                               \
  }
#define VNET_DEV_ARG_UINT32(ud, n, d, ...)                                    \
  {                                                                           \
    .type = VNET_DEV_ARG_TYPE_UINT32, .id = ud, .name = n, .desc = d,         \
    __VA_ARGS__                                                               \
  }
#define VNET_DEV_ARG_STRING(ud, n, d, ...)                                    \
  {                                                                           \
    .type = VNET_DEV_ARG_TYPE_STRING, .id = ud, .name = n, .desc = d,         \
    __VA_ARGS__                                                               \
  }
#define VNET_DEV_ARG_END()                                                    \
  {                                                                           \
    .type = VNET_DEV_ARG_END                                                  \
  }

/* build a sentinel-terminated argument array from a list of initializers */
#define VNET_DEV_ARGS(...)                                                    \
  (vnet_dev_arg_t[]) { __VA_ARGS__, VNET_DEV_ARG_END () }

/* iterate the arg vectors attached to a device / port */
#define foreach_vnet_dev_args(a, d)                                           \
  for (typeof ((d)->args[0]) *(a) = (d)->args; (a) < vec_end ((d)->args);     \
       (a)++)
#define foreach_vnet_dev_port_args(a, p)                                      \
  for (typeof ((p)->args[0]) *(a) = (p)->args; (a) < vec_end ((p)->args);     \
       (a)++)

#endif /* _VNET_DEV_ARGS_H_ */
diff --git a/src/vnet/dev/cli.c b/src/vnet/dev/cli.c
new file mode 100644
index 00000000000..53be4483183
--- /dev/null
+++ b/src/vnet/dev/cli.c
@@ -0,0 +1,331 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/api.h>
+
+static clib_error_t *
+device_attach_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_dev_api_attach_args_t a = {};
+ vnet_dev_rv_t rv;
+
+ if (!unformat_user (input, unformat_c_string_array, a.device_id,
+ sizeof (a.device_id)))
+ return clib_error_return (0, "please specify valid device id");
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (!a.driver_name[0] &&
+ unformat (input, "driver %U", unformat_c_string_array, a.driver_name,
+ sizeof (a.driver_name)))
+ ;
+ else if (!a.flags.n &&
+ unformat (input, "flags %U", unformat_vnet_dev_flags, &a.flags))
+ ;
+ else if (!a.args && unformat (input, "args %v", &a.args))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ rv = vnet_dev_api_attach (vm, &a);
+
+ vec_free (a.args);
+
+ if (rv != VNET_DEV_OK)
+ return clib_error_return (0, "unable to attach '%s': %U", a.device_id,
+ format_vnet_dev_rv, rv);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (device_attach_cmd, static) = {
+ .path = "device attach",
+ .short_help = "device attach <device-id> [driver <name>] "
+ "[args <dev-args>]",
+ .function = device_attach_cmd_fn,
+};
+
+static clib_error_t *
+device_detach_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_dev_rv_t rv;
+ vnet_dev_device_id_t device_id = {};
+ vnet_dev_t *dev;
+
+ if (!unformat_user (input, unformat_c_string_array, device_id,
+ sizeof (device_id)))
+ return clib_error_return (0, "please specify valid device id");
+
+ dev = vnet_dev_by_id (device_id);
+
+ if (dev)
+ {
+ vnet_dev_api_detach_args_t a = { .dev_index = dev->index };
+ rv = vnet_dev_api_detach (vm, &a);
+ }
+ else
+ rv = VNET_DEV_ERR_UNKNOWN_DEVICE;
+
+ if (rv != VNET_DEV_OK)
+ return clib_error_return (0, "unable to detach '%s': %U", device_id,
+ format_vnet_dev_rv, rv);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (device_detach_cmd, static) = {
+ .path = "device detach",
+ .short_help = "device detach <device-id>",
+ .function = device_detach_cmd_fn,
+ .is_mp_safe = 1,
+};
+
+static clib_error_t *
+device_reset_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_dev_api_reset_args_t a = {};
+ vnet_dev_rv_t rv;
+
+ if (!unformat_user (input, unformat_c_string_array, a.device_id,
+ sizeof (a.device_id)))
+ return clib_error_return (0, "please specify valid device id");
+
+ rv = vnet_dev_api_reset (vm, &a);
+
+ if (rv != VNET_DEV_OK)
+ return clib_error_return (0, "unable to reset '%s': %U", a.device_id,
+ format_vnet_dev_rv, rv);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (device_reset_cmd, static) = {
+ .path = "device reset",
+ .short_help = "device reset <device-id>",
+ .function = device_reset_cmd_fn,
+ .is_mp_safe = 1,
+};
+
/* CLI: "device create-interface <device-id> [port <n>] [name <s>] ...".
 * Parses queue counts/sizes and flags into the API args and calls
 * vnet_dev_api_create_port_if(). */
static clib_error_t *
device_create_if_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
			 vlib_cli_command_t *cmd)
{
  vnet_dev_api_create_port_if_args_t a = {};
  vnet_dev_rv_t rv;
  vnet_dev_device_id_t device_id = {};
  vnet_dev_t *dev = 0;
  u32 n;

  if (unformat_user (input, unformat_c_string_array, device_id,
		     sizeof (device_id)))
    dev = vnet_dev_by_id (device_id);

  if (!dev)
    return clib_error_return (0, "please specify valid device id");

  a.dev_index = dev->index;

  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      /* each option may be given at most once */
      if (!a.intf_name[0] &&
	  unformat (input, "if-name %U", unformat_c_string_array, a.intf_name,
		    sizeof (a.intf_name)))
	;
      /* NOTE(review): "port 0" cannot be re-specified because of the
	 !a.port_id guard — confirm port ids are 1-based or 0 is default */
      else if (!a.port_id && unformat (input, "port %u", &n))
	a.port_id = n;
      else if (!a.flags.n && unformat (input, "flags %U",
				       unformat_vnet_dev_port_flags, &a.flags))
	;
      else if (!a.num_rx_queues && unformat (input, "num-rx-queues %u", &n))
	a.num_rx_queues = n;
      else if (!a.num_tx_queues && unformat (input, "num-tx-queues %u", &n))
	a.num_tx_queues = n;
      else if (!a.rx_queue_size && unformat (input, "rx-queues-size %u", &n))
	a.rx_queue_size = n;
      else if (!a.tx_queue_size && unformat (input, "tx-queues-size %u", &n))
	a.tx_queue_size = n;
      /* NOTE(review): "name" is a second spelling of "if-name" — both
	 fill a.intf_name */
      else if (!a.intf_name[0] &&
	       unformat (input, "name %U", unformat_c_string_array,
			 &a.intf_name, sizeof (a.intf_name)))
	;
      else if (!a.args && unformat (input, "args %v", &a.args))
	;
      else
	return clib_error_return (0, "unknown input `%U'",
				  format_unformat_error, input);
    }

  rv = vnet_dev_api_create_port_if (vm, &a);

  vec_free (a.args);

  if (rv != VNET_DEV_OK)
    return clib_error_return (0, "unable to create_if '%s': %U", device_id,
			      format_vnet_dev_rv, rv);

  return 0;
}

VLIB_CLI_COMMAND (device_create_if_cmd, static) = {
  .path = "device create-interface",
  .short_help = "device create-interface <device-id> [port <port-id>] "
		"[args <iface-args>]",
  .function = device_create_if_cmd_fn,
  .is_mp_safe = 1,
};
+
+static clib_error_t *
+device_remove_if_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_dev_api_remove_port_if_args_t a = { .sw_if_index = ~0 };
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_dev_rv_t rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_vnet_sw_interface, vnm,
+ &a.sw_if_index))
+ ;
+ else if (unformat (input, "sw-if-index %u", &a.sw_if_index))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (a.sw_if_index == ~0)
+ return clib_error_return (0, "please specify existing interface name");
+
+ rv = vnet_dev_api_remove_port_if (vm, &a);
+
+ if (rv != VNET_DEV_OK)
+ return clib_error_return (0, "unable to remove interface: %U",
+ format_vnet_dev_rv, rv);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (device_remove_if_cmd, static) = {
+ .path = "device remove-interface",
+ .short_help = "device remove-interface [<interface-name> | sw-if-index <n>]",
+ .function = device_remove_if_cmd_fn,
+ .is_mp_safe = 1,
+};
+
/* CLI: "show device [counters] [all] [debug]" — dump every attached
 * device with its ports and RX/TX queues; "all" includes zero-valued
 * counters, "debug" adds driver debug info to the formatters. */
static clib_error_t *
show_devices_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
		     vlib_cli_command_t *cmd)
{
  vnet_dev_main_t *dm = &vnet_dev_main;
  vnet_dev_format_args_t fa = {}, *a = &fa;

  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (input, "counters"))
	fa.counters = 1;
      else if (unformat (input, "all"))
	fa.show_zero_counters = 1;
      else if (unformat (input, "debug"))
	fa.debug = 1;
      else
	return clib_error_return (0, "unknown input `%U'",
				  format_unformat_error, input);
    }

  /* device -> port -> queue hierarchy, indented two spaces per level */
  pool_foreach_pointer (dev, dm->devices)
    {
      vlib_cli_output (vm, "device '%s':", dev->device_id);
      vlib_cli_output (vm, "  %U", format_vnet_dev_info, a, dev);
      foreach_vnet_dev_port (p, dev)
	{
	  vlib_cli_output (vm, "  Port %u:", p->port_id);
	  vlib_cli_output (vm, "    %U", format_vnet_dev_port_info, a, p);
	  if (fa.counters)
	    vlib_cli_output (vm, "    %U", format_vnet_dev_counters, a,
			     p->counter_main);

	  foreach_vnet_dev_port_rx_queue (q, p)
	    {
	      vlib_cli_output (vm, "    RX queue %u:", q->queue_id);
	      vlib_cli_output (vm, "      %U", format_vnet_dev_rx_queue_info,
			       a, q);
	    }

	  foreach_vnet_dev_port_tx_queue (q, p)
	    {
	      vlib_cli_output (vm, "    TX queue %u:", q->queue_id);
	      vlib_cli_output (vm, "      %U", format_vnet_dev_tx_queue_info,
			       a, q);
	    }
	}
    }
  return 0;
}

VLIB_CLI_COMMAND (show_devices_cmd, static) = {
  .path = "show device",
  .short_help = "show device [counters]",
  .function = show_devices_cmd_fn,
  .is_mp_safe = 1,
};
+
/* CLI: "show device counters [all]" — per-port and per-queue counters
 * for all attached devices; "all" includes zero-valued counters. */
static clib_error_t *
show_device_counters_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
			     vlib_cli_command_t *cmd)
{
  vnet_dev_main_t *dm = &vnet_dev_main;
  vnet_dev_format_args_t fa = { .counters = 1 };

  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (input, "all"))
	fa.show_zero_counters = 1;
      else
	return clib_error_return (0, "unknown input `%U'",
				  format_unformat_error, input);
    }

  pool_foreach_pointer (dev, dm->devices)
    {
      vlib_cli_output (vm, "device '%s':", dev->device_id);
      foreach_vnet_dev_port (p, dev)
	{
	  vlib_cli_output (vm, "  %U", format_vnet_dev_counters, &fa,
			   p->counter_main);

	  /* queue counter mains are optional; skip queues without one */
	  foreach_vnet_dev_port_rx_queue (q, p)
	    if (q->counter_main)
	      {
		vlib_cli_output (vm, "  RX queue %u:", q->queue_id);
		vlib_cli_output (vm, "    %U", format_vnet_dev_counters, &fa,
				 q->counter_main);
	      }

	  foreach_vnet_dev_port_tx_queue (q, p)
	    if (q->counter_main)
	      {
		vlib_cli_output (vm, "  TX queue %u:", q->queue_id);
		vlib_cli_output (vm, "    %U", format_vnet_dev_counters, &fa,
				 q->counter_main);
	      }
	}
    }
  return 0;
}

VLIB_CLI_COMMAND (show_device_counters_cmd, static) = {
  .path = "show device counters",
  .short_help = "show device counters [all]",
  .function = show_device_counters_cmd_fn,
  .is_mp_safe = 1,
};
diff --git a/src/vnet/dev/config.c b/src/vnet/dev/config.c
new file mode 100644
index 00000000000..8883e727ac2
--- /dev/null
+++ b/src/vnet/dev/config.c
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/error.h"
+#include "vppinfra/pool.h"
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/api.h>
+#include <vnet/dev/log.h>
+
/* "dev/config" log class used by the log_debug()/log_err() macros below */
VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
  .class_name = "dev",
  .subclass_name = "config",
};
+
/* Parse one "port <n> { ... }" startup-config sub-section into the
 * create-port-if API args. Returns a clib error on unknown input. */
static clib_error_t *
vnet_dev_config_one_interface (vlib_main_t *vm, unformat_input_t *input,
			       vnet_dev_api_create_port_if_args_t *args)
{
  clib_error_t *err = 0;

  log_debug (0, "port %u %U", args->port_id, format_unformat_input, input);

  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      u32 n;

      if (unformat (input, "name %U", unformat_c_string_array, args->intf_name,
		    sizeof (args->intf_name)))
	;
      else if (unformat (input, "num-rx-queues %u", &n))
	args->num_rx_queues = n;
      else if (unformat (input, "num-tx-queues %u", &n))
	args->num_tx_queues = n;
      else if (unformat (input, "rx-queue-size %u", &n))
	args->rx_queue_size = n;
      else if (unformat (input, "tx-queue-size %u", &n))
	args->tx_queue_size = n;
      else if (unformat (input, "flags %U", unformat_vnet_dev_port_flags,
			 &args->flags))
	;
      /* args->args becomes an owned u8 vector; caller must free it */
      else if (unformat (input, "args %U", unformat_single_quoted_string,
			 &args->args))
	;
      else
	{
	  err = clib_error_return (0, "unknown input '%U'",
				   format_unformat_error, input);
	  break;
	}
    }
  return err;
}
+static clib_error_t *
+vnet_dev_config_one_device (vlib_main_t *vm, unformat_input_t *input,
+ char *device_id)
+{
+ log_debug (0, "device %s %U", device_id, format_unformat_input, input);
+ clib_error_t *err = 0;
+ vnet_dev_api_attach_args_t args = {};
+ vnet_dev_api_create_port_if_args_t *if_args_vec = 0, *if_args;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ unformat_input_t sub_input;
+ u32 n;
+
+ if (unformat (input, "driver %U", unformat_c_string_array,
+ args.driver_name, sizeof (args.driver_name)))
+ ;
+ else if (unformat (input, "flags %U", unformat_vnet_dev_flags,
+ &args.flags))
+ ;
+ else if (unformat (input, "args %U", unformat_single_quoted_string,
+ &args.args))
+ ;
+ else if (unformat (input, "port %u %U", &n, unformat_vlib_cli_sub_input,
+ &sub_input))
+ {
+ vnet_dev_api_create_port_if_args_t *if_args;
+ vec_add2 (if_args_vec, if_args, 1);
+ if_args->port_id = n;
+ err = vnet_dev_config_one_interface (vm, &sub_input, if_args);
+ unformat_free (&sub_input);
+ if (err)
+ break;
+ }
+ else
+ {
+ err = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ break;
+ }
+ }
+
+ if (err == 0)
+ {
+ vnet_dev_rv_t rv;
+
+ clib_memcpy (args.device_id, device_id, sizeof (args.device_id));
+ rv = vnet_dev_api_attach (vm, &args);
+ vec_free (args.args);
+
+ if (rv == VNET_DEV_OK)
+ {
+ vec_foreach (if_args, if_args_vec)
+ {
+ if_args->dev_index = args.dev_index;
+ rv = vnet_dev_api_create_port_if (vm, if_args);
+ if (rv != VNET_DEV_OK)
+ break;
+ }
+ }
+
+ if (rv != VNET_DEV_OK)
+ err = clib_error_return (0, "error: %U for device '%s'",
+ format_vnet_dev_rv, rv, device_id);
+ }
+
+ vec_free (if_args_vec);
+ return err;
+}
+
/* One-shot process node that consumes the "devices { ... }" startup
 * configuration captured by the devices_config() handler in this file.
 * Parses each "dev <id> [{ ... }]" stanza, then disables itself and
 * recycles its node index. */
uword
dev_config_process_node_fn (vlib_main_t *vm, vlib_node_runtime_t *rt,
			    vlib_frame_t *f)
{
  vnet_dev_main_t *dm = &vnet_dev_main;
  unformat_input_t input;
  clib_error_t *err = 0;

  if (dm->startup_config == 0)
    return 0;

  /* take ownership of the saved config text */
  unformat_init_vector (&input, dm->startup_config);
  dm->startup_config = 0;

  while (!err && unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
    {
      unformat_input_t sub_input;
      vnet_dev_device_id_t device_id;
      /* "dev <id> { ... }" — device with per-device sub-config */
      if (unformat (&input, "dev %U %U", unformat_c_string_array, device_id,
		    sizeof (device_id), unformat_vlib_cli_sub_input,
		    &sub_input))
	{
	  err = vnet_dev_config_one_device (vm, &sub_input, device_id);
	  unformat_free (&sub_input);
	}
      /* bare "dev <id>" — attach with an empty sub-config */
      else if (unformat (&input, "dev %U", unformat_c_string_array, device_id,
			 sizeof (device_id)))
	{
	  unformat_input_t no_input = {};
	  unformat_init_vector (&no_input, 0);
	  err = vnet_dev_config_one_device (vm, &no_input, device_id);
	  unformat_free (&no_input);
	}
      else
	err = clib_error_return (0, "unknown input '%U'",
				 format_unformat_error, &input);
    }

  unformat_free (&input);

  /* config errors are logged, not fatal */
  if (err)
    {
      log_err (0, "%U", format_clib_error, err);
      clib_error_free (err);
    }

  /* this process runs exactly once; retire the node */
  vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
  vlib_node_rename (vm, rt->node_index, "deleted-%u", rt->node_index);
  vec_add1 (dm->free_process_node_indices, rt->node_index);
  return 0;
}

VLIB_REGISTER_NODE (dev_config_process_node) = {
  .function = dev_config_process_node_fn,
  .type = VLIB_NODE_TYPE_PROCESS,
  .name = "dev-config",
};
+
+static clib_error_t *
+devices_config (vlib_main_t *vm, unformat_input_t *input)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ uword c;
+
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ vec_add1 (dm->startup_config, c);
+
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (devices_config, "devices");
diff --git a/src/vnet/dev/counters.c b/src/vnet/dev/counters.c
new file mode 100644
index 00000000000..0a1e0a7419d
--- /dev/null
+++ b/src/vnet/dev/counters.c
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+#include <vnet/interface/rx_queue_funcs.h>
+
+/* Log class for messages from the counters subsystem ("dev/counters"). */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+  .class_name = "dev",
+  .subclass_name = "counters",
+};
+
+/* Allocate a vnet_dev_counter_main_t holding a copy of the n_counters
+ * counter templates plus per-counter data/start vectors.  The optional
+ * printf-style fmt/... becomes the description string.  Memory is
+ * cache-line aligned; release with vnet_dev_counters_free(). */
+vnet_dev_counter_main_t *
+vnet_dev_counters_alloc (vlib_main_t *vm, vnet_dev_counter_t *counters,
+                         u16 n_counters, char *fmt, ...)
+{
+  vnet_dev_counter_t *c;
+  vnet_dev_counter_main_t *cm;
+  u32 alloc_sz;
+
+  /* counters[] is a flexible array member, so the allocation covers the
+   * header plus one template slot per counter; only the header is
+   * zeroed because the loop below fills every counter slot */
+  alloc_sz = sizeof (*cm) + n_counters * sizeof (*c);
+  cm = clib_mem_alloc_aligned (alloc_sz, CLIB_CACHE_LINE_BYTES);
+  clib_memset (cm, 0, sizeof (*cm));
+  cm->n_counters = n_counters;
+
+  if (fmt && strlen (fmt))
+    {
+      va_list va;
+      va_start (va, fmt);
+      cm->desc = va_format (0, fmt, &va);
+      va_end (va);
+    }
+
+  /* copy templates and stamp each counter with its index so that
+   * vnet_dev_counter_get_main() can recover cm from a counter pointer */
+  for (u32 i = 0; i < n_counters; i++)
+    {
+      cm->counters[i] = counters[i];
+      cm->counters[i].index = i;
+    }
+
+  /* NOTE(review): n_counters == 0 would pass -1 here — presumably
+   * callers always register at least one counter; confirm. */
+  vec_validate_aligned (cm->counter_data, n_counters - 1,
+                        CLIB_CACHE_LINE_BYTES);
+  vec_validate_aligned (cm->counter_start, n_counters - 1,
+                        CLIB_CACHE_LINE_BYTES);
+
+  return cm;
+}
+
+/* Reset all counters to zero.  For counters maintained with
+ * vnet_dev_counter_value_update() (data = raw - start) the baseline
+ * must advance by the amount just cleared, i.e. start += data;
+ * assigning start = data (as before) loses the previous baseline and
+ * produces wrong values after the second clear.  For add-style
+ * counters start is unused, so the accumulation is harmless. */
+void
+vnet_dev_counters_clear (vlib_main_t *vm, vnet_dev_counter_main_t *cm)
+{
+  for (int i = 0; i < cm->n_counters; i++)
+    {
+      cm->counter_start[i] += cm->counter_data[i];
+      cm->counter_data[i] = 0;
+    }
+}
+
+/* Free everything allocated by vnet_dev_counters_alloc(): description
+ * vector, data/start vectors, and the counter main itself. */
+void
+vnet_dev_counters_free (vlib_main_t *vm, vnet_dev_counter_main_t *cm)
+{
+  vec_free (cm->desc);
+  vec_free (cm->counter_data);
+  vec_free (cm->counter_start);
+  clib_mem_free (cm);
+}
+
+/* Render a human-readable name for one counter.  Standard counter
+ * types map to fixed strings; vendor counters print their own name
+ * followed by optional unit and direction words. */
+u8 *
+format_vnet_dev_counter_name (u8 *s, va_list *va)
+{
+  vnet_dev_counter_t *c = va_arg (*va, vnet_dev_counter_t *);
+
+  char *std_counters[] = {
+    [VNET_DEV_CTR_TYPE_RX_BYTES] = "total bytes received",
+    [VNET_DEV_CTR_TYPE_TX_BYTES] = "total bytes transmitted",
+    [VNET_DEV_CTR_TYPE_RX_PACKETS] = "total packets received",
+    [VNET_DEV_CTR_TYPE_TX_PACKETS] = "total packets transmitted",
+    [VNET_DEV_CTR_TYPE_RX_DROPS] = "total drops received",
+    [VNET_DEV_CTR_TYPE_TX_DROPS] = "total drops transmitted",
+  };
+
+  char *directions[] = {
+    [VNET_DEV_CTR_DIR_RX] = "received",
+    [VNET_DEV_CTR_DIR_TX] = "sent",
+  };
+  char *units[] = {
+    [VNET_DEV_CTR_UNIT_BYTES] = "bytes",
+    [VNET_DEV_CTR_UNIT_PACKETS] = "packets",
+  };
+
+  if (c->type == VNET_DEV_CTR_TYPE_VENDOR)
+    {
+      s = format (s, "%s", c->name);
+
+      /* unit and direction are optional; NA slots are NULL above */
+      if (c->unit < ARRAY_LEN (units) && units[c->unit])
+        s = format (s, " %s", units[c->unit]);
+
+      if (c->dir < ARRAY_LEN (directions) && directions[c->dir])
+        s = format (s, " %s", directions[c->dir]);
+    }
+  else if (c->type < ARRAY_LEN (std_counters) && std_counters[c->type])
+    s = format (s, "%s", std_counters[c->type]);
+  else
+    ASSERT (0); /* unknown counter type */
+
+  return s;
+}
+
+/* Render all counters of cm, one per line, aligned under the current
+ * indent.  Zero-valued counters are skipped unless
+ * a->show_zero_counters is set. */
+u8 *
+format_vnet_dev_counters (u8 *s, va_list *va)
+{
+  vnet_dev_format_args_t *a = va_arg (*va, vnet_dev_format_args_t *);
+  vnet_dev_counter_main_t *cm = va_arg (*va, vnet_dev_counter_main_t *);
+  u32 line = 0, indent = format_get_indent (s);
+
+  foreach_vnet_dev_counter (c, cm)
+    {
+      if (a->show_zero_counters == 0 && cm->counter_data[c->index] == 0)
+        continue;
+
+      /* newline + indent before every line except the first */
+      if (line++)
+        s = format (s, "\n%U", format_white_space, indent);
+
+      s = format (s, "%-45U%lu", format_vnet_dev_counter_name, c,
+                  cm->counter_data[c->index]);
+    }
+
+  return s;
+}
diff --git a/src/vnet/dev/counters.h b/src/vnet/dev/counters.h
new file mode 100644
index 00000000000..33d08ffbecd
--- /dev/null
+++ b/src/vnet/dev/counters.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_COUNTERS_H_
+#define _VNET_DEV_COUNTERS_H_
+
+#include <vnet/dev/dev.h>
+
+typedef enum
+{
+ VNET_DEV_CTR_DIR_NA,
+ VNET_DEV_CTR_DIR_RX,
+ VNET_DEV_CTR_DIR_TX,
+} __clib_packed vnet_dev_counter_direction_t;
+
+typedef enum
+{
+ VNET_DEV_CTR_TYPE_RX_BYTES,
+ VNET_DEV_CTR_TYPE_RX_PACKETS,
+ VNET_DEV_CTR_TYPE_RX_DROPS,
+ VNET_DEV_CTR_TYPE_TX_BYTES,
+ VNET_DEV_CTR_TYPE_TX_PACKETS,
+ VNET_DEV_CTR_TYPE_TX_DROPS,
+ VNET_DEV_CTR_TYPE_VENDOR,
+} __clib_packed vnet_dev_counter_type_t;
+
+typedef enum
+{
+ VNET_DEV_CTR_UNIT_NA,
+ VNET_DEV_CTR_UNIT_BYTES,
+ VNET_DEV_CTR_UNIT_PACKETS,
+} __clib_packed vnet_dev_counter_unit_t;
+
+typedef struct vnet_dev_counter
+{
+ char name[24];
+ uword user_data;
+ vnet_dev_counter_type_t type;
+ vnet_dev_counter_direction_t dir;
+ vnet_dev_counter_unit_t unit;
+ u16 index;
+} vnet_dev_counter_t;
+
+typedef struct vnet_dev_counter_main
+{
+ u8 *desc;
+ u64 *counter_data;
+ u64 *counter_start;
+ u16 n_counters;
+ vnet_dev_counter_t counters[];
+} vnet_dev_counter_main_t;
+
+#define VNET_DEV_CTR_RX_BYTES(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_RX_BYTES, .dir = VNET_DEV_CTR_DIR_RX, \
+ .unit = VNET_DEV_CTR_UNIT_BYTES, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_TX_BYTES(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_TX_BYTES, .dir = VNET_DEV_CTR_DIR_TX, \
+ .unit = VNET_DEV_CTR_UNIT_BYTES, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_RX_PACKETS(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_RX_PACKETS, .dir = VNET_DEV_CTR_DIR_RX, \
+ .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_TX_PACKETS(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_TX_PACKETS, .dir = VNET_DEV_CTR_DIR_TX, \
+ .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_RX_DROPS(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_RX_DROPS, .dir = VNET_DEV_CTR_DIR_RX, \
+ .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_TX_DROPS(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_TX_DROPS, .dir = VNET_DEV_CTR_DIR_TX, \
+ .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_VENDOR(p, d, u, n, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_VENDOR, .user_data = (p), .name = n, \
+ .dir = VNET_DEV_CTR_DIR_##d, .unit = VNET_DEV_CTR_UNIT_##u, __VA_ARGS__ \
+ }
+
+vnet_dev_counter_main_t *vnet_dev_counters_alloc (vlib_main_t *,
+ vnet_dev_counter_t *, u16,
+ char *, ...);
+void vnet_dev_counters_clear (vlib_main_t *, vnet_dev_counter_main_t *);
+void vnet_dev_counters_free (vlib_main_t *, vnet_dev_counter_main_t *);
+
+format_function_t format_vnet_dev_counters;
+format_function_t format_vnet_dev_counters_all;
+
+/* Recover the containing vnet_dev_counter_main_t from a pointer to one
+ * of its embedded counters: step back counter->index elements to reach
+ * counters[0], then subtract the offset of the flexible array member. */
+static_always_inline vnet_dev_counter_main_t *
+vnet_dev_counter_get_main (vnet_dev_counter_t *counter)
+{
+  return (vnet_dev_counter_main_t *) ((u8 *) (counter - counter->index) -
+                                      STRUCT_OFFSET_OF (
+                                        vnet_dev_counter_main_t, counters));
+}
+
+/* Accumulate val into the counter (for event-style counters the driver
+ * increments directly). */
+static_always_inline void
+vnet_dev_counter_value_add (vlib_main_t *vm, vnet_dev_counter_t *counter,
+                            u64 val)
+{
+  vnet_dev_counter_main_t *cm = vnet_dev_counter_get_main (counter);
+  cm->counter_data[counter->index] += val;
+}
+
+/* Set the counter from a raw device-read value: the stored value is the
+ * delta against counter_start, the baseline recorded when counters were
+ * last cleared. */
+static_always_inline void
+vnet_dev_counter_value_update (vlib_main_t *vm, vnet_dev_counter_t *counter,
+                               u64 val)
+{
+  vnet_dev_counter_main_t *cm = vnet_dev_counter_get_main (counter);
+  cm->counter_data[counter->index] = val - cm->counter_start[counter->index];
+}
+
+/* Iterate c over all counters in cm; the whole loop is guarded so a
+ * NULL cm iterates zero times. */
+#define foreach_vnet_dev_counter(c, cm)                                       \
+  if (cm)                                                                     \
+    for (typeof (*(cm)->counters) *(c) = (cm)->counters;                      \
+         (c) < (cm)->counters + (cm)->n_counters; (c)++)
+
+#endif /* _VNET_DEV_COUNTERS_H_ */
diff --git a/src/vnet/dev/dev.api b/src/vnet/dev/dev.api
new file mode 100644
index 00000000000..552b778949b
--- /dev/null
+++ b/src/vnet/dev/dev.api
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+option version = "0.0.1";
+
+/* per-device flags for dev_attach */
+enumflag dev_flags : u32
+{
+  VL_API_DEV_FLAG_NO_STATS = 0x1,
+};
+
+/* per-port flags for dev_create_port_if */
+enumflag dev_port_flags : u32
+{
+  VL_API_DEV_PORT_FLAG_INTERRUPT_MODE = 0x1,
+};
+
+/* Attach a device by id, optionally forcing a specific driver and
+ * passing driver-specific args. */
+autoendian define dev_attach
+{
+  u32 client_index;
+  u32 context;
+  string device_id[48];
+  string driver_name[16];
+  vl_api_dev_flags_t flags;
+  string args[];
+};
+
+autoendian define dev_attach_reply
+{
+  u32 context;
+  u32 dev_index;
+  i32 retval;
+  string error_string[];
+};
+
+/* Detach a previously attached device by dev_index. */
+autoendian define dev_detach
+{
+  u32 client_index;
+  u32 context;
+  u32 dev_index;
+};
+
+autoendian define dev_detach_reply
+{
+  u32 context;
+  i32 retval;
+  string error_string[];
+};
+
+/* Create an interface for one port of an attached device. */
+autoendian define dev_create_port_if
+{
+  u32 client_index;
+  u32 context;
+  u32 dev_index;
+  string intf_name[32];
+  u16 num_rx_queues;
+  u16 num_tx_queues;
+  u16 rx_queue_size;
+  u16 tx_queue_size;
+  u16 port_id;
+  vl_api_dev_port_flags_t flags;
+  string args[];
+};
+
+autoendian define dev_create_port_if_reply
+{
+  /* NOTE(review): replies in this file otherwise carry only context —
+   * client_index here looks like a copy/paste leftover; confirm the
+   * intended wire format before relying on it. */
+  u32 client_index;
+  u32 context;
+  u32 sw_if_index;
+  i32 retval;
+  string error_string[];
+};
+
+/* Remove an interface previously created with dev_create_port_if. */
+autoendian define dev_remove_port_if
+{
+  u32 client_index;
+  u32 context;
+  u32 sw_if_index;
+};
+
+autoendian define dev_remove_port_if_reply
+{
+  u32 context;
+  i32 retval;
+  string error_string[];
+};
+
diff --git a/src/vnet/dev/dev.c b/src/vnet/dev/dev.c
new file mode 100644
index 00000000000..e04fa161ce2
--- /dev/null
+++ b/src/vnet/dev/dev.c
@@ -0,0 +1,461 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/pool.h"
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/log.h>
+#include <vnet/dev/counters.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+};
+
+vnet_dev_main_t vnet_dev_main = { .next_rx_queue_thread = 1 };
+
+/* Find the registered bus whose name is the prefix of device id
+ * "<bus><delimiter><rest>".  Returns 0 when no bus matches. */
+vnet_dev_bus_t *
+vnet_dev_find_device_bus (vlib_main_t *vm, vnet_dev_device_id_t id)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_bus_t *bus;
+
+  pool_foreach (bus, dm->buses)
+    {
+      int n = strlen (bus->registration->name);
+      int l = strlen (id);
+      int dl = strlen (VNET_DEV_DEVICE_ID_PREFIX_DELIMITER);
+
+      /* id must be strictly longer than "<bus-name><delimiter>" */
+      if (l <= n + dl)
+        continue;
+
+      /* strncmp returns non-zero on mismatch */
+      if (strncmp (bus->registration->name, id, n))
+        continue;
+
+      if (strncmp (VNET_DEV_DEVICE_ID_PREFIX_DELIMITER, id + n, dl))
+        continue;
+
+      return bus;
+    }
+
+  return 0;
+}
+
+/* Look up the bus for a device id and ask it for bus-specific device
+ * info.  Returns 0 when no bus matches.  The returned pointer is
+ * presumably released via the bus free_device_info op — confirm with
+ * callers. */
+void *
+vnet_dev_get_device_info (vlib_main_t *vm, vnet_dev_device_id_t id)
+{
+  vnet_dev_bus_t *bus;
+
+  bus = vnet_dev_find_device_bus (vm, id);
+  if (bus == 0)
+    return 0;
+
+  return bus->ops.get_device_info (vm, id);
+}
+
+/* Allocate a device instance for the given driver, register it in the
+ * global device pool and the id->index hash, and spawn its process
+ * node.  On process-creation failure the partially constructed device
+ * is freed and 0 is returned. */
+vnet_dev_t *
+vnet_dev_alloc (vlib_main_t *vm, vnet_dev_device_id_t id,
+                vnet_dev_driver_t *driver)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_t *dev = 0, **devp = 0;
+
+  dev = vnet_dev_alloc_with_data (sizeof (vnet_dev_t),
+                                  driver->registration->device_data_sz);
+
+  /* the pool stores pointers; index is the pool slot */
+  pool_get (dm->devices, devp);
+  devp[0] = dev;
+  dev->index = devp - dm->devices;
+  dev->driver_index = driver->index;
+  dev->ops = driver->registration->ops;
+  dev->bus_index = driver->bus_index;
+  clib_memcpy (dev->device_id, id, sizeof (dev->device_id));
+  hash_set (dm->device_index_by_id, dev->device_id, dev->index);
+
+  if ((vnet_dev_process_create (vm, dev)) == VNET_DEV_OK)
+    return dev;
+
+  vnet_dev_free (vm, dev);
+  return 0;
+}
+
+/* Open the device on its bus, then run the driver's alloc and init
+ * callbacks.  On failure the driver's deinit/free callbacks are invoked
+ * before returning the error. */
+vnet_dev_rv_t
+vnet_dev_init (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index);
+  vnet_dev_rv_t rv;
+
+  vnet_dev_validate (vm, dev);
+
+  if ((rv = bus->ops.device_open (vm, dev)) != VNET_DEV_OK)
+    return rv;
+
+  if (dev->ops.alloc)
+    {
+      rv = dev->ops.alloc (vm, dev);
+      if (rv != VNET_DEV_OK)
+        {
+          log_err (dev, "device init failed [rv %d]", rv);
+          /* NOTE(review): deinit is invoked here although init has not
+           * run yet — confirm drivers tolerate deinit after a failed
+           * alloc */
+          if (dev->ops.deinit)
+            dev->ops.deinit (vm, dev);
+          if (dev->ops.free)
+            dev->ops.free (vm, dev);
+          return rv;
+        }
+    }
+
+  /* same cleanup sequence as the alloc-failure path above */
+  if ((rv = dev->ops.init (vm, dev)) != VNET_DEV_OK)
+    {
+      log_err (dev, "device init failed [rv %d]", rv);
+      if (dev->ops.deinit)
+        dev->ops.deinit (vm, dev);
+      if (dev->ops.free)
+        dev->ops.free (vm, dev);
+      return rv;
+    }
+
+  dev->initialized = 1;
+  dev->not_first_init = 1; /* sticky across deinit/init cycles */
+  return VNET_DEV_OK;
+}
+
+/* Tear down an initialized device: driver deinit, bus close, then stop
+ * the device process node.  All port interfaces must already have been
+ * removed. */
+void
+vnet_dev_deinit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  ASSERT (dev->initialized == 1);
+  vnet_dev_bus_t *bus;
+
+  vnet_dev_validate (vm, dev);
+
+  foreach_vnet_dev_port (p, dev)
+    ASSERT (p->interface_created == 0);
+
+  if (dev->ops.deinit)
+    dev->ops.deinit (vm, dev);
+
+  bus = vnet_dev_get_bus (dev);
+  if (bus->ops.device_close)
+    bus->ops.device_close (vm, dev);
+
+  vnet_dev_process_quit (vm, dev);
+
+  dev->initialized = 0;
+}
+
+/* Release all memory owned by a (deinitialized) device and remove it
+ * from the global device pool and the id->index hash. */
+void
+vnet_dev_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+
+  vnet_dev_validate (vm, dev);
+
+  ASSERT (dev->initialized == 0);
+
+  foreach_vnet_dev_port (p, dev)
+    vnet_dev_port_free (vm, p);
+
+  vec_free (dev->description);
+  pool_free (dev->ports);
+  pool_free (dev->periodic_ops);
+  hash_unset (dm->device_index_by_id, dev->device_id);
+  vnet_dev_arg_free (&dev->args);
+  pool_put_index (dm->devices, dev->index);
+}
+
+/* Invoke the driver's reset op, if it provides one.  The device must be
+ * initialized. */
+vnet_dev_rv_t
+vnet_dev_reset (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vnet_dev_rv_t rv;
+
+  ASSERT (dev->initialized == 1);
+  vnet_dev_validate (vm, dev);
+
+  if (dev->ops.reset == 0)
+    return VNET_DEV_ERR_NOT_SUPPORTED;
+
+  if ((rv = dev->ops.reset (vm, dev)) != VNET_DEV_OK)
+    {
+      log_err (dev, "device reset failed [rv %d]", rv);
+      return rv;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Full detach: remove all created port interfaces, deinitialize the
+ * device, then free it. */
+void
+vnet_dev_detach (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  foreach_vnet_dev_port (p, dev)
+    if (p->interface_created)
+      vnet_dev_port_if_remove (vm, p);
+  vnet_dev_deinit (vm, dev);
+  vnet_dev_free (vm, dev);
+}
+
+/* Allocate DMA-capable memory through the device's bus.  On success
+ * *pp points to the allocation and the VA/DMA mapping is logged. */
+vnet_dev_rv_t
+vnet_dev_dma_mem_alloc (vlib_main_t *vm, vnet_dev_t *dev, u32 size, u32 align,
+                        void **pp)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index);
+  vnet_dev_rv_t rv;
+
+  vnet_dev_validate (vm, dev);
+
+  if (!bus->ops.dma_mem_alloc_fn)
+    return VNET_DEV_ERR_NOT_SUPPORTED;
+
+  rv = bus->ops.dma_mem_alloc_fn (vm, dev, size, align, pp);
+  if (rv == VNET_DEV_OK)
+    log_debug (dev, "%u bytes va %p dma-addr 0x%lx numa %u align %u", size,
+               *pp, vnet_dev_get_dma_addr (vm, dev, *pp), dev->numa_node,
+               align);
+  return rv;
+}
+
+/* Free DMA memory previously allocated with vnet_dev_dma_mem_alloc().
+ * Silently returns when p is NULL or the bus has no free op. */
+void
+vnet_dev_dma_mem_free (vlib_main_t *vm, vnet_dev_t *dev, void *p)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index);
+
+  vnet_dev_validate (vm, dev);
+
+  if (p == 0 || !bus->ops.dma_mem_free_fn)
+    return;
+
+  return bus->ops.dma_mem_free_fn (vm, dev, p);
+}
+
+/* vnet admin up/down handler: start the port when going up, stop it
+ * when going down.  Both operations are dispatched to the device
+ * process context. */
+clib_error_t *
+vnet_dev_admin_up_down_fn (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
+{
+  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+  u32 is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+
+  /* no-op when the port is already in the requested state */
+  if (is_up && p->started == 0)
+    rv = vnet_dev_process_call_port_op (vm, p, vnet_dev_port_start);
+  else if (!is_up && p->started)
+    rv = vnet_dev_process_call_port_op_no_rv (vm, p, vnet_dev_port_stop);
+
+  if (rv != VNET_DEV_OK)
+    return clib_error_return (0, "failed to change port admin state: %U",
+                              format_vnet_dev_rv, rv);
+
+  return 0;
+}
+
+/* vnet feature-arc update callback.  When feature configuration on the
+ * ethernet port RX arc changes for an interface owned by a dev port,
+ * cache the feature config index and next node in the port and ask
+ * every RX queue runtime to pick up the new state. */
+static void
+vnet_dev_feature_update_cb (u32 sw_if_index, u8 arc_index, u8 is_enable,
+                            void *cb)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_feature_main_t *fm = &feature_main;
+  vnet_feature_config_main_t *cm;
+  vnet_dev_main_t *vdm = &vnet_dev_main;
+  vnet_dev_port_t *port;
+  vnet_hw_interface_t *hw;
+  u32 current_config_index = ~0;
+  u32 next_index = ~0;
+  int update_runtime = 0;
+
+  /* only the ethernet port RX arc is handled here */
+  if (arc_index != vdm->eth_port_rx_feature_arc_index)
+    return;
+
+  hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+  port = vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+
+  /* ignore interfaces not directly owned by this port */
+  if (port == 0 || port->intf.sw_if_index != sw_if_index)
+    return;
+
+  if (vnet_have_features (arc_index, sw_if_index))
+    {
+      cm = &fm->feature_config_mains[arc_index];
+      current_config_index =
+        vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
+      vnet_get_config_data (&cm->config_main, &current_config_index,
+                            &next_index, 0);
+      if (port->intf.feature_arc == 0 ||
+          port->intf.rx_next_index != next_index ||
+          port->intf.current_config_index != current_config_index)
+        {
+          port->intf.current_config_index = current_config_index;
+          port->intf.rx_next_index = next_index;
+          port->intf.feature_arc_index = arc_index;
+          port->intf.feature_arc = 1;
+          update_runtime = 1;
+        }
+    }
+  else
+    {
+      if (port->intf.feature_arc)
+        {
+          /* arc emptied - fall back to the redirect node, if any, or
+           * the default next index for this port type */
+          port->intf.current_config_index = 0;
+          port->intf.rx_next_index =
+            port->intf.redirect_to_node ?
+              port->intf.redirect_to_node_next_index :
+              vnet_dev_default_next_index_by_port_type[port->attr.type];
+          port->intf.feature_arc_index = 0;
+          port->intf.feature_arc = 0;
+          update_runtime = 1;
+        }
+    }
+
+  if (update_runtime)
+    {
+      foreach_vnet_dev_port_rx_queue (rxq, port)
+        vnet_dev_rx_queue_rt_request (
+          vm, rxq,
+          (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1,
+                                        .update_feature_arc = 1 });
+      /* fixed log-message typo: "chgange" -> "change" */
+      log_debug (port->dev, "runtime update requested due to change in "
+                            "feature arc configuration");
+    }
+}
+
+/* qsort-style comparator: order driver registrations by descending
+ * priority so higher-priority drivers are considered first. */
+static int
+sort_driver_registrations (void *a0, void *a1)
+{
+  vnet_dev_driver_registration_t *ra = *(vnet_dev_driver_registration_t **) a0;
+  vnet_dev_driver_registration_t *rb = *(vnet_dev_driver_registration_t **) a1;
+
+  if (ra->priority != rb->priority)
+    return ra->priority > rb->priority ? -1 : 1;
+
+  return 0;
+}
+
+/* Init function for the dev infrastructure: registers buses, sorts and
+ * registers drivers (creating one vnet device class per driver), and
+ * allocates per-thread runtime temp space if any driver asked for it. */
+static clib_error_t *
+vnet_dev_main_init (vlib_main_t *vm)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_driver_registration_t **drv = 0;
+  u32 temp_space_sz = 0;
+
+  dm->device_index_by_id = hash_create_string (0, sizeof (uword));
+
+  for (vnet_dev_bus_registration_t *r = dm->bus_registrations; r;
+       r = r->next_registration)
+    {
+      vnet_dev_bus_t *bus;
+      pool_get_zero (dm->buses, bus);
+      bus->registration = r;
+      bus->index = bus - dm->buses;
+      bus->ops = r->ops;
+      /* bus private data must be non-zero and fit vnet_dev_t bus_data;
+       * fixed garbled error message ("too big not specified") */
+      if (!r->device_data_size ||
+          r->device_data_size > STRUCT_SIZE_OF (vnet_dev_t, bus_data))
+        return clib_error_return (
+          0, "bus device data for bus '%s' is too big or not specified",
+          r->name);
+
+      log_debug (0, "bus '%s' registered", r->name);
+    }
+
+  for (vnet_dev_driver_registration_t *r = dm->driver_registrations; r;
+       r = r->next_registration)
+    vec_add1 (drv, r);
+
+  /* higher-priority drivers first */
+  vec_sort_with_function (drv, sort_driver_registrations);
+
+  vec_foreach_pointer (r, drv)
+    {
+      vnet_dev_driver_t *driver;
+      vnet_dev_bus_t *bus;
+      vnet_device_class_t *dev_class;
+      int bus_index = -1;
+
+      /* resolve the bus this driver registered against */
+      pool_foreach (bus, dm->buses)
+        {
+          if (strcmp (bus->registration->name, r->bus) == 0)
+            {
+              bus_index = bus->index;
+              break;
+            }
+        }
+
+      if (bus_index < 0)
+        return clib_error_return (0, "unknown bus '%s'", r->bus);
+
+      pool_get_zero (dm->drivers, driver);
+      driver->registration = r;
+      driver->index = driver - dm->drivers;
+      driver->bus_index = bus_index;
+      driver->ops = r->ops;
+      /* each driver gets its own vnet device class */
+      dev_class = clib_mem_alloc (sizeof (vnet_device_class_t));
+      *dev_class = (vnet_device_class_t){
+        .name = r->name,
+        .format_device_name = format_vnet_dev_interface_name,
+        .format_device = format_vnet_dev_interface_info,
+        .admin_up_down_function = vnet_dev_admin_up_down_fn,
+        .rx_redirect_to_node = vnet_dev_set_interface_next_node,
+        .clear_counters = vnet_dev_clear_hw_interface_counters,
+        .mac_addr_change_function = vnet_dev_port_mac_change,
+        .mac_addr_add_del_function = vnet_dev_add_del_mac_address,
+        .flow_ops_function = vnet_dev_flow_ops_fn,
+        .format_flow = format_vnet_dev_flow,
+        .set_rss_queues_function = vnet_dev_interface_set_rss_queues,
+      };
+      driver->dev_class_index = vnet_register_device_class (vm, dev_class);
+      log_debug (0, "driver '%s' registered on bus '%s'", r->name,
+                 bus->registration->name);
+
+      /* track the largest per-thread temp space any driver requested */
+      if (temp_space_sz < r->runtime_temp_space_sz)
+        temp_space_sz = r->runtime_temp_space_sz;
+    }
+
+  if (dm->startup_config)
+    log_debug (0, "startup config: %v", dm->startup_config);
+
+  vec_free (drv);
+
+  if (temp_space_sz > 0)
+    {
+      /* round to a power of two and allocate one slot per thread */
+      const u32 align = CLIB_CACHE_LINE_BYTES;
+      u32 sz = round_pow2 (temp_space_sz, align);
+      dm->log2_runtime_temp_space_sz =
+        get_lowest_set_bit_index (max_pow2 (sz));
+      sz = 1 << dm->log2_runtime_temp_space_sz;
+      sz *= vlib_get_n_threads ();
+      dm->runtime_temp_spaces = clib_mem_alloc_aligned (sz, align);
+      clib_memset (dm->runtime_temp_spaces, 0, sz);
+      log_debug (0,
+                 "requested %u bytes for runtime temp storage, allocated %u "
+                 "per thread (total %u)",
+                 temp_space_sz, 1 << dm->log2_runtime_temp_space_sz, sz);
+    }
+
+  vnet_feature_register (vnet_dev_feature_update_cb, 0);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (vnet_dev_main_init);
+
+/* Called when the number of worker threads changes: re-allocate the
+ * per-thread runtime temp space array to match the new thread count. */
+clib_error_t *
+vnet_dev_num_workers_change (vlib_main_t *vm)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+
+  if (dm->log2_runtime_temp_space_sz > 0)
+    {
+      const u32 align = CLIB_CACHE_LINE_BYTES;
+      uword sz =
+        (1ULL << dm->log2_runtime_temp_space_sz) * vlib_get_n_threads ();
+      if (dm->runtime_temp_spaces)
+        clib_mem_free (dm->runtime_temp_spaces);
+      dm->runtime_temp_spaces = clib_mem_alloc_aligned (sz, align);
+      clib_memset (dm->runtime_temp_spaces, 0, sz);
+      /* NOTE(review): sz is uword but printed with %u — confirm clib
+       * format handles this consistently on all ABIs */
+      log_debug (0, "runtime temp storage resized to %u", sz);
+    }
+
+  return 0;
+}
+
+VLIB_NUM_WORKERS_CHANGE_FN (vnet_dev_num_workers_change);
diff --git a/src/vnet/dev/dev.h b/src/vnet/dev/dev.h
new file mode 100644
index 00000000000..bbf2f9dff21
--- /dev/null
+++ b/src/vnet/dev/dev.h
@@ -0,0 +1,753 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_H_
+#define _VNET_DEV_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/types.h>
+#include <vnet/dev/args.h>
+
+#define VNET_DEV_DEVICE_ID_PREFIX_DELIMITER "/"
+
+#define foreach_vnet_dev_port_type \
+ _ (0, UNKNOWN) \
+ _ (1, ETHERNET)
+
+typedef enum
+{
+#define _(b, n) VNET_DEV_PORT_TYPE_##n = (1U << (b)),
+ foreach_vnet_dev_port_type
+#undef _
+} vnet_dev_port_type_t;
+
+#define foreach_vnet_dev_port_caps \
+ _ (interrupt_mode) \
+ _ (rss) \
+ _ (change_max_rx_frame_size) \
+ _ (mac_filter)
+
+#define foreach_vnet_dev_port_rx_offloads _ (ip4_cksum)
+
+#define foreach_vnet_dev_port_tx_offloads \
+ _ (ip4_cksum) \
+ _ (tcp_gso) \
+ _ (udp_gso)
+
+typedef union
+{
+ struct
+ {
+#define _(n) u8 n : 1;
+ foreach_vnet_dev_port_caps
+#undef _
+ };
+ u8 as_number;
+} vnet_dev_port_caps_t;
+
+typedef union
+{
+ struct
+ {
+#define _(n) u8 n : 1;
+ foreach_vnet_dev_port_rx_offloads
+#undef _
+ };
+ u8 as_number;
+} vnet_dev_port_rx_offloads_t;
+
+typedef union
+{
+ struct
+ {
+#define _(n) u8 n : 1;
+ foreach_vnet_dev_port_tx_offloads
+#undef _
+ };
+ u8 as_number;
+} vnet_dev_port_tx_offloads_t;
+
+typedef union
+{
+ u8 eth_mac[6];
+ u8 raw[8];
+} vnet_dev_hw_addr_t;
+
+typedef struct vnet_dev_bus_registration vnet_dev_bus_registration_t;
+typedef struct vnet_dev_driver_registration vnet_dev_driver_registration_t;
+
+typedef struct vnet_dev vnet_dev_t;
+typedef struct vnet_dev_port vnet_dev_port_t;
+typedef struct vnet_dev_rx_queue vnet_dev_rx_queue_t;
+typedef struct vnet_dev_tx_queue vnet_dev_tx_queue_t;
+typedef struct vnet_dev_bus_registration vnet_dev_bus_registration_t;
+typedef struct vnet_dev_driver_registration vnet_dev_driver_registration_t;
+typedef struct vnet_dev_counter vnet_dev_counter_t;
+typedef struct vnet_dev_counter_main vnet_dev_counter_main_t;
+typedef struct vnet_dev_port_cfg_change_req vnet_dev_port_cfg_change_req_t;
+
+typedef vnet_dev_rv_t (vnet_dev_op_t) (vlib_main_t *, vnet_dev_t *);
+typedef vnet_dev_rv_t (vnet_dev_port_op_t) (vlib_main_t *, vnet_dev_port_t *);
+typedef vnet_dev_rv_t (vnet_dev_port_cfg_change_op_t) (
+ vlib_main_t *, vnet_dev_port_t *, vnet_dev_port_cfg_change_req_t *);
+typedef vnet_dev_rv_t (vnet_dev_rx_queue_op_t) (vlib_main_t *,
+ vnet_dev_rx_queue_t *);
+typedef vnet_dev_rv_t (vnet_dev_tx_queue_op_t) (vlib_main_t *,
+ vnet_dev_tx_queue_t *);
+typedef void (vnet_dev_op_no_rv_t) (vlib_main_t *, vnet_dev_t *);
+typedef void (vnet_dev_port_op_no_rv_t) (vlib_main_t *, vnet_dev_port_t *);
+typedef void (vnet_dev_rx_queue_op_no_rv_t) (vlib_main_t *,
+ vnet_dev_rx_queue_t *);
+typedef void (vnet_dev_tx_queue_op_no_rv_t) (vlib_main_t *,
+ vnet_dev_tx_queue_t *);
+
+typedef u16 vnet_dev_queue_id_t;
+typedef u16 vnet_dev_bus_index_t;
+typedef u16 vnet_dev_driver_index_t;
+
+typedef struct
+{
+ vnet_dev_rx_queue_op_t *alloc;
+ vnet_dev_rx_queue_op_t *start;
+ vnet_dev_rx_queue_op_no_rv_t *stop;
+ vnet_dev_rx_queue_op_no_rv_t *free;
+ format_function_t *format_info;
+} vnet_dev_rx_queue_ops_t;
+
+typedef struct
+{
+ vnet_dev_tx_queue_op_t *alloc;
+ vnet_dev_tx_queue_op_t *start;
+ vnet_dev_tx_queue_op_no_rv_t *stop;
+ vnet_dev_tx_queue_op_no_rv_t *free;
+ format_function_t *format_info;
+} vnet_dev_tx_queue_ops_t;
+
+typedef struct
+{
+ u16 data_size;
+ u16 min_size;
+ u16 max_size;
+ u16 default_size;
+ u8 multiplier;
+ u8 size_is_power_of_two : 1;
+} vnet_dev_queue_config_t;
+
+#define foreach_vnet_dev_port_cfg_type \
+ _ (PROMISC_MODE) \
+ _ (MAX_RX_FRAME_SIZE) \
+ _ (CHANGE_PRIMARY_HW_ADDR) \
+ _ (ADD_SECONDARY_HW_ADDR) \
+ _ (REMOVE_SECONDARY_HW_ADDR) \
+ _ (RXQ_INTR_MODE_ENABLE) \
+ _ (RXQ_INTR_MODE_DISABLE) \
+ _ (ADD_RX_FLOW) \
+ _ (DEL_RX_FLOW) \
+ _ (GET_RX_FLOW_COUNTER) \
+ _ (RESET_RX_FLOW_COUNTER)
+
+typedef enum
+{
+ VNET_DEV_PORT_CFG_UNKNOWN,
+#define _(n) VNET_DEV_PORT_CFG_##n,
+ foreach_vnet_dev_port_cfg_type
+#undef _
+} __clib_packed vnet_dev_port_cfg_type_t;
+
+typedef struct vnet_dev_port_cfg_change_req
+{
+ vnet_dev_port_cfg_type_t type;
+ u8 validated : 1;
+ u8 all_queues : 1;
+
+ union
+ {
+ u8 promisc : 1;
+ vnet_dev_hw_addr_t addr;
+ u16 max_rx_frame_size;
+ vnet_dev_queue_id_t queue_id;
+ struct
+ {
+ u32 flow_index;
+ uword *private_data;
+ };
+ };
+
+} vnet_dev_port_cfg_change_req_t;
+
+typedef struct
+{
+ vnet_dev_hw_addr_t hw_addr;
+ u16 max_rx_queues;
+ u16 max_tx_queues;
+ u16 max_supported_rx_frame_size;
+ vnet_dev_port_type_t type;
+ vnet_dev_port_caps_t caps;
+ vnet_dev_port_rx_offloads_t rx_offloads;
+ vnet_dev_port_tx_offloads_t tx_offloads;
+} vnet_dev_port_attr_t;
+
+typedef enum
+{
+ VNET_DEV_PERIODIC_OP_TYPE_DEV = 1,
+ VNET_DEV_PERIODIC_OP_TYPE_PORT = 2,
+} __clib_packed vnet_dev_periodic_op_type_t;
+
+typedef struct
+{
+ f64 interval;
+ f64 last_run;
+ vnet_dev_periodic_op_type_t type;
+ union
+ {
+ vnet_dev_t *dev;
+ vnet_dev_port_t *port;
+ void *arg;
+ };
+ union
+ {
+ vnet_dev_op_no_rv_t *dev_op;
+ vnet_dev_port_op_no_rv_t *port_op;
+ void *op;
+ };
+} vnet_dev_periodic_op_t;
+
+typedef struct
+{
+ struct _vlib_node_fn_registration *registrations;
+ format_function_t *format_trace;
+ vlib_error_desc_t *error_counters;
+ u16 n_error_counters;
+} vnet_dev_node_t;
+
+typedef struct
+{
+ vnet_dev_op_t *alloc;
+ vnet_dev_op_t *init;
+ vnet_dev_op_no_rv_t *deinit;
+ vnet_dev_op_t *reset;
+ vnet_dev_op_no_rv_t *free;
+ u8 *(*probe) (vlib_main_t *, vnet_dev_bus_index_t, void *);
+ format_function_t *format_info;
+} vnet_dev_ops_t;
+
+typedef struct
+{
+ vnet_dev_port_op_t *alloc;
+ vnet_dev_port_op_t *init;
+ vnet_dev_port_cfg_change_op_t *config_change;
+ vnet_dev_port_cfg_change_op_t *config_change_validate;
+ vnet_dev_port_op_t *start;
+ vnet_dev_port_op_no_rv_t *stop;
+ vnet_dev_port_op_no_rv_t *deinit;
+ vnet_dev_port_op_no_rv_t *free;
+ format_function_t *format_status;
+ format_function_t *format_flow;
+} vnet_dev_port_ops_t;
+
+typedef union
+{
+ struct
+ {
+ u8 update_next_index : 1;
+ u8 update_feature_arc : 1;
+ u8 suspend_off : 1;
+ u8 suspend_on : 1;
+ };
+ u8 as_number;
+} vnet_dev_rx_queue_rt_req_t;
+
+typedef struct vnet_dev_rx_queue
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vnet_dev_port_t *port;
+ u16 rx_thread_index;
+ u16 index;
+ vnet_dev_counter_main_t *counter_main;
+ CLIB_CACHE_LINE_ALIGN_MARK (runtime0);
+ vnet_dev_rx_queue_t *next_on_thread;
+ u8 interrupt_mode : 1;
+ u8 enabled : 1;
+ u8 started : 1;
+ u8 suspended : 1;
+ vnet_dev_queue_id_t queue_id;
+ u16 size;
+ u16 next_index;
+ vnet_dev_rx_queue_rt_req_t runtime_request;
+ CLIB_CACHE_LINE_ALIGN_MARK (runtime1);
+ vlib_buffer_template_t buffer_template;
+ CLIB_CACHE_LINE_ALIGN_MARK (driver_data);
+ u8 data[];
+} vnet_dev_rx_queue_t;
+
+STATIC_ASSERT_SIZEOF (vnet_dev_rx_queue_t, 3 * CLIB_CACHE_LINE_BYTES);
+
+typedef struct vnet_dev_tx_queue
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vnet_dev_port_t *port;
+ clib_bitmap_t *assigned_threads;
+ u16 index;
+ vnet_dev_counter_main_t *counter_main;
+ CLIB_CACHE_LINE_ALIGN_MARK (runtime0);
+ vnet_dev_queue_id_t queue_id;
+ u8 started : 1;
+ u8 enabled : 1;
+ u8 lock_needed : 1;
+ u8 lock;
+ u16 size;
+ CLIB_ALIGN_MARK (private_data, 16);
+ u8 data[];
+} vnet_dev_tx_queue_t;
+
+STATIC_ASSERT_SIZEOF (vnet_dev_tx_queue_t, 2 * CLIB_CACHE_LINE_BYTES);
+
+typedef struct vnet_dev_port
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vnet_dev_t *dev;
+ vnet_dev_port_id_t port_id;
+ vnet_dev_driver_index_t driver_index;
+ u8 initialized : 1;
+ u8 started : 1;
+ u8 link_up : 1;
+ u8 promisc : 1;
+ u8 interface_created : 1;
+ u8 rx_node_assigned : 1;
+ vnet_dev_counter_main_t *counter_main;
+ vnet_dev_queue_config_t rx_queue_config;
+ vnet_dev_queue_config_t tx_queue_config;
+ vnet_dev_port_attr_t attr;
+ u32 max_rx_frame_size;
+ vnet_dev_hw_addr_t primary_hw_addr;
+ vnet_dev_hw_addr_t *secondary_hw_addr;
+ u32 index;
+ u32 speed;
+ vnet_dev_rx_queue_t **rx_queues;
+ vnet_dev_tx_queue_t **tx_queues;
+ vnet_dev_port_ops_t port_ops;
+ vnet_dev_arg_t *args;
+ vnet_dev_rx_queue_ops_t rx_queue_ops;
+ vnet_dev_tx_queue_ops_t tx_queue_ops;
+ vnet_dev_node_t rx_node;
+ vnet_dev_node_t tx_node;
+
+ struct
+ {
+ vnet_dev_if_name_t name;
+ u32 dev_instance;
+ u32 rx_node_index;
+ u32 current_config_index;
+ u16 rx_next_index;
+ u16 redirect_to_node_next_index;
+ u8 feature_arc_index;
+ u8 feature_arc : 1;
+ u8 redirect_to_node : 1;
+ u8 default_is_intr_mode : 1;
+ u32 tx_node_index;
+ u32 hw_if_index;
+ u32 sw_if_index;
+ u16 num_rx_queues;
+ u16 num_tx_queues;
+ u16 txq_sz;
+ u16 rxq_sz;
+ } intf;
+
+ CLIB_CACHE_LINE_ALIGN_MARK (data0);
+ u8 data[];
+} vnet_dev_port_t;
+
+typedef struct vnet_dev
+{
+ vnet_dev_device_id_t device_id;
+ u16 initialized : 1;
+ u16 not_first_init : 1;
+ u16 va_dma : 1;
+ u16 process_node_quit : 1;
+ u16 process_node_periodic : 1;
+ u16 poll_stats : 1;
+ u16 bus_index;
+ u8 numa_node;
+ u16 max_rx_queues;
+ u16 max_tx_queues;
+ vnet_dev_driver_index_t driver_index;
+ u32 index;
+ u32 process_node_index;
+ u8 bus_data[32] __clib_aligned (16);
+ vnet_dev_ops_t ops;
+ vnet_dev_port_t **ports;
+ vnet_dev_periodic_op_t *periodic_ops;
+ u8 *description;
+ vnet_dev_arg_t *args;
+ u8 __clib_aligned (16)
+ data[];
+} vnet_dev_t;
+
+typedef struct
+{
+ u16 vendor_id, device_id;
+ char *description;
+} vnet_dev_match_t;
+
+#define VNET_DEV_MATCH(...) \
+ (vnet_dev_match_t[]) \
+ { \
+ __VA_ARGS__, {} \
+ }
+
+typedef struct
+{
+ vnet_dev_op_t *device_open;
+ vnet_dev_op_no_rv_t *device_close;
+ vnet_dev_rv_t (*dma_mem_alloc_fn) (vlib_main_t *, vnet_dev_t *, u32, u32,
+ void **);
+ void (*dma_mem_free_fn) (vlib_main_t *, vnet_dev_t *, void *);
+ void *(*get_device_info) (vlib_main_t *, char *);
+ void (*free_device_info) (vlib_main_t *, void *);
+ format_function_t *format_device_info;
+ format_function_t *format_device_addr;
+} vnet_dev_bus_ops_t;
+
+struct vnet_dev_bus_registration
+{
+ vnet_dev_bus_registration_t *next_registration;
+ vnet_dev_driver_name_t name;
+ u16 device_data_size;
+ vnet_dev_bus_ops_t ops;
+};
+
+struct vnet_dev_driver_registration
+{
+ vnet_dev_driver_registration_t *next_registration;
+ u8 bus_master_enable : 1;
+ vnet_dev_driver_name_t name;
+ vnet_dev_bus_name_t bus;
+ u16 device_data_sz;
+ u16 runtime_temp_space_sz;
+ vnet_dev_match_t *match;
+ int priority;
+ vnet_dev_ops_t ops;
+ vnet_dev_arg_t *args;
+};
+
+typedef struct
+{
+ u32 index;
+ vnet_dev_bus_registration_t *registration;
+ vnet_dev_bus_ops_t ops;
+} vnet_dev_bus_t;
+
+typedef struct
+{
+ u32 index;
+ void *dev_data;
+ vnet_dev_driver_registration_t *registration;
+ u32 dev_class_index;
+ vnet_dev_bus_index_t bus_index;
+ vnet_dev_ops_t ops;
+} vnet_dev_driver_t;
+
+/* Global state for the dev infrastructure (single instance:
+ * vnet_dev_main). */
+typedef struct
+{
+  vnet_dev_bus_t *buses;	/* pool of registered buses */
+  vnet_dev_driver_t *drivers;	/* pool of registered drivers */
+  vnet_dev_t **devices;		/* pool of attached device pointers */
+  vnet_dev_port_t **ports_by_dev_instance;
+  vnet_dev_bus_registration_t *bus_registrations;
+  vnet_dev_driver_registration_t *driver_registrations;
+  void *runtime_temp_spaces;	/* per-thread scratch space */
+  u32 log2_runtime_temp_space_sz;
+  u32 *free_process_node_indices; /* recycled dev process node indices */
+  u32 *free_rx_node_indices;
+  uword *device_index_by_id;	/* string hash: device id -> device index */
+
+  u8 *startup_config;		/* raw "devices" startup-config text */
+  u16 next_rx_queue_thread;
+  u8 eth_port_rx_feature_arc_index;
+} vnet_dev_main_t;
+
+extern vnet_dev_main_t vnet_dev_main;
+
+/* Arguments a driver passes to vnet_dev_port_add () to describe a new
+   port and its rx/tx queue templates. */
+typedef struct
+{
+ struct
+ {
+ vnet_dev_port_attr_t attr;
+ vnet_dev_port_ops_t ops;
+ vnet_dev_arg_t *args;
+ u16 data_size;
+ void *initial_data;
+ } port;
+
+ vnet_dev_node_t *rx_node;
+ vnet_dev_node_t *tx_node;
+
+ struct
+ {
+ vnet_dev_queue_config_t config;
+ vnet_dev_rx_queue_ops_t ops;
+ } rx_queue;
+
+ struct
+ {
+ vnet_dev_queue_config_t config;
+ vnet_dev_tx_queue_ops_t ops;
+ } tx_queue;
+} vnet_dev_port_add_args_t;
+
+/* Link state change notification: 'change' flags say which of the value
+   fields below are valid. */
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u8 link_speed : 1;
+ u8 link_state : 1;
+ u8 link_duplex : 1;
+ };
+ u8 any;
+ } change;
+ u8 link_state : 1;
+ u8 full_duplex : 1;
+ u32 link_speed;
+} vnet_dev_port_state_changes_t;
+
+/* args.c */
+vnet_dev_rv_t vnet_dev_arg_parse (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_arg_t *, u8 *);
+void vnet_dev_arg_free (vnet_dev_arg_t **);
+void vnet_dev_arg_clear_value (vnet_dev_arg_t *);
+format_function_t format_vnet_dev_arg_type;
+format_function_t format_vnet_dev_arg_value;
+format_function_t format_vnet_dev_args;
+
+/* dev.c */
+vnet_dev_t *vnet_dev_alloc (vlib_main_t *, vnet_dev_device_id_t,
+ vnet_dev_driver_t *);
+void vnet_dev_free (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_init (vlib_main_t *, vnet_dev_t *);
+void vnet_dev_deinit (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_reset (vlib_main_t *, vnet_dev_t *);
+void vnet_dev_detach (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_port_add (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_port_id_t,
+ vnet_dev_port_add_args_t *);
+vnet_dev_rv_t vnet_dev_dma_mem_alloc (vlib_main_t *, vnet_dev_t *, u32, u32,
+ void **);
+void vnet_dev_dma_mem_free (vlib_main_t *, vnet_dev_t *, void *);
+vnet_dev_bus_t *vnet_dev_find_device_bus (vlib_main_t *, vnet_dev_device_id_t);
+void *vnet_dev_get_device_info (vlib_main_t *, vnet_dev_device_id_t);
+
+/* error.c */
+clib_error_t *vnet_dev_port_err (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_rv_t, char *, ...);
+int vnet_dev_flow_err (vlib_main_t *, vnet_dev_rv_t);
+
+/* handlers.c */
+clib_error_t *vnet_dev_port_set_max_frame_size (vnet_main_t *,
+ vnet_hw_interface_t *, u32);
+u32 vnet_dev_port_eth_flag_change (vnet_main_t *, vnet_hw_interface_t *, u32);
+clib_error_t *vnet_dev_port_mac_change (vnet_hw_interface_t *, const u8 *,
+ const u8 *);
+clib_error_t *vnet_dev_add_del_mac_address (vnet_hw_interface_t *, const u8 *,
+ u8);
+int vnet_dev_flow_ops_fn (vnet_main_t *, vnet_flow_dev_op_t, u32, u32,
+ uword *);
+clib_error_t *vnet_dev_interface_set_rss_queues (vnet_main_t *,
+ vnet_hw_interface_t *,
+ clib_bitmap_t *);
+void vnet_dev_clear_hw_interface_counters (u32);
+void vnet_dev_set_interface_next_node (vnet_main_t *, u32, u32);
+
+/* port.c */
+vnet_dev_rv_t vnet_dev_port_start (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t vnet_dev_port_start_all_rx_queues (vlib_main_t *,
+ vnet_dev_port_t *);
+vnet_dev_rv_t vnet_dev_port_start_all_tx_queues (vlib_main_t *,
+ vnet_dev_port_t *);
+void vnet_dev_port_stop (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_deinit (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_free (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_add_counters (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_counter_t *, u16);
+void vnet_dev_port_free_counters (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_update_tx_node_runtime (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_state_change (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_state_changes_t);
+void vnet_dev_port_clear_counters (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t
+vnet_dev_port_cfg_change_req_validate (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+vnet_dev_rv_t vnet_dev_port_cfg_change (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+vnet_dev_rv_t vnet_dev_port_if_create (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t vnet_dev_port_if_remove (vlib_main_t *, vnet_dev_port_t *);
+
+/* queue.c */
+vnet_dev_rv_t vnet_dev_rx_queue_alloc (vlib_main_t *, vnet_dev_port_t *, u16);
+vnet_dev_rv_t vnet_dev_tx_queue_alloc (vlib_main_t *, vnet_dev_port_t *, u16);
+void vnet_dev_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *);
+void vnet_dev_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *);
+void vnet_dev_rx_queue_add_counters (vlib_main_t *, vnet_dev_rx_queue_t *,
+ vnet_dev_counter_t *, u16);
+void vnet_dev_rx_queue_free_counters (vlib_main_t *, vnet_dev_rx_queue_t *);
+void vnet_dev_tx_queue_add_counters (vlib_main_t *, vnet_dev_tx_queue_t *,
+ vnet_dev_counter_t *, u16);
+void vnet_dev_tx_queue_free_counters (vlib_main_t *, vnet_dev_tx_queue_t *);
+vnet_dev_rv_t vnet_dev_rx_queue_start (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t vnet_dev_tx_queue_start (vlib_main_t *, vnet_dev_tx_queue_t *);
+void vnet_dev_rx_queue_stop (vlib_main_t *, vnet_dev_rx_queue_t *);
+void vnet_dev_tx_queue_stop (vlib_main_t *, vnet_dev_tx_queue_t *);
+
+/* process.c */
+vnet_dev_rv_t vnet_dev_process_create (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_process_call_op (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_op_t *);
+vnet_dev_rv_t vnet_dev_process_call_op_no_rv (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_op_no_rv_t *);
+void vnet_dev_process_call_op_no_wait (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_op_no_rv_t *);
+vnet_dev_rv_t vnet_dev_process_call_port_op (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_op_t *);
+vnet_dev_rv_t vnet_dev_process_call_port_op_no_rv (vlib_main_t *vm,
+ vnet_dev_port_t *,
+ vnet_dev_port_op_no_rv_t *);
+void vnet_dev_process_call_port_op_no_wait (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_op_no_rv_t *);
+vnet_dev_rv_t
+vnet_dev_process_port_cfg_change_req (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+void vnet_dev_process_quit (vlib_main_t *, vnet_dev_t *);
+void vnet_dev_poll_dev_add (vlib_main_t *, vnet_dev_t *, f64,
+ vnet_dev_op_no_rv_t *);
+void vnet_dev_poll_dev_remove (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_op_no_rv_t *);
+void vnet_dev_poll_port_add (vlib_main_t *, vnet_dev_port_t *, f64,
+ vnet_dev_port_op_no_rv_t *);
+void vnet_dev_poll_port_remove (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_op_no_rv_t *);
+
+/* One per-thread runtime reconfiguration operation executed by
+   vnet_dev_rt_exec_ops (). */
+typedef struct
+{
+ u16 thread_index;
+ u8 completed;
+ u8 in_order;
+ vnet_dev_port_t *port;
+} vnet_dev_rt_op_t;
+
+vnet_dev_rv_t vnet_dev_rt_exec_ops (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_rt_op_t *, u32);
+
+/* format.c */
+/* Options controlling verbosity of the show/format functions below. */
+typedef struct
+{
+ u8 counters : 1;
+ u8 show_zero_counters : 1;
+ u8 debug : 1;
+} vnet_dev_format_args_t;
+
+format_function_t format_vnet_dev_addr;
+format_function_t format_vnet_dev_flags;
+format_function_t format_vnet_dev_hw_addr;
+format_function_t format_vnet_dev_info;
+format_function_t format_vnet_dev_interface_info;
+format_function_t format_vnet_dev_interface_name;
+format_function_t format_vnet_dev_log;
+format_function_t format_vnet_dev_port_caps;
+format_function_t format_vnet_dev_port_flags;
+format_function_t format_vnet_dev_port_info;
+format_function_t format_vnet_dev_port_rx_offloads;
+format_function_t format_vnet_dev_port_tx_offloads;
+format_function_t format_vnet_dev_rv;
+format_function_t format_vnet_dev_rx_queue_info;
+format_function_t format_vnet_dev_tx_queue_info;
+format_function_t format_vnet_dev_flow;
+unformat_function_t unformat_vnet_dev_flags;
+unformat_function_t unformat_vnet_dev_port_flags;
+
+/* Per-node runtime data of the rx node: head of the linked list of rx
+   queues polled on this thread. */
+typedef struct
+{
+ vnet_dev_rx_queue_t *first_rx_queue;
+} vnet_dev_rx_node_runtime_t;
+
+STATIC_ASSERT (sizeof (vnet_dev_rx_node_runtime_t) <=
+ VLIB_NODE_RUNTIME_DATA_SIZE,
+ "must fit into runtime data");
+
+#define foreach_vnet_dev_port_rx_next \
+ _ (ETH_INPUT, "ethernet-input") \
+ _ (DROP, "error-drop")
+
+/* Next-node indices of the ethernet port rx node. */
+typedef enum
+{
+#define _(n, s) VNET_DEV_ETH_RX_PORT_NEXT_##n,
+ foreach_vnet_dev_port_rx_next
+#undef _
+ VNET_DEV_ETH_RX_PORT_N_NEXTS
+} vnet_dev_eth_port_rx_next_t;
+
+extern u16 vnet_dev_default_next_index_by_port_type[];
+extern vlib_node_registration_t port_rx_eth_node;
+
+typedef vnet_interface_output_runtime_t vnet_dev_tx_node_runtime_t;
+
+STATIC_ASSERT (sizeof (vnet_dev_tx_node_runtime_t) <=
+ VLIB_NODE_RUNTIME_DATA_SIZE,
+ "must fit into runtime data");
+
+/* Declare a bus registration and a constructor that prepends it to the
+   global vnet_dev_main.bus_registrations list before main () runs. */
+#define VNET_DEV_REGISTER_BUS(x, ...) \
+ __VA_ARGS__ vnet_dev_bus_registration_t __vnet_dev_bus_registration_##x; \
+ static void __clib_constructor __vnet_dev_bus_registration_fn_##x (void) \
+ { \
+ vnet_dev_main_t *dm = &vnet_dev_main; \
+ __vnet_dev_bus_registration_##x.next_registration = \
+ dm->bus_registrations; \
+ dm->bus_registrations = &__vnet_dev_bus_registration_##x; \
+ } \
+ __VA_ARGS__ vnet_dev_bus_registration_t __vnet_dev_bus_registration_##x
+
+/* Same pattern for driver registrations
+   (vnet_dev_main.driver_registrations). */
+#define VNET_DEV_REGISTER_DRIVER(x, ...) \
+ __VA_ARGS__ vnet_dev_driver_registration_t \
+ __vnet_dev_driver_registration_##x; \
+ static void __clib_constructor __vnet_dev_driver_registration_fn_##x (void) \
+ { \
+ vnet_dev_main_t *dm = &vnet_dev_main; \
+ __vnet_dev_driver_registration_##x.next_registration = \
+ dm->driver_registrations; \
+ dm->driver_registrations = &__vnet_dev_driver_registration_##x; \
+ } \
+ __VA_ARGS__ vnet_dev_driver_registration_t __vnet_dev_driver_registration_##x
+
+/* Define a march-variant node function and register it on the given
+   vnet_dev_node_t's registration list at startup; expands into the
+   function definition header, so it is followed by the function body. */
+#define VNET_DEV_NODE_FN(node) \
+ uword CLIB_MARCH_SFX (node##_fn) (vlib_main_t *, vlib_node_runtime_t *, \
+ vlib_frame_t *); \
+ static vlib_node_fn_registration_t CLIB_MARCH_SFX ( \
+ node##_fn_registration) = { \
+ .function = &CLIB_MARCH_SFX (node##_fn), \
+ }; \
+ \
+ static void __clib_constructor CLIB_MARCH_SFX ( \
+ node##_fn_multiarch_register) (void) \
+ { \
+ extern vnet_dev_node_t node; \
+ vlib_node_fn_registration_t *r; \
+ r = &CLIB_MARCH_SFX (node##_fn_registration); \
+ r->march_variant = CLIB_MARCH_SFX (CLIB_MARCH_VARIANT_TYPE); \
+ r->next_registration = (node).registrations; \
+ (node).registrations = r; \
+ } \
+ uword CLIB_MARCH_SFX (node##_fn)
+
+#define foreach_vnet_dev_port(p, d) pool_foreach_pointer (p, d->ports)
+#define foreach_vnet_dev_port_rx_queue(q, p) \
+ pool_foreach_pointer (q, p->rx_queues)
+#define foreach_vnet_dev_port_tx_queue(q, p) \
+ pool_foreach_pointer (q, p->tx_queues)
+
+#include <vnet/dev/dev_funcs.h>
+
+#endif /* _VNET_DEV_H_ */
diff --git a/src/vnet/dev/dev_api.c b/src/vnet/dev/dev_api.c
new file mode 100644
index 00000000000..5e9ac502b5d
--- /dev/null
+++ b/src/vnet/dev/dev_api.c
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/api.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+/* define message IDs */
+#include <dev/dev.api_enum.h>
+#include <dev/dev.api_types.h>
+
+static u16 vnet_dev_api_msg_id_base;
+
+#define REPLY_MSG_ID_BASE (vnet_dev_api_msg_id_base)
+#include <vlibapi/api_helper_macros.h>
+
+#define _(b, n, d) \
+ STATIC_ASSERT ((int) VL_API_DEV_FLAG_##n == (int) VNET_DEV_F_##n, "");
+foreach_vnet_dev_flag;
+#undef _
+
+#define _(b, n, d) \
+ STATIC_ASSERT ((int) VL_API_DEV_PORT_FLAG_##n == (int) VNET_DEV_PORT_F_##n, \
+ "");
+foreach_vnet_dev_port_flag;
+#undef _
+
+/* Handler for the dev_attach binary API message: copy the wire-format
+   arguments, attach the device and reply with the new device index or an
+   error string. */
+static void
+vl_api_dev_attach_t_handler (vl_api_dev_attach_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_dev_attach_reply_t *rmp;
+ vnet_dev_api_attach_args_t a = {};
+ vnet_dev_rv_t rv;
+ u8 *error_string = 0;
+
+ STATIC_ASSERT (sizeof (mp->device_id) == sizeof (a.device_id), "");
+ STATIC_ASSERT (sizeof (mp->driver_name) == sizeof (a.driver_name), "");
+ STATIC_ASSERT (sizeof (mp->flags) == sizeof (a.flags), "");
+
+ a.flags.n = mp->flags;
+ /* NOTE(review): strncpy does not NUL-terminate when the source fills the
+    buffer; presumably the API layer guarantees terminated strings here —
+    confirm. */
+ strncpy (a.device_id, (char *) mp->device_id, sizeof (a.device_id));
+ strncpy (a.driver_name, (char *) mp->driver_name, sizeof (a.driver_name));
+ vec_add (a.args, mp->args.buf, mp->args.length);
+
+ rv = vnet_dev_api_attach (vm, &a);
+
+ if (rv != VNET_DEV_OK)
+ error_string = format (0, "%U", format_vnet_dev_rv, rv);
+
+ /* vec_free () also NULLs a.args, so a single free before the reply
+    suffices (a second call after the reply would be a no-op). */
+ vec_free (a.args);
+
+ REPLY_MACRO3_END (VL_API_DEV_ATTACH_REPLY, vec_len (error_string), ({
+ rmp->retval = rv;
+ if (error_string)
+ {
+ rmp->dev_index = ~0;
+ vl_api_vec_to_api_string (error_string,
+ &rmp->error_string);
+ }
+ else
+ rmp->dev_index = a.dev_index;
+ }));
+
+ vec_free (error_string);
+}
+
+/* Handler for the dev_detach binary API message: detach the device
+   identified by dev_index and reply, attaching a formatted error string
+   on failure. */
+static void
+vl_api_dev_detach_t_handler (vl_api_dev_detach_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_dev_detach_reply_t *rmp;
+ vnet_dev_api_detach_args_t a = {};
+ vnet_dev_rv_t rv;
+ u8 *error_string = 0;
+
+ a.dev_index = mp->dev_index;
+
+ rv = vnet_dev_api_detach (vm, &a);
+
+ if (rv != VNET_DEV_OK)
+ error_string = format (0, "%U", format_vnet_dev_rv, rv);
+
+ REPLY_MACRO3_END (VL_API_DEV_DETACH_REPLY, vec_len (error_string), ({
+ rmp->retval = rv;
+ if (error_string)
+ vl_api_vec_to_api_string (error_string,
+ &rmp->error_string);
+ }));
+
+ vec_free (error_string);
+}
+
+/* Handler for the dev_create_port_if binary API message: create an
+   interface for the given device port and reply with its sw_if_index or
+   an error string. */
+static void
+vl_api_dev_create_port_if_t_handler (vl_api_dev_create_port_if_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_dev_create_port_if_reply_t *rmp;
+ vnet_dev_api_create_port_if_args_t a = {};
+ vnet_dev_rv_t rv;
+ u8 *error_string = 0;
+
+ STATIC_ASSERT (sizeof (mp->intf_name) == sizeof (a.intf_name), "");
+ STATIC_ASSERT (sizeof (mp->flags) == sizeof (a.flags), "");
+
+ a.flags.n = mp->flags;
+ /* Copy the scalar fields that share names between wire and internal
+    argument structs. */
+#define _(n) a.n = mp->n;
+ _ (dev_index)
+ _ (port_id)
+ _ (num_rx_queues)
+ _ (num_tx_queues)
+ _ (rx_queue_size)
+ _ (tx_queue_size)
+#undef _
+
+ strncpy (a.intf_name, (char *) mp->intf_name, sizeof (a.intf_name));
+ vec_add (a.args, mp->args.buf, mp->args.length);
+
+ rv = vnet_dev_api_create_port_if (vm, &a);
+
+ if (rv != VNET_DEV_OK)
+ error_string = format (0, "%U", format_vnet_dev_rv, rv);
+
+ /* vec_free () also NULLs a.args, so a single free before the reply
+    suffices (a second call after the reply would be a no-op). */
+ vec_free (a.args);
+
+ REPLY_MACRO3_END (VL_API_DEV_CREATE_PORT_IF_REPLY, vec_len (error_string), ({
+ rmp->retval = rv;
+ if (error_string)
+ {
+ rmp->sw_if_index = ~0;
+ vl_api_vec_to_api_string (error_string,
+ &rmp->error_string);
+ }
+ else
+ rmp->sw_if_index = a.sw_if_index;
+ }));
+
+ vec_free (error_string);
+}
+
+/* Handler for the dev_remove_port_if binary API message: remove the
+   interface identified by sw_if_index and reply, attaching a formatted
+   error string on failure. */
+static void
+vl_api_dev_remove_port_if_t_handler (vl_api_dev_remove_port_if_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_dev_remove_port_if_reply_t *rmp;
+ vnet_dev_api_remove_port_if_args_t a = {};
+ vnet_dev_rv_t rv;
+ u8 *error_string = 0;
+
+ a.sw_if_index = mp->sw_if_index;
+
+ rv = vnet_dev_api_remove_port_if (vm, &a);
+
+ if (rv != VNET_DEV_OK)
+ error_string = format (0, "%U", format_vnet_dev_rv, rv);
+
+ REPLY_MACRO3_END (VL_API_DEV_REMOVE_PORT_IF_REPLY, vec_len (error_string), ({
+ rmp->retval = rv;
+ if (error_string)
+ vl_api_vec_to_api_string (error_string,
+ &rmp->error_string);
+ }));
+
+ vec_free (error_string);
+}
+
+/* set up the API message handling tables */
+
+#include <dev/dev.api.c>
+
+/* Register this module's API messages and mark the handlers above as
+   thread-safe so they may run outside the main-thread barrier. */
+static clib_error_t *
+vnet_dev_api_hookup (vlib_main_t *vm)
+{
+ api_main_t *am = vlibapi_get_main ();
+
+ /* ask for a correctly-sized block of API message decode slots */
+ vnet_dev_api_msg_id_base = setup_message_id_table ();
+
+ foreach_int (i, VL_API_DEV_ATTACH, VL_API_DEV_DETACH,
+ VL_API_DEV_CREATE_PORT_IF, VL_API_DEV_REMOVE_PORT_IF)
+ vl_api_set_msg_thread_safe (am, vnet_dev_api_msg_id_base + i, 1);
+
+ return 0;
+}
diff --git a/src/vnet/dev/dev_funcs.h b/src/vnet/dev/dev_funcs.h
new file mode 100644
index 00000000000..521157abbec
--- /dev/null
+++ b/src/vnet/dev/dev_funcs.h
@@ -0,0 +1,332 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_FUNCS_H_
+#define _VNET_DEV_FUNCS_H_
+
+#include <vppinfra/clib.h>
+#include <vnet/dev/dev.h>
+
+/* Return the driver's private data area embedded at the tail of the
+   device struct. */
+static_always_inline void *
+vnet_dev_get_data (vnet_dev_t *dev)
+{
+ return dev->data;
+}
+
+/* Inverse of vnet_dev_get_data: recover the device from a pointer to its
+   private data area. */
+static_always_inline vnet_dev_t *
+vnet_dev_from_data (void *p)
+{
+ return (void *) ((u8 *) p - STRUCT_OFFSET_OF (vnet_dev_t, data));
+}
+
+/* Driver-private data area of a port. */
+static_always_inline void *
+vnet_dev_get_port_data (vnet_dev_port_t *port)
+{
+ return port->data;
+}
+
+/* Driver-private data area of an rx queue. */
+static_always_inline void *
+vnet_dev_get_rx_queue_data (vnet_dev_rx_queue_t *rxq)
+{
+ return rxq->data;
+}
+
+/* Driver-private data area of a tx queue. */
+static_always_inline void *
+vnet_dev_get_tx_queue_data (vnet_dev_tx_queue_t *txq)
+{
+ return txq->data;
+}
+
+/* Fast device lookup by pool index; the index must be valid (no
+   free-index check — see vnet_dev_by_index for the checked variant). */
+static_always_inline vnet_dev_t *
+vnet_dev_get_by_index (u32 index)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ return pool_elt_at_index (dm->devices, index)[0];
+}
+
+/* Fast port lookup by pool index within a device; index must be valid. */
+static_always_inline vnet_dev_port_t *
+vnet_dev_get_port_by_index (vnet_dev_t *dev, u32 index)
+{
+ return pool_elt_at_index (dev->ports, index)[0];
+}
+
+/* Checked port lookup by interface dev_instance; returns 0 when the
+   instance is not in use. */
+static_always_inline vnet_dev_port_t *
+vnet_dev_get_port_from_dev_instance (u32 dev_instance)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ if (pool_is_free_index (dm->ports_by_dev_instance, dev_instance))
+ return 0;
+ return pool_elt_at_index (dm->ports_by_dev_instance, dev_instance)[0];
+}
+
+/* Port lookup by hw interface index; cross-checks that the hw interface
+   really belongs to the port, returning 0 otherwise. */
+static_always_inline vnet_dev_port_t *
+vnet_dev_get_port_from_hw_if_index (u32 hw_if_index)
+{
+ vnet_hw_interface_t *hw;
+ vnet_dev_port_t *port;
+ hw = vnet_get_hw_interface (vnet_get_main (), hw_if_index);
+ port = vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+
+ if (!port || port->intf.hw_if_index != hw_if_index)
+ return 0;
+
+ return port;
+}
+
+/* Checked device lookup by pool index; returns 0 for a free index. */
+static_always_inline vnet_dev_t *
+vnet_dev_by_index (u32 index)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ if (pool_is_free_index (dm->devices, index))
+ return 0;
+
+ return *pool_elt_at_index (dm->devices, index);
+}
+
+/* Device lookup by its string id via the device_index_by_id hash;
+   returns 0 when the id is unknown. */
+static_always_inline vnet_dev_t *
+vnet_dev_by_id (char *id)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ uword *p = hash_get (dm->device_index_by_id, id);
+ if (p)
+ return *pool_elt_at_index (dm->devices, p[0]);
+ return 0;
+}
+
+/* DMA address of a physmem pointer: the VA itself when the device does
+   VA DMA (e.g. behind an IOMMU), otherwise the physical address. */
+static_always_inline uword
+vnet_dev_get_dma_addr (vlib_main_t *vm, vnet_dev_t *dev, void *p)
+{
+ return dev->va_dma ? pointer_to_uword (p) : vlib_physmem_get_pa (vm, p);
+}
+
+/* Bus-private data area embedded in the device struct. */
+static_always_inline void *
+vnet_dev_get_bus_data (vnet_dev_t *dev)
+{
+ return (void *) dev->bus_data;
+}
+
+/* Bus instance this device is attached to. */
+static_always_inline vnet_dev_bus_t *
+vnet_dev_get_bus (vnet_dev_t *dev)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ return pool_elt_at_index (dm->buses, dev->bus_index);
+}
+
+/* Debug check: caller must be the device's own process node on the main
+   thread. */
+static_always_inline void
+vnet_dev_validate (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ASSERT (dev->process_node_index == vlib_get_current_process_node_index (vm));
+ ASSERT (vm->thread_index == 0);
+}
+
+/* Same check, phrased for a port. */
+static_always_inline void
+vnet_dev_port_validate (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ ASSERT (port->dev->process_node_index ==
+ vlib_get_current_process_node_index (vm));
+ ASSERT (vm->thread_index == 0);
+}
+
+/* sw_if_index of the interface created for this port. */
+static_always_inline u32
+vnet_dev_port_get_sw_if_index (vnet_dev_port_t *port)
+{
+ return port->intf.sw_if_index;
+}
+
+/* Linear search for a port by its hardware port id; 0 if not found. */
+static_always_inline vnet_dev_port_t *
+vnet_dev_get_port_by_id (vnet_dev_t *dev, vnet_dev_port_id_t port_id)
+{
+ foreach_vnet_dev_port (p, dev)
+ if (p->port_id == port_id)
+ return p;
+ return 0;
+}
+
+/* Linear search for an rx queue by its hardware queue id; 0 if not
+   found. */
+static_always_inline vnet_dev_rx_queue_t *
+vnet_dev_port_get_rx_queue_by_id (vnet_dev_port_t *port,
+ vnet_dev_queue_id_t queue_id)
+{
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if (q->queue_id == queue_id)
+ return q;
+ return 0;
+}
+
+/* Linear search for a tx queue by its hardware queue id; 0 if not
+   found. */
+static_always_inline vnet_dev_tx_queue_t *
+vnet_dev_port_get_tx_queue_by_id (vnet_dev_port_t *port,
+ vnet_dev_queue_id_t queue_id)
+{
+ foreach_vnet_dev_port_tx_queue (q, port)
+ if (q->queue_id == queue_id)
+ return q;
+ return 0;
+}
+
+/* Allocate a zeroed, cache-line aligned object of sz bytes plus a
+   trailing data area of data_sz bytes, rounded up to a full cache line. */
+static_always_inline void *
+vnet_dev_alloc_with_data (u32 sz, u32 data_sz)
+{
+ void *p;
+ sz += data_sz;
+ sz = round_pow2 (sz, CLIB_CACHE_LINE_BYTES);
+ p = clib_mem_alloc_aligned (sz, CLIB_CACHE_LINE_BYTES);
+ clib_memset (p, 0, sz);
+ return p;
+}
+
+/* Take the tx queue spinlock, but only when the queue is shared between
+   threads (lock_needed).  Test-and-test-and-set: spin on a relaxed load
+   and retry the acquire CAS only once the lock looks free, to avoid
+   cache-line ping-pong. */
+static_always_inline void
+vnet_dev_tx_queue_lock_if_needed (vnet_dev_tx_queue_t *txq)
+{
+ u8 free = 0;
+
+ if (!txq->lock_needed)
+ return;
+
+ while (!__atomic_compare_exchange_n (&txq->lock, &free, 1, 0,
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+ {
+ while (__atomic_load_n (&txq->lock, __ATOMIC_RELAXED))
+ CLIB_PAUSE ();
+ /* CAS failure stored the observed value into 'free'; reset the
+ expected value before retrying. */
+ free = 0;
+ }
+}
+
+/* Release the tx queue spinlock taken above (release ordering pairs with
+   the acquire CAS). */
+static_always_inline void
+vnet_dev_tx_queue_unlock_if_needed (vnet_dev_tx_queue_t *txq)
+{
+ if (!txq->lock_needed)
+ return;
+ __atomic_store_n (&txq->lock, 0, __ATOMIC_RELEASE);
+}
+
+/* Buffer pool this rx queue refills from, taken from its buffer
+   template. */
+static_always_inline u8
+vnet_dev_get_rx_queue_buffer_pool_index (vnet_dev_rx_queue_t *rxq)
+{
+ return rxq->buffer_template.buffer_pool_index;
+}
+
+/* Data size of the buffers in this rx queue's pool. */
+static_always_inline u32
+vnet_dev_get_rx_queue_buffer_data_size (vlib_main_t *vm,
+ vnet_dev_rx_queue_t *rxq)
+{
+ u8 bpi = vnet_dev_get_rx_queue_buffer_pool_index (rxq);
+ return vlib_get_buffer_pool (vm, bpi)->data_size;
+}
+
+/* Post an asynchronous runtime request to the rx queue; consumed by
+   foreach_vnet_dev_rx_queue_runtime_helper on the polling thread
+   (release pairs with its acquire exchange). */
+static_always_inline void
+vnet_dev_rx_queue_rt_request (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq,
+ vnet_dev_rx_queue_rt_req_t req)
+{
+ __atomic_fetch_or (&rxq->runtime_request.as_number, req.as_number,
+ __ATOMIC_RELEASE);
+}
+
+/* Typed view of the rx node's runtime data area. */
+static_always_inline vnet_dev_rx_node_runtime_t *
+vnet_dev_get_rx_node_runtime (vlib_node_runtime_t *node)
+{
+ return (void *) node->runtime_data;
+}
+
+/* Typed view of the tx node's runtime data area. */
+static_always_inline vnet_dev_tx_node_runtime_t *
+vnet_dev_get_tx_node_runtime (vlib_node_runtime_t *node)
+{
+ return (void *) node->runtime_data;
+}
+
+/* Iterator step for foreach_vnet_dev_rx_queue_runtime: advance to the
+   next rx queue on this thread (or the first, when rxq == 0), applying
+   any pending runtime requests along the way.  Returns 0 at end of
+   list; suspended queues are skipped via the 'next' label. */
+static_always_inline vnet_dev_rx_queue_t *
+foreach_vnet_dev_rx_queue_runtime_helper (vlib_node_runtime_t *node,
+ vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_port_t *port;
+ vnet_dev_rx_queue_rt_req_t req;
+
+ if (rxq == 0)
+ rxq = vnet_dev_get_rx_node_runtime (node)->first_rx_queue;
+ else
+ /* 'next' is jumped to below when the current queue gets suspended. */
+ next:
+ rxq = rxq->next_on_thread;
+
+ if (PREDICT_FALSE (rxq == 0))
+ return 0;
+
+ /* Fast path: nothing pending on this queue. */
+ if (PREDICT_TRUE (rxq->runtime_request.as_number == 0))
+ return rxq;
+
+ /* Atomically take the pending requests (acquire pairs with the release
+ in vnet_dev_rx_queue_rt_request). */
+ req.as_number =
+ __atomic_exchange_n (&rxq->runtime_request.as_number, 0, __ATOMIC_ACQUIRE);
+
+ port = rxq->port;
+ if (req.update_next_index)
+ rxq->next_index = port->intf.rx_next_index;
+
+ if (req.update_feature_arc)
+ {
+ vlib_buffer_template_t *bt = &rxq->buffer_template;
+ bt->current_config_index = port->intf.current_config_index;
+ vnet_buffer (bt)->feature_arc_index = port->intf.feature_arc_index;
+ }
+
+ if (req.suspend_on)
+ {
+ rxq->suspended = 1;
+ goto next;
+ }
+
+ if (req.suspend_off)
+ rxq->suspended = 0;
+
+ return rxq;
+}
+
+/* Iterate over all active rx queues assigned to this node's thread. */
+#define foreach_vnet_dev_rx_queue_runtime(q, node) \
+ for (vnet_dev_rx_queue_t * (q) = \
+ foreach_vnet_dev_rx_queue_runtime_helper (node, 0); \
+ q; (q) = foreach_vnet_dev_rx_queue_runtime_helper (node, q))
+
+/* Per-thread scratch area sized at driver registration time
+   (runtime_temp_space_sz); indexed by thread to avoid sharing. */
+static_always_inline void *
+vnet_dev_get_rt_temp_space (vlib_main_t *vm)
+{
+ return vnet_dev_main.runtime_temp_spaces +
+ ((uword) vm->thread_index
+ << vnet_dev_main.log2_runtime_temp_space_sz);
+}
+
+/* Store an ethernet MAC into a hw address, zeroing the remaining
+   bytes. */
+static_always_inline void
+vnet_dev_set_hw_addr_eth_mac (vnet_dev_hw_addr_t *addr, const u8 *eth_mac_addr)
+{
+ vnet_dev_hw_addr_t ha = {};
+ clib_memcpy_fast (&ha.eth_mac, eth_mac_addr, sizeof (ha.eth_mac));
+ *addr = ha;
+}
+
+/* Linear search for a port argument by numeric id; 0 if not found. */
+static_always_inline vnet_dev_arg_t *
+vnet_dev_get_port_arg_by_id (vnet_dev_port_t *port, u32 id)
+{
+ foreach_vnet_dev_port_args (a, port)
+ if (a->id == id)
+ return a;
+ return 0;
+}
+
+/* Value of a boolean argument, falling back to its default when unset. */
+static_always_inline int
+vnet_dev_arg_get_bool (vnet_dev_arg_t *arg)
+{
+ ASSERT (arg->type == VNET_DEV_ARG_TYPE_BOOL);
+ return arg->val_set ? arg->val.boolean : arg->default_val.boolean;
+}
+
+/* Value of a u32 argument, falling back to its default when unset. */
+static_always_inline u32
+vnet_dev_arg_get_uint32 (vnet_dev_arg_t *arg)
+{
+ ASSERT (arg->type == VNET_DEV_ARG_TYPE_UINT32);
+ return arg->val_set ? arg->val.uint32 : arg->default_val.uint32;
+}
+
+/* Value of a string argument, falling back to its default when unset. */
+static_always_inline u8 *
+vnet_dev_arg_get_string (vnet_dev_arg_t *arg)
+{
+ ASSERT (arg->type == VNET_DEV_ARG_TYPE_STRING);
+ return arg->val_set ? arg->val.string : arg->default_val.string;
+}
diff --git a/src/vnet/dev/error.c b/src/vnet/dev/error.c
new file mode 100644
index 00000000000..4e057010af0
--- /dev/null
+++ b/src/vnet/dev/error.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/flow/flow.h>
+
+/* Build a clib error annotated with device id, port id and the textual
+   form of rv, followed by the caller's printf-style message.  Returns 0
+   (no error) when rv is VNET_DEV_OK. */
+clib_error_t *
+vnet_dev_port_err (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_rv_t rv,
+ char *fmt, ...)
+{
+ clib_error_t *err;
+ va_list va;
+ u8 *s;
+
+ if (rv == VNET_DEV_OK)
+ return 0;
+
+ va_start (va, fmt);
+ s = va_format (0, fmt, &va);
+ va_end (va);
+
+ err = clib_error_return (0, "%s port %u: %U (%v)", port->dev->device_id,
+ port->port_id, format_vnet_dev_rv, rv, s);
+ vec_free (s);
+ return err;
+}
+
+/* Map a dev-layer return value to the equivalent VNET_FLOW_ERROR_* code
+   for the flow API.  Only error codes shared between the two enums (via
+   foreach_flow_error) are valid here; anything else asserts. */
+int
+vnet_dev_flow_err (vlib_main_t *vm, vnet_dev_rv_t rv)
+{
+ if (rv == VNET_DEV_OK)
+ return 0;
+
+ switch (rv)
+ {
+ /* clang-format off */
+#define _(n, e, s) \
+ case VNET_DEV_ERR_##e: \
+ return VNET_FLOW_ERROR_##e;
+ foreach_flow_error;
+#undef _
+ /* clang-format on */
+ default:
+ ASSERT (0);
+ }
+
+ ASSERT (0);
+
+ return 0;
+}
diff --git a/src/vnet/dev/errors.h b/src/vnet/dev/errors.h
new file mode 100644
index 00000000000..430a6aef282
--- /dev/null
+++ b/src/vnet/dev/errors.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_ERRORS_H_
+#define _VNET_DEV_ERRORS_H_
+
+/* Master list of dev-layer return values and their user-visible
+   descriptions; expanded by format_vnet_dev_rv and the vnet_dev_rv_t
+   enum definition.  Fixed typo: "no such enty" -> "no such entry". */
+#define foreach_vnet_dev_rv_type \
+ _ (ALREADY_EXISTS, "already exists") \
+ _ (ALREADY_IN_USE, "already in use") \
+ _ (BUFFER_ALLOC_FAIL, "packet buffer allocation failure") \
+ _ (BUG, "bug") \
+ _ (BUS, "bus error") \
+ _ (DEVICE_NO_REPLY, "no reply from device") \
+ _ (DMA_MEM_ALLOC_FAIL, "DMA memory allocation error") \
+ _ (DRIVER_NOT_AVAILABLE, "driver not available") \
+ _ (INVALID_ARG, "invalid argument") \
+ _ (INVALID_BUS, "invalid bus") \
+ _ (INVALID_DATA, "invalid data") \
+ _ (INVALID_DEVICE_ID, "invalid device id") \
+ _ (INVALID_NUM_RX_QUEUES, "invalid number of rx queues") \
+ _ (INVALID_NUM_TX_QUEUES, "invalid number of tx queues") \
+ _ (INVALID_PORT_ID, "invalid port id") \
+ _ (INVALID_RX_QUEUE_SIZE, "invalid rx queue size") \
+ _ (INVALID_TX_QUEUE_SIZE, "invalid tx queue size") \
+ _ (INVALID_VALUE, "invalid value") \
+ _ (INTERNAL, "internal error") \
+ _ (NOT_FOUND, "not found") \
+ _ (NOT_READY, "not ready") \
+ _ (NOT_SUPPORTED, "not supported") \
+ _ (NO_CHANGE, "no change") \
+ _ (NO_AVAIL_QUEUES, "no queues available") \
+ _ (NO_SUCH_ENTRY, "no such entry") \
+ _ (PORT_STARTED, "port started") \
+ _ (PROCESS_REPLY, "dev process reply error") \
+ _ (RESOURCE_NOT_AVAILABLE, "resource not available") \
+ _ (TIMEOUT, "timeout") \
+ _ (UNKNOWN_DEVICE, "unknown device") \
+ _ (UNKNOWN_INTERFACE, "unknown interface") \
+ _ (UNSUPPORTED_CONFIG, "unsupported config") \
+ _ (UNSUPPORTED_DEVICE, "unsupported device") \
+ _ (UNSUPPORTED_DEVICE_VER, "unsupported device version") \
+ _ (ALREADY_DONE, "already done") \
+ _ (NO_SUCH_INTERFACE, "no such interface")
+
+#endif /* _VNET_DEV_ERRORS_H_ */
diff --git a/src/vnet/dev/format.c b/src/vnet/dev/format.c
new file mode 100644
index 00000000000..ed83a0eba95
--- /dev/null
+++ b/src/vnet/dev/format.c
@@ -0,0 +1,507 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/ethernet/ethernet.h>
+
+/* Render a vnet_dev_rv_t as a human-readable string.  Error codes are
+ * negative, so -rv indexes the string table; anything out of range
+ * (including positive values, which wrap as u32) falls back to a numeric
+ * form. */
+u8 *
+format_vnet_dev_rv (u8 *s, va_list *args)
+{
+  vnet_dev_rv_t rv = va_arg (*args, vnet_dev_rv_t);
+  u32 index = -rv;
+
+  char *strings[] = { [0] = "OK",
+#define _(n, d) [-VNET_DEV_ERR_##n] = d,
+		      foreach_vnet_dev_rv_type
+#undef _
+  };
+
+  if (index >= ARRAY_LEN (strings))
+    return format (s, "unknown return value (%d)", rv);
+  return format (s, "%s", strings[index]);
+}
+
+/* Format a device's bus address by delegating to the owning bus's
+ * format_device_addr op.  Returns 0 (empty) for a NULL device. */
+u8 *
+format_vnet_dev_addr (u8 *s, va_list *args)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+  vnet_dev_bus_t *bus;
+
+  if (dev == 0)
+    return 0;
+
+  bus = pool_elt_at_index (dm->buses, dev->bus_index);
+  s = format (s, "%U", bus->ops.format_device_addr, dev);
+
+  return s;
+}
+
+/* Interface-name formatter for dev interfaces; argument is the dev
+ * instance index, resolved to the port's stored interface name. */
+u8 *
+format_vnet_dev_interface_name (u8 *s, va_list *args)
+{
+  u32 i = va_arg (*args, u32);
+  vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (i);
+
+  return format (s, "%s", port->intf.name);
+}
+
+/* Multi-line device summary: driver/bus names, optional description,
+ * bus-specific info, process node, and optional driver-provided args and
+ * info sections.  Continuation lines are indented to the current column. */
+u8 *
+format_vnet_dev_info (u8 *s, va_list *args)
+{
+  vnet_dev_format_args_t *a = va_arg (*args, vnet_dev_format_args_t *);
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+  vnet_dev_driver_t *dr = pool_elt_at_index (dm->drivers, dev->driver_index);
+  vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index);
+
+  u32 indent = format_get_indent (s);
+  s = format (s, "Driver is '%s', bus is '%s'", dr->registration->name,
+	      bus->registration->name);
+
+  if (dev->description)
+    s = format (s, ", description is '%v'", dev->description);
+
+  if (bus->ops.format_device_info)
+    s = format (s, "\n%U%U", format_white_space, indent,
+		bus->ops.format_device_info, a, dev);
+
+  s = format (s, "\n%UAssigned process node is '%U'", format_white_space,
+	      indent, format_vlib_node_name, vm, dev->process_node_index);
+  if (dev->args)
+    s = format (s, "\n%UDevice Specific Arguments:\n%U%U", format_white_space,
+		indent, format_white_space, indent + 2, format_vnet_dev_args,
+		dev->args);
+  if (dev->ops.format_info)
+    s =
+      format (s, "\n%UDevice Specific Info:\n%U%U", format_white_space, indent,
+	      format_white_space, indent + 2, dev->ops.format_info, a, dev);
+  return s;
+}
+
+/* Format a hardware address; currently only the ethernet MAC form is
+ * supported. */
+u8 *
+format_vnet_dev_hw_addr (u8 *s, va_list *args)
+{
+  vnet_dev_hw_addr_t *addr = va_arg (*args, vnet_dev_hw_addr_t *);
+  return format (s, "%U", format_ethernet_address, addr->eth_mac);
+}
+
+/* Multi-line port summary: primary/secondary HW addresses, queue counts
+ * and limits, frame sizes, caps/offloads, optional driver-specific status
+ * and args, and the created interface (if any). */
+u8 *
+format_vnet_dev_port_info (u8 *s, va_list *args)
+{
+  vnet_dev_format_args_t *a = va_arg (*args, vnet_dev_format_args_t *);
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_dev_port_t *port = va_arg (*args, vnet_dev_port_t *);
+
+  u32 indent = format_get_indent (s);
+
+  s = format (s, "Hardware Address is %U", format_vnet_dev_hw_addr,
+	      &port->attr.hw_addr);
+  s = format (s, ", %u RX queues (max %u), %u TX queues (max %u)",
+	      pool_elts (port->rx_queues), port->attr.max_rx_queues,
+	      pool_elts (port->tx_queues), port->attr.max_tx_queues);
+  if (pool_elts (port->secondary_hw_addr))
+    {
+      /* print secondary addresses 6 per line */
+      u32 i = 0;
+      vnet_dev_hw_addr_t *a;
+      s = format (s, "\n%USecondary Hardware Address%s:", format_white_space,
+		  indent,
+		  pool_elts (port->secondary_hw_addr) > 1 ? "es are" : " is");
+      pool_foreach (a, port->secondary_hw_addr)
+	{
+	  if (i++ % 6 == 0)
+	    s = format (s, "\n%U", format_white_space, indent + 1);
+	  s = format (s, " %U", format_vnet_dev_hw_addr, a);
+	}
+    }
+  s = format (s, "\n%UMax RX frame size is %u (max supported %u)",
+	      format_white_space, indent, port->max_rx_frame_size,
+	      port->attr.max_supported_rx_frame_size);
+  s = format (s, "\n%UCaps: %U", format_white_space, indent,
+	      format_vnet_dev_port_caps, &port->attr.caps);
+  s = format (s, "\n%URX Offloads: %U", format_white_space, indent,
+	      format_vnet_dev_port_rx_offloads, &port->attr.rx_offloads);
+  s = format (s, "\n%UTX Offloads: %U", format_white_space, indent,
+	      format_vnet_dev_port_tx_offloads, &port->attr.tx_offloads);
+  if (port->port_ops.format_status)
+    s = format (s, "\n%UDevice Specific Port Status:\n%U%U",
+		format_white_space, indent, format_white_space, indent + 2,
+		port->port_ops.format_status, a, port);
+  if (port->args)
+    s = format (s, "\n%UDevice Specific Port Arguments:\n%U%U",
+		format_white_space, indent, format_white_space, indent + 2,
+		format_vnet_dev_args, port->args);
+
+  s = format (s, "\n%UInterface ", format_white_space, indent);
+  if (port->interface_created)
+    {
+      s = format (s, "assigned, interface name is '%U', RX node is '%U'",
+		  format_vnet_sw_if_index_name, vnm, port->intf.sw_if_index,
+		  format_vlib_node_name, vm, port->intf.rx_node_index);
+    }
+  else
+    s = format (s, "not assigned");
+  return s;
+}
+
+/* RX queue summary: size, buffer pool, polling thread, state flags, and
+ * optional driver-specific info. */
+u8 *
+format_vnet_dev_rx_queue_info (u8 *s, va_list *args)
+{
+  vnet_dev_format_args_t __clib_unused *a =
+    va_arg (*args, vnet_dev_format_args_t *);
+  vnet_dev_rx_queue_t *rxq = va_arg (*args, vnet_dev_rx_queue_t *);
+  u32 indent = format_get_indent (s);
+
+  s = format (s, "Size is %u, buffer pool index is %u", rxq->size,
+	      vnet_dev_get_rx_queue_buffer_pool_index (rxq));
+  s = format (s, "\n%UPolling thread is %u, %sabled, %sstarted, %s mode",
+	      format_white_space, indent, rxq->rx_thread_index,
+	      rxq->enabled ? "en" : "dis", rxq->started ? "" : "not-",
+	      rxq->interrupt_mode ? "interrupt" : "polling");
+  if (rxq->port->rx_queue_ops.format_info)
+    s = format (s, "\n%U%U", format_white_space, indent,
+		rxq->port->rx_queue_ops.format_info, a, rxq);
+
+  return s;
+}
+
+/* TX queue summary: size, which worker threads are assigned to it, and
+ * optional driver-specific info. */
+u8 *
+format_vnet_dev_tx_queue_info (u8 *s, va_list *args)
+{
+  vnet_dev_format_args_t __clib_unused *a =
+    va_arg (*args, vnet_dev_format_args_t *);
+  vnet_dev_tx_queue_t *txq = va_arg (*args, vnet_dev_tx_queue_t *);
+  u32 indent = format_get_indent (s);
+  u32 n;
+
+  s = format (s, "Size is %u", txq->size);
+  s = format (s, "\n%U", format_white_space, indent);
+  n = clib_bitmap_count_set_bits (txq->assigned_threads);
+  if (n == 0)
+    s = format (s, "Not used by any thread");
+  else
+    s = format (s, "Used by thread%s %U", n > 1 ? "s" : "", format_bitmap_list,
+		txq->assigned_threads);
+  if (txq->port->tx_queue_ops.format_info)
+    s = format (s, "\n%U%U", format_white_space, indent,
+		txq->port->tx_queue_ops.format_info, a, txq);
+
+  return s;
+}
+
+/* Full 'show hardware-interfaces'-style dump for a dev interface:
+ * device section, port section, then each RX and TX queue. */
+u8 *
+format_vnet_dev_interface_info (u8 *s, va_list *args)
+{
+  u32 i = va_arg (*args, u32);
+  vnet_dev_format_args_t fa = {}, *a = &fa;
+  vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (i);
+  vnet_dev_t *dev = port->dev;
+  u32 indent = format_get_indent (s);
+
+  s = format (s, "Device:");
+  s = format (s, "\n%U%U", format_white_space, indent + 2,
+	      format_vnet_dev_info, a, dev);
+
+  s = format (s, "\n%UPort %u:", format_white_space, indent, port->port_id);
+  s = format (s, "\n%U%U", format_white_space, indent + 2,
+	      format_vnet_dev_port_info, a, port);
+
+  foreach_vnet_dev_port_rx_queue (q, port)
+    {
+      s = format (s, "\n%URX queue %u:", format_white_space, indent + 2,
+		  q->queue_id);
+      s = format (s, "\n%U%U", format_white_space, indent + 4,
+		  format_vnet_dev_rx_queue_info, a, q);
+    }
+
+  foreach_vnet_dev_port_tx_queue (q, port)
+    {
+      s = format (s, "\n%UTX queue %u:", format_white_space, indent + 2,
+		  q->queue_id);
+      s = format (s, "\n%U%U", format_white_space, indent + 4,
+		  format_vnet_dev_tx_queue_info, a, q);
+    }
+  return s;
+}
+
+/* Parse a comma-separated list of flag names from 'input' and OR together
+ * the matching values from val[].  Names are canonicalized to the table's
+ * form: uppercased, '-' mapped to '_'.  Parsing stops at any other
+ * character.  Returns 0 when nothing was consumed.
+ * NOTE(review): an unrecognized name stops matching but the flags matched
+ * so far are still returned — confirm this is intended rather than
+ * returning 0 on bad input. */
+static u64
+unformat_flags (unformat_input_t *input, char *names[], u64 val[], u32 n_flags)
+{
+  u64 rv = 0;
+  uword c = 0;
+  u8 *s = 0;
+
+  while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+    {
+      switch (c)
+	{
+	case 'a' ... 'z':
+	  c -= 'a' - 'A';
+	  /* fallthrough - lowercase converted to uppercase, then stored */
+	case '0' ... '9':
+	case 'A' ... 'Z':
+	  vec_add1 (s, c);
+	  break;
+	case '-':
+	  vec_add1 (s, '_');
+	  break;
+	case ',':
+	  /* NUL-separate the names inside the vector */
+	  vec_add1 (s, 0);
+	  break;
+	default:
+	  goto end_of_string;
+	}
+    }
+end_of_string:
+
+  if (s == 0)
+    return 0;
+
+  vec_add1 (s, 0);
+
+  /* walk the NUL-separated names and OR in the value of each match */
+  for (u8 *p = s, *end = vec_end (s); p < end; p += strlen ((char *) p) + 1)
+    {
+      for (c = 0; c < n_flags; c++)
+	if (strcmp (names[c], (char *) p) == 0)
+	  {
+	    rv |= val[c];
+	    break;
+	  }
+      if (c == n_flags)
+	goto done;
+    }
+
+done:
+  vec_free (s);
+  return rv;
+}
+
+/* unformat callback for device flags; builds the name/value tables from
+ * foreach_vnet_dev_flag and delegates to unformat_flags.  Fails (returns
+ * 0) when no known flag name was parsed. */
+uword
+unformat_vnet_dev_flags (unformat_input_t *input, va_list *args)
+{
+  vnet_dev_flags_t *fp = va_arg (*args, vnet_dev_flags_t *);
+  u64 val;
+
+  char *names[] = {
+#define _(b, n, d) #n,
+    foreach_vnet_dev_flag
+#undef _
+  };
+  u64 vals[] = {
+#define _(b, n, d) 1ull << (b)
+    foreach_vnet_dev_flag
+#undef _
+  };
+
+  val = unformat_flags (input, names, vals, ARRAY_LEN (names));
+
+  if (!val)
+    return 0;
+
+  fp->n = val;
+  return 1;
+}
+
+/* unformat callback for port flags; same scheme as
+ * unformat_vnet_dev_flags but driven by foreach_vnet_dev_port_flag. */
+uword
+unformat_vnet_dev_port_flags (unformat_input_t *input, va_list *args)
+{
+  vnet_dev_port_flags_t *fp = va_arg (*args, vnet_dev_port_flags_t *);
+  u64 val;
+
+  char *flag_names[] = {
+#define _(b, n, d) #n,
+    foreach_vnet_dev_port_flag
+#undef _
+  };
+  u64 flag_values[] = {
+#define _(b, n, d) 1ull << (b)
+    foreach_vnet_dev_port_flag
+#undef _
+  };
+
+  val =
+    unformat_flags (input, flag_names, flag_values, ARRAY_LEN (flag_names));
+
+  if (!val)
+    return 0;
+
+  fp->n = val;
+  return 1;
+}
+
+/* Append the names of all flags set in 'val', space-separated, with each
+ * name lowercased and '_' mapped to '-' (inverse of unformat_flags'
+ * canonicalization). */
+static u8 *
+format_flags (u8 *s, u64 val, char *flag_names[], u64 flag_values[],
+	      u32 n_flags)
+{
+  u32 n = 0;
+  for (int i = 0; i < n_flags; i++)
+    {
+      if ((val & flag_values[i]) == 0)
+	continue;
+
+      if (n++)
+	vec_add1 (s, ' ');
+
+      for (char *c = flag_names[i]; c[0] != 0; c++)
+	{
+	  switch (c[0])
+	    {
+	    case 'A' ... 'Z':
+	      vec_add1 (s, c[0] + 'a' - 'A');
+	      break;
+	    case '_':
+	      vec_add1 (s, '-');
+	      break;
+	    default:
+	      vec_add1 (s, c[0]);
+	    }
+	}
+    }
+
+  return s;
+}
+
+/* Format device flags as a space-separated lowercase name list. */
+u8 *
+format_vnet_dev_flags (u8 *s, va_list *args)
+{
+  vnet_dev_flags_t *fp = va_arg (*args, vnet_dev_flags_t *);
+  char *flag_names[] = {
+#define _(b, n, d) #n,
+    foreach_vnet_dev_flag
+#undef _
+  };
+  u64 flag_values[] = {
+#define _(b, n, d) 1ull << (b)
+    foreach_vnet_dev_flag
+#undef _
+  };
+
+  return format_flags (s, fp->n, flag_names, flag_values,
+		       ARRAY_LEN (flag_names));
+}
+
+/* Format port flags as a space-separated lowercase name list. */
+u8 *
+format_vnet_dev_port_flags (u8 *s, va_list *args)
+{
+  vnet_dev_port_flags_t *fp = va_arg (*args, vnet_dev_port_flags_t *);
+  char *flag_names[] = {
+#define _(b, n, d) #n,
+    foreach_vnet_dev_port_flag
+#undef _
+  };
+  u64 flag_values[] = {
+#define _(b, n, d) 1ull << (b)
+    foreach_vnet_dev_port_flag
+#undef _
+  };
+
+  return format_flags (s, fp->n, flag_names, flag_values,
+		       ARRAY_LEN (flag_names));
+}
+
+/* Log-message prefix used by the log_* macros in <vnet/dev/log.h>:
+ * "[addr] [func]: ".  Both the device and the function name are optional
+ * (may be NULL). */
+u8 *
+format_vnet_dev_log (u8 *s, va_list *args)
+{
+  vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+  char *func = va_arg (*args, char *);
+
+  if (dev)
+    s = format (s, "%U", format_vnet_dev_addr, dev);
+  if (dev && func)
+    vec_add1 (s, ' ');
+  if (func)
+    s = format (s, "%s", func);
+  vec_add1 (s, ':');
+  vec_add1 (s, ' ');
+  return s;
+}
+
+/* List the set port capability bits, space-separated, with '_' shown as
+ * '-'.  Empty output when no capability is set. */
+u8 *
+format_vnet_dev_port_caps (u8 *s, va_list *args)
+{
+  vnet_dev_port_caps_t *c = va_arg (*args, vnet_dev_port_caps_t *);
+  u32 line = 0;
+
+  if (c->as_number == 0)
+    return s;
+
+#define _(n)                                                                  \
+  if (c->n)                                                                   \
+    {                                                                         \
+      if (line++)                                                             \
+	vec_add1 (s, ' ');                                                    \
+      for (char *str = #n; *str; str++)                                       \
+	vec_add1 (s, *str == '_' ? '-' : *str);                               \
+    }
+  foreach_vnet_dev_port_caps;
+#undef _
+
+  return s;
+}
+
+/* List the enabled RX offload bits, space-separated, with '_' shown as
+ * '-'.  Empty output when none are set. */
+u8 *
+format_vnet_dev_port_rx_offloads (u8 *s, va_list *args)
+{
+  vnet_dev_port_rx_offloads_t *c =
+    va_arg (*args, vnet_dev_port_rx_offloads_t *);
+  u32 line = 0;
+
+  if (c->as_number == 0)
+    return s;
+
+#define _(n)                                                                  \
+  if (c->n)                                                                   \
+    {                                                                         \
+      if (line++)                                                             \
+	vec_add1 (s, ' ');                                                    \
+      for (char *str = #n; *str; str++)                                       \
+	vec_add1 (s, *str == '_' ? '-' : *str);                               \
+    }
+  foreach_vnet_dev_port_rx_offloads;
+#undef _
+
+  return s;
+}
+
+/* List the enabled TX offload bits, space-separated, with '_' shown as
+ * '-'.  Empty output when none are set. */
+u8 *
+format_vnet_dev_port_tx_offloads (u8 *s, va_list *args)
+{
+  vnet_dev_port_tx_offloads_t *c =
+    va_arg (*args, vnet_dev_port_tx_offloads_t *);
+  u32 line = 0;
+
+  if (c->as_number == 0)
+    return s;
+
+#define _(n)                                                                  \
+  if (c->n)                                                                   \
+    {                                                                         \
+      if (line++)                                                             \
+	vec_add1 (s, ' ');                                                    \
+      for (char *str = #n; *str; str++)                                       \
+	vec_add1 (s, *str == '_' ? '-' : *str);                               \
+    }
+  foreach_vnet_dev_port_tx_offloads;
+#undef _
+
+  return s;
+}
+
+/* Flow formatter hook for dev interfaces; delegates to the driver's
+ * format_flow port op when provided, otherwise prints nothing. */
+u8 *
+format_vnet_dev_flow (u8 *s, va_list *args)
+{
+  u32 dev_instance = va_arg (*args, u32);
+  u32 flow_index = va_arg (*args, u32);
+  uword private_data = va_arg (*args, uword);
+  vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (dev_instance);
+
+  if (port->port_ops.format_flow)
+    s = format (s, "%U", port->port_ops.format_flow, port, flow_index,
+		private_data);
+
+  return s;
+}
diff --git a/src/vnet/dev/handlers.c b/src/vnet/dev/handlers.c
new file mode 100644
index 00000000000..2a55affe3e3
--- /dev/null
+++ b/src/vnet/dev/handlers.c
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+#include <vnet/flow/flow.h>
+
+/* log class used by the log_* macros from <vnet/dev/log.h> in this file */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+  .class_name = "dev",
+  .subclass_name = "handler",
+};
+
+/* vnet hw-interface callback: change a port's max RX frame size.
+ * Validates the request first, treats "no change" as success, and hands
+ * the actual change to the device process.  Returns a clib error on
+ * validation or device failure. */
+clib_error_t *
+vnet_dev_port_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw,
+				  u32 frame_size)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+  vnet_dev_rv_t rv;
+
+  vnet_dev_port_cfg_change_req_t req = {
+    .type = VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE,
+    .max_rx_frame_size = frame_size,
+  };
+
+  log_debug (p->dev, "size %u", frame_size);
+
+  rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+  if (rv == VNET_DEV_ERR_NO_CHANGE)
+    return 0;
+
+  if (rv != VNET_DEV_OK)
+    return vnet_dev_port_err (vm, p, rv,
+			      "new max frame size is not valid for port");
+
+  if ((rv = vnet_dev_process_port_cfg_change_req (vm, p, &req)) != VNET_DEV_OK)
+    return vnet_dev_port_err (vm, p, rv,
+			      "device failed to change max frame size");
+
+  return 0;
+}
+
+/* ethernet flag-change callback: toggles promiscuous mode on the port.
+ * Returns 0 on success (or no change) and ~0 on any failure or
+ * unsupported flag combination. */
+u32
+vnet_dev_port_eth_flag_change (vnet_main_t *vnm, vnet_hw_interface_t *hw,
+			       u32 flags)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+  vnet_dev_rv_t rv;
+
+  vnet_dev_port_cfg_change_req_t req = {
+    .type = VNET_DEV_PORT_CFG_PROMISC_MODE,
+  };
+
+  switch (flags)
+    {
+    case ETHERNET_INTERFACE_FLAG_DEFAULT_L3:
+      log_debug (p->dev, "promisc off");
+      break;
+    case ETHERNET_INTERFACE_FLAG_ACCEPT_ALL:
+      log_debug (p->dev, "promisc on");
+      req.promisc = 1;
+      break;
+    default:
+      return ~0;
+    }
+
+  rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+  if (rv == VNET_DEV_ERR_NO_CHANGE)
+    return 0;
+
+  if (rv != VNET_DEV_OK)
+    return ~0;
+
+  rv = vnet_dev_process_port_cfg_change_req (vm, p, &req);
+  if (rv == VNET_DEV_OK || rv == VNET_DEV_ERR_NO_CHANGE)
+    return 0;
+  return ~0;
+}
+
+/* MAC address change callback: validates and applies a new primary HW
+ * address via the device process.  "No change" is treated as success. */
+clib_error_t *
+vnet_dev_port_mac_change (vnet_hw_interface_t *hi, const u8 *old,
+			  const u8 *new)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+  vnet_dev_rv_t rv;
+
+  vnet_dev_port_cfg_change_req_t req = {
+    .type = VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR,
+  };
+
+  vnet_dev_set_hw_addr_eth_mac (&req.addr, new);
+
+  log_debug (p->dev, "new mac %U", format_vnet_dev_hw_addr, &req.addr);
+
+  rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+  if (rv == VNET_DEV_ERR_NO_CHANGE)
+    return 0;
+
+  if (rv != VNET_DEV_OK)
+    return vnet_dev_port_err (vm, p, rv, "hw address is not valid for port");
+
+  if ((rv = vnet_dev_process_port_cfg_change_req (vm, p, &req)) != VNET_DEV_OK)
+    return vnet_dev_port_err (vm, p, rv, "device failed to change hw address");
+
+  return 0;
+}
+
+/* Secondary-MAC add/del callback: validates and forwards the request to
+ * the device process.  Returns a clib error if the address cannot be
+ * added/removed or the device rejects the change. */
+clib_error_t *
+vnet_dev_add_del_mac_address (vnet_hw_interface_t *hi, const u8 *address,
+			      u8 is_add)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+  vnet_dev_rv_t rv;
+
+  vnet_dev_port_cfg_change_req_t req = {
+    .type = is_add ? VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR :
+			   VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR,
+  };
+
+  vnet_dev_set_hw_addr_eth_mac (&req.addr, address);
+
+  /* fixed unbalanced '(' in the original log message */
+  log_debug (p->dev, "received (addr %U is_add %u)", format_vnet_dev_hw_addr,
+	     &req.addr, is_add);
+
+  rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+  if (rv != VNET_DEV_OK)
+    return vnet_dev_port_err (vm, p, rv,
+			      "provided secondary hw addresses cannot "
+			      "be added/removed");
+
+  if ((rv = vnet_dev_process_port_cfg_change_req (vm, p, &req)) != VNET_DEV_OK)
+    return vnet_dev_port_err (
+      vm, p, rv, "device failed to add/remove secondary hw address");
+
+  return 0;
+}
+
+/* vnet flow op dispatcher for dev ports: maps flow add/del/counter ops to
+ * port cfg-change requests, validates them and forwards to the device
+ * process.  Returns 0 or a VNET_FLOW_ERROR_* code.
+ * NOTE(review): 'req' is only partially initialized (type/flow_index/
+ * private_data); confirm remaining fields are not read by validate. */
+int
+vnet_dev_flow_ops_fn (vnet_main_t *vnm, vnet_flow_dev_op_t op,
+		      u32 dev_instance, u32 flow_index, uword *private_data)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (dev_instance);
+  vnet_dev_port_cfg_change_req_t req;
+  vnet_dev_rv_t rv;
+
+  switch (op)
+    {
+    case VNET_FLOW_DEV_OP_ADD_FLOW:
+      req.type = VNET_DEV_PORT_CFG_ADD_RX_FLOW;
+      break;
+    case VNET_FLOW_DEV_OP_DEL_FLOW:
+      req.type = VNET_DEV_PORT_CFG_DEL_RX_FLOW;
+      break;
+    case VNET_FLOW_DEV_OP_GET_COUNTER:
+      req.type = VNET_DEV_PORT_CFG_GET_RX_FLOW_COUNTER;
+      break;
+    case VNET_FLOW_DEV_OP_RESET_COUNTER:
+      req.type = VNET_DEV_PORT_CFG_RESET_RX_FLOW_COUNTER;
+      break;
+    default:
+      log_warn (p->dev, "unsupported request for flow_ops received");
+      return VNET_FLOW_ERROR_NOT_SUPPORTED;
+    }
+
+  req.flow_index = flow_index;
+  req.private_data = private_data;
+
+  rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+  if (rv != VNET_DEV_OK)
+    {
+      log_err (p->dev, "validation failed for flow_ops");
+      return VNET_FLOW_ERROR_NOT_SUPPORTED;
+    }
+
+  if ((rv = vnet_dev_process_port_cfg_change_req (vm, p, &req)) != VNET_DEV_OK)
+    {
+      log_err (p->dev, "request for flow_ops failed");
+      return vnet_dev_flow_err (vm, rv);
+    }
+
+  return 0;
+}
+
+/* RSS queue-set callback — not implemented for dev ports; always returns
+ * an "unsupported" error. */
+clib_error_t *
+vnet_dev_interface_set_rss_queues (vnet_main_t *vnm, vnet_hw_interface_t *hi,
+				   clib_bitmap_t *bitmap)
+{
+  vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+  /* fixed copy-pasted log text that referred to flow_ops */
+  log_warn (p->dev, "unsupported request to set rss queues received");
+  return vnet_error (VNET_ERR_UNSUPPORTED, "not implemented");
+}
+
+/* Clear-counters callback: runs vnet_dev_port_clear_counters in the
+ * device process context (return value intentionally ignored). */
+void
+vnet_dev_clear_hw_interface_counters (u32 instance)
+{
+  vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (instance);
+  vlib_main_t *vm = vlib_get_main ();
+
+  vnet_dev_process_call_port_op_no_rv (vm, port, vnet_dev_port_clear_counters);
+}
+
+/* Redirect the port's RX output to 'node_index', or restore the default
+ * next node when node_index == ~0.  When no feature arc is active the RX
+ * queue runtimes are asked to pick up the new next index.
+ * NOTE(review): the unconditional rx_next_index assignment after the
+ * if/else duplicates — and overrides — the feature_arc-guarded ones
+ * above; confirm whether the guard is meant to apply. */
+void
+vnet_dev_set_interface_next_node (vnet_main_t *vnm, u32 hw_if_index,
+				  u32 node_index)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+  vnet_dev_port_t *port =
+    vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+  int runtime_update = 0;
+
+  if (node_index == ~0)
+    {
+      port->intf.redirect_to_node_next_index = 0;
+      if (port->intf.feature_arc == 0)
+	{
+	  port->intf.rx_next_index =
+	    vnet_dev_default_next_index_by_port_type[port->attr.type];
+	  runtime_update = 1;
+	}
+      port->intf.redirect_to_node = 0;
+    }
+  else
+    {
+      u16 next_index = vlib_node_add_next (vlib_get_main (),
+					   port_rx_eth_node.index, node_index);
+      port->intf.redirect_to_node_next_index = next_index;
+      if (port->intf.feature_arc == 0)
+	{
+	  port->intf.rx_next_index = next_index;
+	  runtime_update = 1;
+	}
+      port->intf.redirect_to_node = 1;
+    }
+  port->intf.rx_next_index =
+    node_index == ~0 ?
+	    vnet_dev_default_next_index_by_port_type[port->attr.type] :
+	    node_index;
+
+  if (runtime_update)
+    {
+      foreach_vnet_dev_port_rx_queue (rxq, port)
+	vnet_dev_rx_queue_rt_request (
+	  vm, rxq, (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1 });
+      /* fixed typos: "chgange" and "reditect-to-next" */
+      log_debug (port->dev, "runtime update requested due to change in "
+			    "redirect-to-node configuration");
+    }
+}
diff --git a/src/vnet/dev/log.h b/src/vnet/dev/log.h
new file mode 100644
index 00000000000..5ca7b6620e9
--- /dev/null
+++ b/src/vnet/dev/log.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_LOG_H_
+#define _VNET_DEV_LOG_H_
+
+/* Logging helpers for dev code.  Each takes a vnet_dev_t * (may be NULL)
+ * and a printf-style format; format_vnet_dev_log prefixes the message
+ * with the device address and, for log_debug only, the calling function
+ * name with its "vnet_dev_" prefix stripped. */
+#define log_debug(dev, f, ...)                                                \
+  vlib_log (VLIB_LOG_LEVEL_DEBUG, dev_log.class, "%U" f, format_vnet_dev_log, \
+	    dev, clib_string_skip_prefix (__func__, "vnet_dev_"),             \
+	    ##__VA_ARGS__)
+#define log_notice(dev, f, ...)                                               \
+  vlib_log (VLIB_LOG_LEVEL_NOTICE, dev_log.class, "%U" f,                     \
+	    format_vnet_dev_log, dev, 0, ##__VA_ARGS__)
+#define log_warn(dev, f, ...)                                                 \
+  vlib_log (VLIB_LOG_LEVEL_WARNING, dev_log.class, "%U" f,                    \
+	    format_vnet_dev_log, dev, 0, ##__VA_ARGS__)
+#define log_err(dev, f, ...)                                                  \
+  vlib_log (VLIB_LOG_LEVEL_ERR, dev_log.class, "%U" f, format_vnet_dev_log,   \
+	    dev, 0, ##__VA_ARGS__)
+
+#endif /* _VNET_DEV_LOG_H_ */
diff --git a/src/vnet/dev/mgmt.h b/src/vnet/dev/mgmt.h
new file mode 100644
index 00000000000..f13f4075255
--- /dev/null
+++ b/src/vnet/dev/mgmt.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_MGMT_H_
+#define _VNET_DEV_MGMT_H_
+
+#include <vppinfra/clib.h>
+
+#endif /* _VNET_DEV_MGMT_H_ */
diff --git a/src/vnet/dev/pci.c b/src/vnet/dev/pci.c
new file mode 100644
index 00000000000..3cc0cba5003
--- /dev/null
+++ b/src/vnet/dev/pci.c
@@ -0,0 +1,458 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/log.h>
+#include <vlib/unix/unix.h>
+
+/* log class used by the log_* macros from <vnet/dev/log.h> in this file */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+  .class_name = "dev",
+  .subclass_name = "pci",
+};
+
+/* Parse a device id of the form "pci<delim><addr>" into a PCI address.
+ * Returns non-zero on success, 0 when the string does not match. */
+static int
+vnet_dev_bus_pci_device_id_to_pci_addr (vlib_pci_addr_t *addr, char *str)
+{
+  unformat_input_t input;
+  uword rv;
+  unformat_init_string (&input, str, strlen (str));
+  rv = unformat (&input, "pci" VNET_DEV_DEVICE_ID_PREFIX_DELIMITER "%U",
+		 unformat_vlib_pci_addr, addr);
+  unformat_free (&input);
+  return rv;
+}
+
+/* Bus op: query vendor/device/revision for a device id.  Returns a
+ * heap-allocated vnet_dev_bus_pci_device_info_t (freed by the
+ * free_device_info op) or 0 on bad id / PCI error. */
+static void *
+vnet_dev_bus_pci_get_device_info (vlib_main_t *vm, char *device_id)
+{
+  vnet_dev_bus_pci_device_info_t *info;
+  vlib_pci_addr_t addr = {};
+  clib_error_t *err = 0;
+  vlib_pci_device_info_t *di = 0;
+
+  vlib_log_debug (dev_log.class, "device %s", device_id);
+
+  if (vnet_dev_bus_pci_device_id_to_pci_addr (&addr, device_id) == 0)
+    return 0;
+
+  di = vlib_pci_get_device_info (vm, &addr, &err);
+  if (err)
+    {
+      vlib_log_err (dev_log.class, "get_device_info: %U", format_clib_error,
+		    err);
+      clib_error_free (err);
+      return 0;
+    }
+
+  info = clib_mem_alloc (sizeof (vnet_dev_bus_pci_device_info_t));
+  info->addr = addr;
+  info->vendor_id = di->vendor_id;
+  info->device_id = di->device_id;
+  info->revision = di->revision;
+
+  vlib_pci_free_device_info (di);
+  return info;
+}
+
+/* Bus op: release the info allocated by get_device_info. */
+static void
+vnet_dev_bus_pci_free_device_info (vlib_main_t *vm, void *dev_info)
+{
+  clib_mem_free (dev_info);
+}
+
+/* Bus op: open the PCI device described by dev->device_id, record numa
+ * node and VA-DMA capability, and allocate the MSI-X handler table.
+ * Marks the handle valid so vnet_dev_bus_pci_close can release it. */
+static vnet_dev_rv_t
+vnet_dev_bus_pci_open (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  clib_error_t *err = 0;
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+
+  if (vnet_dev_bus_pci_device_id_to_pci_addr (&pdd->addr, dev->device_id) == 0)
+    return VNET_DEV_ERR_INVALID_DEVICE_ID;
+
+  if ((err = vlib_pci_device_open (vm, &pdd->addr, 0, &pdd->handle)))
+    {
+      log_err (dev, "device_open: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  /* without this, close () would never call vlib_pci_device_close () */
+  pdd->pci_handle_valid = 1;
+
+  dev->numa_node = vlib_pci_get_numa_node (vm, pdd->handle);
+
+  if (vlib_pci_supports_virtual_addr_dma (vm, pdd->handle))
+    {
+      dev->va_dma = 1;
+      log_debug (dev, "device supports VA DMA");
+    }
+
+  vlib_pci_set_private_data (vm, pdd->handle, (uword) dev);
+
+  pdd->n_msix_int = vlib_pci_get_num_msix_interrupts (vm, pdd->handle);
+  if (pdd->n_msix_int)
+    {
+      u32 sz = sizeof (pdd->msix_handlers[0]) * pdd->n_msix_int;
+      sz = round_pow2 (sz, CLIB_CACHE_LINE_BYTES);
+      pdd->msix_handlers = clib_mem_alloc_aligned (sz, CLIB_CACHE_LINE_BYTES);
+      clib_memset (pdd->msix_handlers, 0, sz);
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Bus op: tear down INTx/MSI-X handlers, free the handler table, and
+ * close the PCI handle if it was successfully opened. */
+static void
+vnet_dev_bus_pci_close (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+
+  if (pdd->intx_handler)
+    vnet_dev_pci_intx_remove_handler (vm, dev);
+
+  if (pdd->msix_handlers)
+    {
+      for (u16 i = 0; i < pdd->n_msix_int; i++)
+	if (pdd->msix_handlers[i])
+	  vnet_dev_pci_msix_remove_handler (vm, dev, i, 1);
+      clib_mem_free (pdd->msix_handlers);
+      pdd->msix_handlers = 0;
+    }
+
+  if (pdd->pci_handle_valid)
+    vlib_pci_device_close (vm, pdd->handle);
+}
+
+/* Bus op: allocate zeroed, cache-line (or 'align') aligned DMA memory on
+ * the device's numa node and map it for DMA.  On success stores the
+ * pointer in *pp; on failure nothing is leaked. */
+static vnet_dev_rv_t
+vnet_dev_bus_pci_dma_mem_alloc (vlib_main_t *vm, vnet_dev_t *dev, u32 size,
+				u32 align, void **pp)
+{
+  clib_error_t *err;
+  void *p;
+
+  align = align ? align : CLIB_CACHE_LINE_BYTES;
+  size = round_pow2 (size, align);
+
+  p = vlib_physmem_alloc_aligned_on_numa (vm, size, align, dev->numa_node);
+
+  if (p == 0)
+    {
+      err = vlib_physmem_last_error (vm);
+      log_err (dev, "dev_dma_mem_alloc: physmem_alloc_aligned error %U",
+	       format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_DMA_MEM_ALLOC_FAIL;
+    }
+
+  if ((err = vlib_pci_map_dma (vm, vnet_dev_get_pci_handle (dev), p)))
+    {
+      log_err (dev, "dev_dma_mem_alloc: pci_map_dma: %U", format_clib_error,
+	       err);
+      clib_error_free (err);
+      /* don't leak the physmem allocated above */
+      vlib_physmem_free (vm, p);
+      return VNET_DEV_ERR_DMA_MEM_ALLOC_FAIL;
+    }
+
+  clib_memset (p, 0, size);
+  pp[0] = p;
+  return VNET_DEV_OK;
+}
+
+/* Bus op: free DMA memory from dma_mem_alloc; NULL is a no-op. */
+static void
+vnet_dev_bus_pci_dma_mem_free (vlib_main_t *vm, vnet_dev_t *dev, void *p)
+{
+  if (p)
+    vlib_physmem_free (vm, p);
+}
+
+/* Read the PCI config-space header into *hdr.  Returns VNET_DEV_ERR_BUS
+ * on PCI access failure. */
+vnet_dev_rv_t
+vnet_dev_pci_read_config_header (vlib_main_t *vm, vnet_dev_t *dev,
+				 vlib_pci_config_hdr_t *hdr)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  clib_error_t *err;
+
+  err = vlib_pci_read_write_config (vm, h, VLIB_READ, 0, hdr, sizeof (*hdr));
+  if (err)
+    {
+      log_err (dev, "pci_read_config_header: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+  return VNET_DEV_OK;
+}
+
+/* Map PCI BAR 'region' into the process address space; the mapping is
+ * returned via *pp. */
+vnet_dev_rv_t
+vnet_dev_pci_map_region (vlib_main_t *vm, vnet_dev_t *dev, u8 region,
+			 void **pp)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  clib_error_t *err;
+
+  if ((err = vlib_pci_map_region (vm, h, region, pp)))
+    {
+      log_err (dev, "pci_map_region: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Issue a PCI function-level reset (FLR) on the device. */
+vnet_dev_rv_t
+vnet_dev_pci_function_level_reset (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  clib_error_t *err;
+
+  if ((err = vlib_pci_function_level_reset (vm, h)))
+    {
+      log_err (dev, "pci_function_level_reset: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Enable PCI bus mastering (required before the device can DMA). */
+vnet_dev_rv_t
+vnet_dev_pci_bus_master_enable (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  clib_error_t *err;
+
+  if ((err = vlib_pci_bus_master_enable (vm, h)))
+    {
+      log_err (dev, "pci_bus_master_enable: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+  return VNET_DEV_OK;
+}
+
+/* INTx trampoline registered with vlib PCI: recover the vnet_dev_t from
+ * the handle's private data and invoke the stored device handler. */
+static void
+vnet_dev_pci_intx_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+  vnet_dev_t *dev = (vnet_dev_t *) vlib_pci_get_private_data (vm, h);
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+
+  if (pdd->intx_handler)
+    pdd->intx_handler (vm, dev);
+}
+
+/* Register 'fn' as the device's INTx handler.  The trampoline
+ * vnet_dev_pci_intx_handler dispatches through pdd->intx_handler, so the
+ * callback must be stored there (the original never stored 'fn', leaving
+ * the interrupt silently unhandled). */
+vnet_dev_rv_t
+vnet_dev_pci_intx_add_handler (vlib_main_t *vm, vnet_dev_t *dev,
+			       vnet_dev_pci_intx_handler_fn_t *fn)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+  clib_error_t *err;
+
+  err = vlib_pci_register_intx_handler (vm, h, vnet_dev_pci_intx_handler);
+
+  if (err)
+    {
+      log_err (dev, "pci_register_intx_handler: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  pdd->intx_handler = fn;
+
+  return VNET_DEV_OK;
+}
+
+/* Unregister the INTx handler and clear the stored callback. */
+vnet_dev_rv_t
+vnet_dev_pci_intx_remove_handler (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+  clib_error_t *err;
+
+  err = vlib_pci_unregister_intx_handler (vm, h);
+
+  if (err)
+    {
+      log_err (dev, "pci_unregister_intx_handler: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  pdd->intx_handler = 0;
+
+  return VNET_DEV_OK;
+}
+
+/* MSI-X trampoline registered with vlib PCI: dispatch to the per-line
+ * handler stored in pdd->msix_handlers, bounds-checked against the
+ * number of interrupts. */
+static void
+vnet_dev_pci_msix_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h, u16 line)
+{
+  vnet_dev_t *dev = (vnet_dev_t *) vlib_pci_get_private_data (vm, h);
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+
+  if (line < pdd->n_msix_int && pdd->msix_handlers[line])
+    pdd->msix_handlers[line](vm, dev, line);
+}
+
+/* Register 'fn' for MSI-X lines [first, first + count) and record it in
+ * the per-line handler table used by the trampoline.  Lines must not
+ * already have a handler. */
+vnet_dev_rv_t
+vnet_dev_pci_msix_add_handler (vlib_main_t *vm, vnet_dev_t *dev,
+			       vnet_dev_pci_msix_handler_fn_t *fn, u16 first,
+			       u16 count)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+  clib_error_t *err;
+
+  err = vlib_pci_register_msix_handler (vm, h, first, count,
+					vnet_dev_pci_msix_handler);
+
+  if (err)
+    {
+      log_err (dev, "pci_register_msix_handler: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  for (u16 i = first; i < first + count; i++)
+    {
+      ASSERT (pdd->msix_handlers[i] == 0);
+      pdd->msix_handlers[i] = fn;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Pin servicing of MSI-X 'line' to the given thread by moving the
+ * backing file descriptor's polling thread. */
+void
+vnet_dev_pci_msix_set_polling_thread (vlib_main_t *vm, vnet_dev_t *dev,
+				      u16 line, u16 thread_index)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  u32 index;
+
+  index = vlib_pci_get_msix_file_index (vm, h, line);
+
+  clib_file_set_polling_thread (&file_main, index, thread_index);
+}
+
+/* Unregister MSI-X lines [first, first + count) and clear their entries
+ * in the handler table.  Lines must currently have handlers. */
+vnet_dev_rv_t
+vnet_dev_pci_msix_remove_handler (vlib_main_t *vm, vnet_dev_t *dev, u16 first,
+				  u16 count)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+  clib_error_t *err;
+
+  err = vlib_pci_unregister_msix_handler (vm, h, first, count);
+
+  if (err)
+    {
+      log_err (dev, "pci_unregister_msix_handler: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  for (u16 i = first; i < first + count; i++)
+    {
+      ASSERT (pdd->msix_handlers[i] != 0);
+      pdd->msix_handlers[i] = 0;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Enable (unmask) MSI-X interrupt lines [first, first + count). */
+vnet_dev_rv_t
+vnet_dev_pci_msix_enable (vlib_main_t *vm, vnet_dev_t *dev, u16 first,
+			  u16 count)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  clib_error_t *err;
+
+  err = vlib_pci_enable_msix_irq (vm, h, first, count);
+
+  if (err)
+    {
+      log_err (dev, "pci_enable_msix_irq: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Disable (mask) MSI-X interrupt lines [first, first + count). */
+vnet_dev_rv_t
+vnet_dev_pci_msix_disable (vlib_main_t *vm, vnet_dev_t *dev, u16 first,
+			   u16 count)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  clib_error_t *err;
+
+  err = vlib_pci_disable_msix_irq (vm, h, first, count);
+
+  if (err)
+    {
+      /* fixed log typo "pci_disble_msix_irq" */
+      log_err (dev, "pci_disable_msix_irq: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Disable PCI bus mastering (stops device-initiated DMA). */
+vnet_dev_rv_t
+vnet_dev_pci_bus_master_disable (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  clib_error_t *err;
+
+  if ((err = vlib_pci_bus_master_disable (vm, h)))
+    {
+      log_err (dev, "pci_bus_master_disable: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+  return VNET_DEV_OK;
+}
+
+/* Bus op: format the device's PCIe address and, when the config space is
+ * readable, its link port/speed and speed capability. */
+static u8 *
+format_dev_pci_device_info (u8 *s, va_list *args)
+{
+  vnet_dev_format_args_t __clib_unused *a =
+    va_arg (*args, vnet_dev_format_args_t *);
+  vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+  vlib_main_t *vm = vlib_get_main ();
+  vlib_pci_config_t cfg = {};
+  clib_error_t *err;
+
+  s = format (s, "PCIe address is %U", format_vlib_pci_addr, &pdd->addr);
+
+  err = vlib_pci_read_write_config (vm, pdd->handle, VLIB_READ, 0, &cfg,
+				    sizeof (cfg));
+  if (!err)
+    {
+      s = format (s, ", port is %U, speed is %U (max %U)",
+		  format_vlib_pci_link_port, &cfg, format_vlib_pci_link_speed,
+		  &cfg, format_vlib_pci_link_speed_cap, &cfg);
+    }
+  else
+    clib_error_free (err);
+
+  return s;
+}
+
+/* Bus op: format the device's PCI address (used as the log prefix). */
+static u8 *
+format_dev_pci_device_addr (u8 *s, va_list *args)
+{
+  vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+  return format (s, "%U", format_vlib_pci_addr, &pdd->addr);
+}
+
+/* PCI bus registration.  device_data_size must cover
+ * vnet_dev_bus_pci_device_data_t — dev->bus_data is cast to that type by
+ * vnet_dev_get_bus_pci_device_data (); the original used the smaller
+ * _info_t struct, under-allocating the per-device bus data. */
+VNET_DEV_REGISTER_BUS (pci) = {
+  .name = "pci",
+  .device_data_size = sizeof (vnet_dev_bus_pci_device_data_t),
+  .ops = {
+    .device_open = vnet_dev_bus_pci_open,
+    .device_close = vnet_dev_bus_pci_close,
+    .get_device_info = vnet_dev_bus_pci_get_device_info,
+    .free_device_info = vnet_dev_bus_pci_free_device_info,
+    .dma_mem_alloc_fn = vnet_dev_bus_pci_dma_mem_alloc,
+    .dma_mem_free_fn = vnet_dev_bus_pci_dma_mem_free,
+    .format_device_info = format_dev_pci_device_info,
+    .format_device_addr = format_dev_pci_device_addr,
+  },
+};
diff --git a/src/vnet/dev/pci.h b/src/vnet/dev/pci.h
new file mode 100644
index 00000000000..ce9a53aa273
--- /dev/null
+++ b/src/vnet/dev/pci.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_PCI_H_
+#define _VNET_DEV_PCI_H_
+
+#include <vppinfra/clib.h>
+#include <vlib/pci/pci.h>
+#include <vnet/dev/dev.h>
+
/* Callback invoked when a legacy INTx interrupt fires for a device. */
typedef void (vnet_dev_pci_intx_handler_fn_t) (vlib_main_t *vm,
					       vnet_dev_t *dev);
/* Callback invoked when MSI-X interrupt 'line' fires for a device. */
typedef void (vnet_dev_pci_msix_handler_fn_t) (vlib_main_t *vm,
					       vnet_dev_t *dev, u16 line);

/* Device identity as reported by the bus get_device_info op. */
typedef struct
{
  vlib_pci_addr_t addr; /* PCI address (domain:bus:device.function) */
  u16 vendor_id;	/* PCI vendor ID from config space */
  u16 device_id;	/* PCI device ID from config space */
  u8 revision;		/* PCI revision ID */
} vnet_dev_bus_pci_device_info_t;

/* Per-device runtime state kept in dev->bus_data for PCI devices. */
typedef struct
{
  u8 pci_handle_valid : 1; /* set once 'handle' below is open and usable */
  u16 n_msix_int;	   /* number of MSI-X interrupts configured */
  vlib_pci_addr_t addr;	   /* PCI address of this device */
  vlib_pci_dev_handle_t handle; /* vlib PCI device handle */
  vnet_dev_pci_intx_handler_fn_t *intx_handler; /* INTx callback, if any */
  vnet_dev_pci_msix_handler_fn_t **msix_handlers; /* indexed by MSI-X line */
} vnet_dev_bus_pci_device_data_t;
+
+static_always_inline vnet_dev_bus_pci_device_data_t *
+vnet_dev_get_bus_pci_device_data (vnet_dev_t *dev)
+{
+ return (void *) dev->bus_data;
+}
+static_always_inline vlib_pci_dev_handle_t
+vnet_dev_get_pci_handle (vnet_dev_t *dev)
+{
+ return ((vnet_dev_bus_pci_device_data_t *) (dev->bus_data))->handle;
+}
+
+static_always_inline vlib_pci_addr_t
+vnet_dev_get_pci_addr (vnet_dev_t *dev)
+{
+ return ((vnet_dev_bus_pci_device_data_t *) (dev->bus_data))->addr;
+}
+
+static_always_inline vlib_pci_dev_handle_t
+vnet_dev_get_pci_n_msix_interrupts (vnet_dev_t *dev)
+{
+ return vnet_dev_get_bus_pci_device_data (dev)->n_msix_int;
+}
+
+vnet_dev_rv_t vnet_dev_pci_read_config_header (vlib_main_t *, vnet_dev_t *,
+ vlib_pci_config_hdr_t *);
+
+vnet_dev_rv_t vnet_dev_pci_map_region (vlib_main_t *, vnet_dev_t *, u8,
+ void **);
+vnet_dev_rv_t vnet_dev_pci_function_level_reset (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_pci_bus_master_enable (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_pci_bus_master_disable (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_pci_intx_add_handler (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_pci_intx_handler_fn_t *);
+vnet_dev_rv_t vnet_dev_pci_intx_remove_handler (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_pci_msix_add_handler (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_pci_msix_handler_fn_t *,
+ u16, u16);
+vnet_dev_rv_t vnet_dev_pci_msix_remove_handler (vlib_main_t *, vnet_dev_t *,
+ u16, u16);
+vnet_dev_rv_t vnet_dev_pci_msix_enable (vlib_main_t *, vnet_dev_t *, u16, u16);
+vnet_dev_rv_t vnet_dev_pci_msix_disable (vlib_main_t *, vnet_dev_t *, u16,
+ u16);
+void vnet_dev_pci_msix_set_polling_thread (vlib_main_t *, vnet_dev_t *, u16,
+ u16);
+
+#endif /* _VNET_DEV_PCI_H_ */
diff --git a/src/vnet/dev/port.c b/src/vnet/dev/port.c
new file mode 100644
index 00000000000..8a6df54cbc8
--- /dev/null
+++ b/src/vnet/dev/port.c
@@ -0,0 +1,748 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+
/* Log class for port-related messages: "dev/port". */
VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
  .class_name = "dev",
  .subclass_name = "port",
};

/* Placeholder node function. Per-port rx nodes are registered as siblings
   of port-rx-eth and receive a real function at interface creation time,
   so this template function must never actually execute. */
static uword
dummy_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
		vlib_frame_t *frame)
{
  ASSERT (0);
  return 0;
}

/* Template input node; per-port rx nodes are created as siblings of this
   node and therefore share its next-node arcs. Registered disabled. */
VLIB_REGISTER_NODE (port_rx_eth_node) = {
  .function = dummy_input_fn,
  .name = "port-rx-eth",
  .runtime_data_bytes = sizeof (vnet_dev_rx_node_runtime_t),
  .type = VLIB_NODE_TYPE_INPUT,
  .state = VLIB_NODE_STATE_DISABLED,
  .n_next_nodes = VNET_DEV_ETH_RX_PORT_N_NEXTS,
  .next_nodes = {
#define _(n, s) [VNET_DEV_ETH_RX_PORT_NEXT_##n] = s,
  foreach_vnet_dev_port_rx_next
#undef _
  },
};

/* Default rx next-node index per port type, used to set
   port->intf.rx_next_index at interface creation. */
u16 vnet_dev_default_next_index_by_port_type[] = {
  [VNET_DEV_PORT_TYPE_ETHERNET] = VNET_DEV_ETH_RX_PORT_NEXT_ETH_INPUT,
};

/* Feature arc starting at port-rx-eth and terminating at ethernet-input. */
VNET_FEATURE_ARC_INIT (eth_port_rx, static) = {
  .arc_name = "port-rx-eth",
  .start_nodes = VNET_FEATURES ("port-rx-eth"),
  .last_in_arc = "ethernet-input",
  .arc_index_ptr = &vnet_dev_main.eth_port_rx_feature_arc_index,
};

/* Standard rx features on the arc; all run before ethernet-input. */
VNET_FEATURE_INIT (l2_patch, static) = {
  .arc_name = "port-rx-eth",
  .node_name = "l2-patch",
  .runs_before = VNET_FEATURES ("ethernet-input"),
};

VNET_FEATURE_INIT (worker_handoff, static) = {
  .arc_name = "port-rx-eth",
  .node_name = "worker-handoff",
  .runs_before = VNET_FEATURES ("ethernet-input"),
};

VNET_FEATURE_INIT (span_input, static) = {
  .arc_name = "port-rx-eth",
  .node_name = "span-input",
  .runs_before = VNET_FEATURES ("ethernet-input"),
};

VNET_FEATURE_INIT (p2p_ethernet_node, static) = {
  .arc_name = "port-rx-eth",
  .node_name = "p2p-ethernet-input",
  .runs_before = VNET_FEATURES ("ethernet-input"),
};

VNET_FEATURE_INIT (ethernet_input, static) = {
  .arc_name = "port-rx-eth",
  .node_name = "ethernet-input",
  .runs_before = 0, /* not before any other features */
};
+
/* Release a port and all memory it owns. The port must be stopped; the
   driver's port free op, if any, runs first so it can release its own
   per-port resources. */
void
vnet_dev_port_free (vlib_main_t *vm, vnet_dev_port_t *port)
{
  vnet_dev_t *dev = port->dev;

  vnet_dev_port_validate (vm, port);

  ASSERT (port->started == 0);

  log_debug (dev, "port %u", port->port_id);

  if (port->port_ops.free)
    port->port_ops.free (vm, port);

  pool_free (port->secondary_hw_addr);
  pool_free (port->rx_queues);
  pool_free (port->tx_queues);
  vnet_dev_arg_free (&port->args);
  /* remove the back-pointer from the device's port pool, then free the
     port allocation itself */
  pool_put_index (dev->ports, port->index);
  clib_mem_free (port);
}
+
/* Refresh per-thread tx node runtime data. For every tx queue, on every
   thread the queue is assigned to, point that thread's tx node runtime at
   this port's hw interface and at the queue itself. */
void
vnet_dev_port_update_tx_node_runtime (vlib_main_t *vm, vnet_dev_port_t *port)
{
  vnet_dev_port_validate (vm, port);

  foreach_vnet_dev_port_tx_queue (q, port)
    {
      u32 ti;
      clib_bitmap_foreach (ti, q->assigned_threads)
	{
	  vlib_main_t *tvm = vlib_get_main_by_index (ti);
	  vlib_node_runtime_t *nr =
	    vlib_node_get_runtime (tvm, port->intf.tx_node_index);
	  vnet_dev_tx_node_runtime_t *tnr = vnet_dev_get_tx_node_runtime (nr);
	  tnr->hw_if_index = port->intf.hw_if_index;
	  tnr->tx_queue = q;
	}
    }
}
+
/* Stop a running port: first detach it from the runtime on every thread,
   then call the driver stop op, then mark all queues and the port itself
   as stopped. */
void
vnet_dev_port_stop (vlib_main_t *vm, vnet_dev_port_t *port)
{
  vnet_dev_t *dev = port->dev;
  vnet_dev_rt_op_t *ops = 0;
  u16 n_threads = vlib_get_n_threads ();

  log_debug (dev, "stopping port %u", port->port_id);

  /* build one runtime op per thread so every thread drops its references
     to this port before the hardware is stopped */
  for (u16 i = 0; i < n_threads; i++)
    {
      vnet_dev_rt_op_t op = { .thread_index = i, .port = port };
      vec_add1 (ops, op);
    }

  vnet_dev_rt_exec_ops (vm, dev, ops, vec_len (ops));
  vec_free (ops);

  port->port_ops.stop (vm, port);

  foreach_vnet_dev_port_rx_queue (q, port)
    {
      q->started = 0;
      log_debug (dev, "port %u rx queue %u stopped", port->port_id,
		 q->queue_id);
    }

  foreach_vnet_dev_port_tx_queue (q, port)
    {
      q->started = 0;
      log_debug (dev, "port %u tx queue %u stopped", port->port_id,
		 q->queue_id);
    }

  log_debug (dev, "port %u stopped", port->port_id);
  port->started = 0;
}
+
+vnet_dev_rv_t
+vnet_dev_port_start_all_rx_queues (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ vnet_dev_port_validate (vm, port);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ rv = vnet_dev_rx_queue_start (vm, q);
+ if (rv != VNET_DEV_OK)
+ return rv;
+ }
+ return rv;
+}
+
+vnet_dev_rv_t
+vnet_dev_port_start_all_tx_queues (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ vnet_dev_port_validate (vm, port);
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ rv = vnet_dev_tx_queue_start (vm, q);
+ if (rv != VNET_DEV_OK)
+ return rv;
+ }
+ return rv;
+}
+
/* Start a port: refresh tx node runtime data, invoke the driver start op,
   attach the port to the runtime on every thread, then mark enabled
   queues and the port as started. On driver failure the port is stopped
   and the driver's error is returned. */
vnet_dev_rv_t
vnet_dev_port_start (vlib_main_t *vm, vnet_dev_port_t *port)
{
  u16 n_threads = vlib_get_n_threads ();
  vnet_dev_t *dev = port->dev;
  vnet_dev_rt_op_t *ops = 0;
  vnet_dev_rv_t rv;

  vnet_dev_port_validate (vm, port);

  log_debug (dev, "starting port %u", port->port_id);

  vnet_dev_port_update_tx_node_runtime (vm, port);

  if ((rv = port->port_ops.start (vm, port)) != VNET_DEV_OK)
    {
      /* driver may have partially started; undo before reporting */
      vnet_dev_port_stop (vm, port);
      return rv;
    }

  /* one runtime op per thread so every thread picks up the port */
  for (u16 i = 0; i < n_threads; i++)
    {
      vnet_dev_rt_op_t op = { .thread_index = i, .port = port };
      vec_add1 (ops, op);
    }

  vnet_dev_rt_exec_ops (vm, dev, ops, vec_len (ops));
  vec_free (ops);

  foreach_vnet_dev_port_rx_queue (q, port)
    if (q->enabled)
      {
	log_debug (dev, "port %u rx queue %u started", port->port_id,
		   q->queue_id);
	q->started = 1;
      }

  foreach_vnet_dev_port_tx_queue (q, port)
    if (q->enabled)
      {
	log_debug (dev, "port %u tx queue %u started", port->port_id,
		   q->queue_id);
	q->started = 1;
      }

  port->started = 1;
  log_debug (dev, "port %u started", port->port_id);

  return VNET_DEV_OK;
}
+
/* Create a new port on a device from the driver-supplied add args:
   allocate port storage (with trailing driver data), copy configuration
   and ops, apply attribute-derived defaults, then run the driver's
   port alloc op. Returns the driver's error code on failure. */
vnet_dev_rv_t
vnet_dev_port_add (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_port_id_t id,
		   vnet_dev_port_add_args_t *args)
{
  vnet_dev_port_t **pp, *port;
  vnet_dev_rv_t rv = VNET_DEV_OK;

  ASSERT (args->port.attr.type != VNET_DEV_PORT_TYPE_UNKNOWN);
  ASSERT (args->port.attr.max_supported_rx_frame_size);

  /* port struct and driver-private data live in one allocation */
  port =
    vnet_dev_alloc_with_data (sizeof (vnet_dev_port_t), args->port.data_size);
  pool_get (dev->ports, pp);
  pp[0] = port;
  clib_memcpy (vnet_dev_get_port_data (port), args->port.initial_data,
	       args->port.data_size);
  port->port_id = id;
  port->index = pp - dev->ports;
  port->dev = dev;
  port->attr = args->port.attr;
  port->rx_queue_config = args->rx_queue.config;
  port->tx_queue_config = args->tx_queue.config;
  port->rx_queue_ops = args->rx_queue.ops;
  port->tx_queue_ops = args->tx_queue.ops;
  port->port_ops = args->port.ops;
  port->rx_node = *args->rx_node;
  port->tx_node = *args->tx_node;

  /* copy driver argument descriptors up to the END sentinel */
  if (args->port.args)
    for (vnet_dev_arg_t *a = args->port.args; a->type != VNET_DEV_ARG_END; a++)
      vec_add1 (port->args, *a);

  /* defaults out of port attributes */
  port->max_rx_frame_size = args->port.attr.max_supported_rx_frame_size;
  port->primary_hw_addr = args->port.attr.hw_addr;

  if (port->attr.type == VNET_DEV_PORT_TYPE_ETHERNET)
    {
      /* default ethernet ports to standard MTU when the driver can
	 change the max frame size later */
      if (port->max_rx_frame_size > 1514 &&
	  port->attr.caps.change_max_rx_frame_size)
	port->max_rx_frame_size = 1514;
    }

  if (port->port_ops.alloc)
    rv = port->port_ops.alloc (vm, port);

  if (rv == VNET_DEV_OK)
    port->initialized = 1;

  return rv;
}
+
/* Validate a port configuration change request without applying it.
   Performs generic checks (range, no-op, duplicates), then delegates to
   the driver's validate op; a driver without one cannot accept any
   change, so NOT_SUPPORTED is returned. On success the request is marked
   validated so a later vnet_dev_port_cfg_change can skip re-validation. */
vnet_dev_rv_t
vnet_dev_port_cfg_change_req_validate (vlib_main_t *vm, vnet_dev_port_t *port,
				       vnet_dev_port_cfg_change_req_t *req)
{
  vnet_dev_rv_t rv;
  vnet_dev_hw_addr_t *addr;
  int found;

  if (req->validated)
    return VNET_DEV_OK;

  switch (req->type)
    {
    case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
      if (req->max_rx_frame_size > port->attr.max_supported_rx_frame_size)
	return VNET_DEV_ERR_INVALID_VALUE;
      if (req->max_rx_frame_size == port->max_rx_frame_size)
	return VNET_DEV_ERR_NO_CHANGE;
      break;

    case VNET_DEV_PORT_CFG_PROMISC_MODE:
      if (req->promisc == port->promisc)
	return VNET_DEV_ERR_NO_CHANGE;
      break;

    case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
      if (clib_memcmp (&req->addr, &port->primary_hw_addr,
		       sizeof (vnet_dev_hw_addr_t)) == 0)
	return VNET_DEV_ERR_NO_CHANGE;
      break;

    case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
      /* reject duplicates among the already-configured addresses */
      pool_foreach (addr, port->secondary_hw_addr)
	if (clib_memcmp (addr, &req->addr, sizeof (*addr)) == 0)
	  return VNET_DEV_ERR_ALREADY_EXISTS;
      break;

    case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
      found = 0;
      pool_foreach (addr, port->secondary_hw_addr)
	if (clib_memcmp (addr, &req->addr, sizeof (*addr)) == 0)
	  found = 1;
      if (!found)
	return VNET_DEV_ERR_NO_SUCH_ENTRY;
      break;

    default:
      break;
    }

  if (port->port_ops.config_change_validate)
    {
      rv = port->port_ops.config_change_validate (vm, port, req);
      if (rv != VNET_DEV_OK)
	return rv;
    }
  else
    return VNET_DEV_ERR_NOT_SUPPORTED;

  req->validated = 1;
  return VNET_DEV_OK;
}
+
/* Apply a validated port configuration change: run the driver's
   config_change op, then mirror the accepted change into the port's
   software state (frame size, promisc flag, hw addresses, rx interrupt
   mode on one or all queues). Must run in the device process context. */
vnet_dev_rv_t
vnet_dev_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port,
			  vnet_dev_port_cfg_change_req_t *req)
{
  vnet_dev_rv_t rv = VNET_DEV_OK;
  vnet_dev_hw_addr_t *a;
  vnet_dev_rx_queue_t *rxq = 0;
  u8 enable = 0;

  vnet_dev_port_validate (vm, port);

  /* resolve the target queue up-front for single-queue interrupt mode
     changes so a bad queue id fails before touching the driver */
  if (req->type == VNET_DEV_PORT_CFG_RXQ_INTR_MODE_ENABLE ||
      req->type == VNET_DEV_PORT_CFG_RXQ_INTR_MODE_DISABLE)
    {
      if (req->all_queues == 0)
	{
	  rxq = vnet_dev_port_get_rx_queue_by_id (port, req->queue_id);
	  if (rxq == 0)
	    return VNET_DEV_ERR_BUG;
	}
    }

  if ((rv = vnet_dev_port_cfg_change_req_validate (vm, port, req)))
    return rv;

  if (port->port_ops.config_change)
    rv = port->port_ops.config_change (vm, port, req);
  else
    return VNET_DEV_ERR_NOT_SUPPORTED;

  if (rv != VNET_DEV_OK)
    return rv;

  switch (req->type)
    {
    case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
      port->max_rx_frame_size = req->max_rx_frame_size;
      break;

    case VNET_DEV_PORT_CFG_PROMISC_MODE:
      port->promisc = req->promisc;
      break;

    case VNET_DEV_PORT_CFG_RXQ_INTR_MODE_ENABLE:
      enable = 1;
      /* fall through - ENABLE and DISABLE share the code below,
	 distinguished only by 'enable' */
    case VNET_DEV_PORT_CFG_RXQ_INTR_MODE_DISABLE:
      if (req->all_queues)
	{
	  clib_bitmap_t *bmp = 0;
	  vnet_dev_rt_op_t *ops = 0;
	  u32 i;

	  /* collect the set of threads that poll affected queues so each
	     gets exactly one runtime update op */
	  foreach_vnet_dev_port_rx_queue (q, port)
	    {
	      q->interrupt_mode = enable;
	      bmp = clib_bitmap_set (bmp, q->rx_thread_index, 1);
	    }

	  clib_bitmap_foreach (i, bmp)
	    {
	      vnet_dev_rt_op_t op = { .port = port, .thread_index = i };
	      vec_add1 (ops, op);
	    }

	  vnet_dev_rt_exec_ops (vm, port->dev, ops, vec_len (ops));
	  clib_bitmap_free (bmp);
	  vec_free (ops);
	}
      else
	{
	  rxq->interrupt_mode = enable;
	  vnet_dev_rt_exec_ops (vm, port->dev,
				&(vnet_dev_rt_op_t){
				  .port = port,
				  .thread_index = rxq->rx_thread_index,
				},
				1);
	}
      break;

    case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
      clib_memcpy (&port->primary_hw_addr, &req->addr,
		   sizeof (vnet_dev_hw_addr_t));
      break;

    case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
      pool_get (port->secondary_hw_addr, a);
      clib_memcpy (a, &req->addr, sizeof (vnet_dev_hw_addr_t));
      break;

    case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
      /* validation guarantees the entry exists; break immediately after
	 removal since the pool is being mutated during iteration */
      pool_foreach (a, port->secondary_hw_addr)
	if (clib_memcmp (a, &req->addr, sizeof (vnet_dev_hw_addr_t)) == 0)
	  {
	    pool_put (port->secondary_hw_addr, a);
	    break;
	  }
      break;

    default:
      break;
    }

  return VNET_DEV_OK;
}
+
/* Driver-facing notification of link state/speed changes. Updates the
   port's cached state and, when an interface exists, propagates the
   change to the vnet hw interface layer. */
void
vnet_dev_port_state_change (vlib_main_t *vm, vnet_dev_port_t *port,
			    vnet_dev_port_state_changes_t changes)
{
  vnet_main_t *vnm = vnet_get_main ();

  vnet_dev_port_validate (vm, port);

  if (changes.change.link_speed)
    {
      port->speed = changes.link_speed;
      if (port->interface_created)
	vnet_hw_interface_set_link_speed (vnm, port->intf.hw_if_index,
					  changes.link_speed);
      log_debug (port->dev, "port speed changed to %u", changes.link_speed);
    }

  if (changes.change.link_state)
    {
      port->link_up = changes.link_state;
      if (port->interface_created)
	vnet_hw_interface_set_flags (
	  vnm, port->intf.hw_if_index,
	  changes.link_state ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
      log_debug (port->dev, "port link state changed to %s",
		 changes.link_state ? "up" : "down");
    }
}
+
/* Allocate the port-level counter set from a driver-provided counter
   descriptor array; the counter group is named after device and port. */
void
vnet_dev_port_add_counters (vlib_main_t *vm, vnet_dev_port_t *port,
			    vnet_dev_counter_t *counters, u16 n_counters)
{
  vnet_dev_port_validate (vm, port);

  port->counter_main =
    vnet_dev_counters_alloc (vm, counters, n_counters, "%s port %u counters",
			     port->dev->device_id, port->port_id);
}
+
+void
+vnet_dev_port_free_counters (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_port_validate (vm, port);
+
+ if (port->counter_main)
+ vnet_dev_counters_free (vm, port->counter_main);
+}
+
+vnet_dev_rv_t
+vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u16 n_threads = vlib_get_n_threads ();
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_port_t **pp;
+ vnet_dev_rv_t rv;
+ u16 ti = 0;
+
+ if (port->intf.name[0] == 0)
+ {
+ u8 *s;
+ s = format (0, "%s%u/%u",
+ dm->drivers[port->dev->driver_index].registration->name,
+ port->dev->index, port->index);
+ u32 n = vec_len (s);
+
+ if (n >= sizeof (port->intf.name))
+ {
+ vec_free (s);
+ return VNET_DEV_ERR_BUG;
+ }
+ clib_memcpy (port->intf.name, s, n);
+ port->intf.name[n] = 0;
+ vec_free (s);
+ }
+
+ log_debug (
+ dev, "allocating %u rx queues with size %u and %u tx queues with size %u",
+ port->intf.num_rx_queues, port->intf.rxq_sz, port->intf.num_tx_queues,
+ port->intf.txq_sz);
+
+ for (int i = 0; i < port->intf.num_rx_queues; i++)
+ if ((rv = vnet_dev_rx_queue_alloc (vm, port, port->intf.rxq_sz)) !=
+ VNET_DEV_OK)
+ goto error;
+
+ for (u32 i = 0; i < port->intf.num_tx_queues; i++)
+ if ((rv = vnet_dev_tx_queue_alloc (vm, port, port->intf.txq_sz)) !=
+ VNET_DEV_OK)
+ goto error;
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ q->assigned_threads = clib_bitmap_set (q->assigned_threads, ti, 1);
+ log_debug (dev, "port %u tx queue %u assigned to thread %u",
+ port->port_id, q->queue_id, ti);
+ if (++ti >= n_threads)
+ break;
+ }
+
+ /* pool of port pointers helps us to assign unique dev_instance */
+ pool_get (dm->ports_by_dev_instance, pp);
+ port->intf.dev_instance = pp - dm->ports_by_dev_instance;
+ pp[0] = port;
+
+ if (port->attr.type == VNET_DEV_PORT_TYPE_ETHERNET)
+ {
+ vnet_device_class_t *dev_class;
+ vnet_dev_driver_t *driver;
+ vnet_sw_interface_t *sw;
+ vnet_hw_interface_t *hw;
+ vnet_hw_if_caps_t caps = 0;
+ u32 rx_node_index;
+
+ driver = pool_elt_at_index (dm->drivers, dev->driver_index);
+
+ /* hack to provide per-port tx node function */
+ dev_class = vnet_get_device_class (vnm, driver->dev_class_index);
+ dev_class->tx_fn_registrations = port->tx_node.registrations;
+ dev_class->format_tx_trace = port->tx_node.format_trace;
+ dev_class->tx_function_error_counters = port->tx_node.error_counters;
+ dev_class->tx_function_n_errors = port->tx_node.n_error_counters;
+
+ /* create new interface including tx and output nodes */
+ port->intf.hw_if_index = vnet_eth_register_interface (
+ vnm, &(vnet_eth_interface_registration_t){
+ .address = port->primary_hw_addr.eth_mac,
+ .max_frame_size = port->max_rx_frame_size,
+ .dev_class_index = driver->dev_class_index,
+ .dev_instance = port->intf.dev_instance,
+ .cb.set_max_frame_size = vnet_dev_port_set_max_frame_size,
+ .cb.flag_change = vnet_dev_port_eth_flag_change,
+ });
+
+ sw = vnet_get_hw_sw_interface (vnm, port->intf.hw_if_index);
+ hw = vnet_get_hw_interface (vnm, port->intf.hw_if_index);
+ port->intf.sw_if_index = sw->sw_if_index;
+ vnet_hw_interface_set_flags (
+ vnm, port->intf.hw_if_index,
+ port->link_up ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
+ if (port->speed)
+ vnet_hw_interface_set_link_speed (vnm, port->intf.hw_if_index,
+ port->speed);
+
+ port->intf.tx_node_index = hw->tx_node_index;
+
+ caps |= port->attr.caps.interrupt_mode ? VNET_HW_IF_CAP_INT_MODE : 0;
+ caps |= port->attr.caps.mac_filter ? VNET_HW_IF_CAP_MAC_FILTER : 0;
+ caps |= port->attr.tx_offloads.tcp_gso ? VNET_HW_IF_CAP_TCP_GSO : 0;
+ caps |= port->attr.tx_offloads.ip4_cksum ? VNET_HW_IF_CAP_TX_CKSUM : 0;
+
+ if (caps)
+ vnet_hw_if_set_caps (vnm, port->intf.hw_if_index, caps);
+
+ /* create / reuse rx node */
+ if (vec_len (dm->free_rx_node_indices))
+ {
+ vlib_node_t *n;
+ rx_node_index = vec_pop (dm->free_rx_node_indices);
+ vlib_node_rename (vm, rx_node_index, "%s-rx", port->intf.name);
+ n = vlib_get_node (vm, rx_node_index);
+ n->function = vlib_node_get_preferred_node_fn_variant (
+ vm, port->rx_node.registrations);
+ n->format_trace = port->rx_node.format_trace;
+ vlib_register_errors (vm, rx_node_index,
+ port->rx_node.n_error_counters, 0,
+ port->rx_node.error_counters);
+ }
+ else
+ {
+ dev_class->format_tx_trace = port->tx_node.format_trace;
+ dev_class->tx_function_error_counters = port->tx_node.error_counters;
+ dev_class->tx_function_n_errors = port->tx_node.n_error_counters;
+ vlib_node_registration_t rx_node_reg = {
+ .sibling_of = "port-rx-eth",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_DISABLED,
+ .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
+ .node_fn_registrations = port->rx_node.registrations,
+ .format_trace = port->rx_node.format_trace,
+ .error_counters = port->rx_node.error_counters,
+ .n_errors = port->rx_node.n_error_counters,
+ };
+ rx_node_index =
+ vlib_register_node (vm, &rx_node_reg, "%s-rx", port->intf.name);
+ }
+ port->rx_node_assigned = 1;
+ port->intf.rx_node_index = rx_node_index;
+ port->intf.rx_next_index =
+ vnet_dev_default_next_index_by_port_type[port->attr.type];
+
+ vlib_worker_thread_node_runtime_update ();
+ log_debug (dev,
+ "ethernet interface created, hw_if_index %u sw_if_index %u "
+ "rx_node_index %u tx_node_index %u",
+ port->intf.hw_if_index, port->intf.sw_if_index,
+ port->intf.rx_node_index, port->intf.tx_node_index);
+ }
+
+ port->interface_created = 1;
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ vnet_buffer (&q->buffer_template)->sw_if_index[VLIB_RX] =
+ port->intf.sw_if_index;
+ /* poison to catch node not calling runtime update function */
+ q->next_index = ~0;
+ q->interrupt_mode = port->intf.default_is_intr_mode;
+ vnet_dev_rx_queue_rt_request (
+ vm, q, (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1 });
+ }
+
+ vnet_dev_port_update_tx_node_runtime (vm, port);
+
+ if (port->port_ops.init)
+ rv = port->port_ops.init (vm, port);
+
+error:
+ if (rv != VNET_DEV_OK)
+ vnet_dev_port_if_remove (vm, port);
+ return rv;
+}
+
/* Tear down a port's vnet interface: stop the port if running, recycle
   its rx node into the free list, delete the hw interface under the
   worker barrier, run the driver deinit op and free queues, counters and
   argument values. Safe to call on a partially-created interface. */
vnet_dev_rv_t
vnet_dev_port_if_remove (vlib_main_t *vm, vnet_dev_port_t *port)
{
  vnet_dev_main_t *dm = &vnet_dev_main;
  vnet_main_t *vnm = vnet_get_main ();

  vnet_dev_port_validate (vm, port);

  if (port->started)
    vnet_dev_port_stop (vm, port);

  if (port->rx_node_assigned)
    {
      /* rx nodes cannot be deleted, so rename and park on the free list
	 for reuse by the next interface */
      vlib_node_rename (vm, port->intf.rx_node_index, "deleted-%u",
			port->intf.rx_node_index);
      vec_add1 (dm->free_rx_node_indices, port->intf.rx_node_index);
      port->rx_node_assigned = 0;
    }

  if (port->interface_created)
    {
      vlib_worker_thread_barrier_sync (vm);
      vnet_delete_hw_interface (vnm, port->intf.hw_if_index);
      vlib_worker_thread_barrier_release (vm);
      pool_put_index (dm->ports_by_dev_instance, port->intf.dev_instance);
      port->interface_created = 0;
    }

  /* reset all interface-related state */
  port->intf = (typeof (port->intf)){};

  if (port->port_ops.deinit)
    port->port_ops.deinit (vm, port);

  foreach_vnet_dev_port_tx_queue (q, port)
    vnet_dev_tx_queue_free (vm, q);

  foreach_vnet_dev_port_rx_queue (q, port)
    vnet_dev_rx_queue_free (vm, q);

  vnet_dev_port_free_counters (vm, port);

  foreach_vnet_dev_port_args (v, port)
    vnet_dev_arg_clear_value (v);

  return VNET_DEV_OK;
}
/* Clear the port-level counters and the counters of every rx/tx queue
   that has its own counter set. */
void
vnet_dev_port_clear_counters (vlib_main_t *vm, vnet_dev_port_t *port)
{
  if (port->counter_main)
    vnet_dev_counters_clear (vm, port->counter_main);

  foreach_vnet_dev_port_rx_queue (q, port)
    if (q->counter_main)
      vnet_dev_counters_clear (vm, q->counter_main);

  foreach_vnet_dev_port_tx_queue (q, port)
    if (q->counter_main)
      vnet_dev_counters_clear (vm, q->counter_main);

  log_notice (port->dev, "counters cleared on port %u", port->port_id);
}
diff --git a/src/vnet/dev/process.c b/src/vnet/dev/process.c
new file mode 100644
index 00000000000..3c1f0b8d2d8
--- /dev/null
+++ b/src/vnet/dev/process.c
@@ -0,0 +1,474 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/error.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/log.h>
+
/* Log class for device-process messages: "dev/process". */
VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
  .class_name = "dev",
  .subclass_name = "process",
};

/* Event types delivered to a device's process node. CLOCK (~0) marks a
   timeout from vlib_process_wait_for_event_or_clock rather than a real
   event. */
typedef enum
{
  VNET_DEV_EVENT_PERIODIC_STOP,
  VNET_DEV_EVENT_PERIODIC_START,
  VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ,
  VNET_DEV_EVENT_PROCESS_QUIT,
  VNET_DEV_EVENT_CALL_OP,
  VNET_DEV_EVENT_CALL_OP_NO_RV,
  VNET_DEV_EVENT_CALL_OP_NO_WAIT,
  VNET_DEV_EVENT_CALL_PORT_OP,
  VNET_DEV_EVENT_CALL_PORT_OP_NO_RV,
  VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT,
  VNET_DEV_EVENT_CLOCK = ~0
} __clib_packed vnet_dev_event_t;

/* Payload attached to an event. The union member in use is selected by
   'event'; reply_needed/calling_process_index let the process node signal
   the result back to a waiting caller. */
typedef struct
{
  vnet_dev_event_t event;
  u8 reply_needed : 1;
  u32 calling_process_index;
  union
  {
    struct
    {
      vnet_dev_port_t *port;
      vnet_dev_port_cfg_change_req_t *change_req;
    } port_cfg_change;
    struct
    {
      vnet_dev_op_t *op;
    } call_op;
    struct
    {
      vnet_dev_op_no_rv_t *op;
    } call_op_no_rv;
    struct
    {
      vnet_dev_op_no_rv_t *op;
    } call_op_no_wait;
    struct
    {
      vnet_dev_port_op_t *op;
      vnet_dev_port_t *port;
    } call_port_op;
    struct
    {
      vnet_dev_port_op_no_rv_t *op;
      vnet_dev_port_t *port;
    } call_port_op_no_rv;
    struct
    {
      vnet_dev_port_op_no_rv_t *op;
      vnet_dev_port_t *port;
    } call_port_op_no_wait;
  };
} vnet_dev_event_data_t;
+
/* Dispatch a single event in the device process context and return its
   result. Events without a return value yield VNET_DEV_OK. */
static vnet_dev_rv_t
vnet_dev_process_one_event (vlib_main_t *vm, vnet_dev_t *dev,
			    vnet_dev_event_data_t *ed)
{
  vnet_dev_port_t *p;
  vnet_dev_rv_t rv = VNET_DEV_OK;

  switch (ed->event)
    {
    case VNET_DEV_EVENT_CLOCK:
      /* timeout wake-up, nothing to do */
      break;
    case VNET_DEV_EVENT_PROCESS_QUIT:
      log_debug (dev, "quit requested");
      dev->process_node_quit = 1;
      break;
    case VNET_DEV_EVENT_PERIODIC_START:
      log_debug (dev, "periodic start");
      dev->process_node_periodic = 1;
      break;
    case VNET_DEV_EVENT_PERIODIC_STOP:
      log_debug (dev, "periodic stop");
      dev->process_node_periodic = 0;
      break;
    case VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ:
      log_debug (dev, "port config change");
      p = ed->port_cfg_change.port;
      rv = vnet_dev_port_cfg_change (vm, p, ed->port_cfg_change.change_req);
      break;
    case VNET_DEV_EVENT_CALL_OP:
      log_debug (dev, "call op");
      rv = ed->call_op.op (vm, dev);
      break;
    case VNET_DEV_EVENT_CALL_OP_NO_RV:
      log_debug (dev, "call op no rv");
      ed->call_op_no_rv.op (vm, dev);
      break;
    case VNET_DEV_EVENT_CALL_OP_NO_WAIT:
      log_debug (dev, "call op no wait");
      ed->call_op_no_wait.op (vm, dev);
      break;
    case VNET_DEV_EVENT_CALL_PORT_OP:
      log_debug (dev, "call port op");
      rv = ed->call_port_op.op (vm, ed->call_port_op.port);
      break;
    case VNET_DEV_EVENT_CALL_PORT_OP_NO_RV:
      log_debug (dev, "call port op no rv");
      ed->call_port_op_no_rv.op (vm, ed->call_port_op_no_rv.port);
      break;
    case VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT:
      log_debug (dev, "call port op no wait");
      ed->call_port_op_no_wait.op (vm, ed->call_port_op_no_wait.port);
      break;
    default:
      ASSERT (0);
    }
  return rv;
}
+
/* Per-device process node body. Waits for events (or a clock when
   periodic ops are active), dispatches queued events, replies to waiting
   callers, and runs any periodic ops that are due. On quit the node is
   disabled, renamed and its index recycled via the free list. */
static uword
vnet_dev_process (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
{
  vnet_dev_main_t *dm = &vnet_dev_main;
  vnet_dev_periodic_op_t *pop, *pops = 0;
  f64 next = CLIB_F64_MAX;
  vnet_dev_event_data_t *event_data = 0, *new_event_data, *ed;

  /* the owning device pointer is stored in node runtime data by
     vnet_dev_process_create */
  vnet_dev_t *dev =
    *((vnet_dev_t **) vlib_node_get_runtime_data (vm, rt->node_index));

  log_debug (dev, "process '%U' started", format_vlib_node_name, vm,
	     rt->node_index);

  while (dev->process_node_quit == 0)
    {
      uword event_type;
      f64 now = vlib_time_now (vm);

      /* sleep until next periodic deadline only when periodic ops exist */
      if (dev->process_node_periodic)
	vlib_process_wait_for_event_or_clock (vm, next > now ? next - now : 0);
      else
	vlib_process_wait_for_event (vm);

      new_event_data = vlib_process_get_event_data (vm, &event_type);

      if (new_event_data)
	{
	  vec_append (event_data, new_event_data);
	  vlib_process_put_event_data (vm, new_event_data);

	  /* senders always use event type 0 */
	  ASSERT (event_type == 0);

	  vec_foreach (ed, event_data)
	    {
	      vnet_dev_rv_t rv;
	      rv = vnet_dev_process_one_event (vm, dev, ed);
	      if (ed->reply_needed)
		vlib_process_signal_event (vm, ed->calling_process_index,
					   ed->event, rv);
	    }
	  vec_reset_length (event_data);
	}

      /* collect due periodic ops first, then run them, so op bodies can
	 safely add/remove periodic ops without invalidating iteration */
      next = CLIB_F64_MAX;
      pool_foreach (pop, dev->periodic_ops)
	{
	  if (pop->last_run + pop->interval < now)
	    {
	      vec_add1 (pops, *pop);
	      pop->last_run = now;
	    }
	  if (pop->last_run + pop->interval < next)
	    next = pop->last_run + pop->interval;
	}

      vec_foreach (pop, pops)
	{
	  switch (pop->type)
	    {
	    case VNET_DEV_PERIODIC_OP_TYPE_DEV:
	      pop->dev_op (vm, pop->dev);
	      break;
	    case VNET_DEV_PERIODIC_OP_TYPE_PORT:
	      pop->port_op (vm, pop->port);
	      break;
	    default:
	      ASSERT (0);
	    }
	}
      vec_reset_length (pops);
    }

  log_debug (dev, "process '%U' quit", format_vlib_node_name, vm,
	     rt->node_index);
  vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
  vlib_node_rename (vm, rt->node_index, "deleted-%u", rt->node_index);

  /* add node index to the freelist */
  vec_add1 (dm->free_process_node_indices, rt->node_index);
  vec_free (pops);
  vec_free (event_data);
  return 0;
}
+
/* Create (or recycle) the per-device process node, store the device
   pointer in its runtime data, and start it. Process nodes cannot be
   deleted, so quit process nodes are kept on a free list and reused. */
vnet_dev_rv_t
vnet_dev_process_create (vlib_main_t *vm, vnet_dev_t *dev)
{
  vnet_dev_main_t *dm = &vnet_dev_main;
  vlib_node_t *n;
  uword l;

  l = vec_len (dm->free_process_node_indices);
  if (l > 0)
    {
      n = vlib_get_node (vm, dm->free_process_node_indices[l - 1]);
      /* recycled node may have been created for a different function;
	 repoint both node and runtime */
      if (n->function != vnet_dev_process)
	{
	  vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, n->index);
	  n->function = vnet_dev_process;
	  rt->function = vnet_dev_process;
	}
      vlib_node_rename (vm, n->index, "%s-process", dev->device_id);
      vlib_node_set_state (vm, n->index, VLIB_NODE_STATE_POLLING);
      vec_set_len (dm->free_process_node_indices, l - 1);
      log_debug (dev, "process node '%U' (%u) reused", format_vlib_node_name,
		 vm, n->index, n->index);
    }
  else
    {
      vlib_node_registration_t r = {
	.function = vnet_dev_process,
	.type = VLIB_NODE_TYPE_PROCESS,
	.process_log2_n_stack_bytes = 16,
	.runtime_data_bytes = sizeof (void *),
      };

      vlib_register_node (vm, &r, "%s-process", dev->device_id);

      n = vlib_get_node (vm, r.index);
      log_debug (dev, "process node '%U' (%u) created", format_vlib_node_name,
		 vm, r.index, r.index);
    }

  dev->process_node_index = n->index;
  *(vnet_dev_t **) vlib_node_get_runtime_data (vm, n->index) = dev;
  vlib_start_process (vm, n->runtime_index);

  return VNET_DEV_OK;
}
+
+static void
+vnet_dev_process_event_send (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_event_data_t ed)
+{
+ vnet_dev_event_data_t *edp = vlib_process_signal_event_data (
+ vm, dev->process_node_index, 0, 1, sizeof (ed));
+ *edp = ed;
+}
+
/* Post an event to the device process and wait (up to 5 s) for its
   result. When already running inside the device process, the event is
   executed inline to avoid self-deadlock. Returns the event's result, or
   VNET_DEV_ERR_PROCESS_REPLY on timeout or unexpected reply. */
static vnet_dev_rv_t
vnet_dev_process_event_send_and_wait (vlib_main_t *vm, vnet_dev_t *dev,
				      vnet_dev_event_data_t ed)
{
  uword event, *event_data = 0;
  vnet_dev_rv_t rv;

  ed.calling_process_index = vlib_get_current_process_node_index (vm);

  if (ed.calling_process_index == dev->process_node_index)
    return vnet_dev_process_one_event (vm, dev, &ed);

  ed.reply_needed = 1;
  vnet_dev_process_event_send (vm, dev, ed);
  vlib_process_wait_for_event_or_clock (vm, 5.0);
  event = vlib_process_get_events (vm, &event_data);
  if (event != ed.event)
    {
      /* the process replies with the event value as the type; a clock
	 wake-up (~0) means it never answered in time */
      log_err (dev, "%s",
	       event == VNET_DEV_EVENT_CLOCK ?
		 "timeout waiting for process node to respond" :
		 "unexpected event received");
      rv = VNET_DEV_ERR_PROCESS_REPLY;
    }
  else
    rv = event_data[0];
  vec_free (event_data);
  return rv;
}
+
+void
+vnet_dev_process_quit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vnet_dev_event_data_t ed = { .event = VNET_DEV_EVENT_PROCESS_QUIT };
+ vnet_dev_process_event_send_and_wait (vm, dev, ed);
+}
+
/* Register a periodic op unless an identical (op, arg) pair already
   exists. Starting the first op also wakes the process node into
   periodic mode. Returns 1 when added, 0 when duplicate. */
static int
_vnet_dev_poll_add (vlib_main_t *vm, vnet_dev_t *dev,
		    vnet_dev_periodic_op_t pop)
{
  const vnet_dev_event_data_t ed = { .event = VNET_DEV_EVENT_PERIODIC_START };
  vnet_dev_periodic_op_t *p;

  pool_foreach (p, dev->periodic_ops)
    if (p->op == pop.op && p->arg == pop.arg)
      return 0;

  pool_get_zero (dev->periodic_ops, p);
  *p = pop;
  if (pool_elts (dev->periodic_ops) == 1)
    vnet_dev_process_event_send (vm, dev, ed);
  return 1;
}
+
/* Unregister the periodic op matching (op, arg). Removing the last op
   tells the process node to leave periodic mode. Returns 1 when removed,
   0 when not found. */
static int
_vnet_dev_poll_remove (vlib_main_t *vm, vnet_dev_t *dev, void *op, void *arg)
{
  const vnet_dev_event_data_t ed = { .event = VNET_DEV_EVENT_PERIODIC_STOP };
  vnet_dev_periodic_op_t *pop;

  pool_foreach (pop, dev->periodic_ops)
    if (pop->op == op && pop->arg == arg)
      {
	/* returning immediately keeps the pool_foreach safe despite the
	   pool_put during iteration */
	pool_put (dev->periodic_ops, pop);
	if (pool_elts (dev->periodic_ops) == 0)
	  vnet_dev_process_event_send (vm, dev, ed);
	return 1;
      }
  return 0;
}
+
+void
+vnet_dev_poll_dev_add (vlib_main_t *vm, vnet_dev_t *dev, f64 interval,
+ vnet_dev_op_no_rv_t *dev_op)
+{
+ vnet_dev_periodic_op_t pop = {
+ .interval = interval,
+ .type = VNET_DEV_PERIODIC_OP_TYPE_DEV,
+ .dev_op = dev_op,
+ .dev = dev,
+ };
+
+ if (_vnet_dev_poll_add (vm, dev, pop) == 0)
+ log_warn (dev, "poll_dev_add: op already exists, not added");
+}
+
+void
+vnet_dev_poll_dev_remove (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_op_no_rv_t *dev_op)
+{
+ if (_vnet_dev_poll_remove (vm, dev, (void *) dev_op, (void *) dev) == 0)
+ log_warn (dev, "poll_dev_remove: op not found, not removed");
+}
+
+void
+vnet_dev_poll_port_add (vlib_main_t *vm, vnet_dev_port_t *port, f64 interval,
+ vnet_dev_port_op_no_rv_t *port_op)
+{
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_periodic_op_t pop = {
+ .interval = interval,
+ .type = VNET_DEV_PERIODIC_OP_TYPE_PORT,
+ .port_op = port_op,
+ .port = port,
+ };
+
+ if (_vnet_dev_poll_add (vm, dev, pop) == 0)
+ log_warn (dev, "poll_port_add: op already exists, not added");
+}
+
+void
+vnet_dev_poll_port_remove (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_op_no_rv_t *port_op)
+{
+ vnet_dev_t *dev = port->dev;
+ if (_vnet_dev_poll_remove (vm, dev, (void *) port_op, (void *) port) == 0)
+ log_warn (dev, "poll_port_remove: op not found, not removed");
+}
+
+vnet_dev_rv_t
+vnet_dev_process_port_cfg_change_req (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *pccr)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ,
+ .port_cfg_change = {
+ .port = port,
+ .change_req = pccr,
+ },
+ };
+
+ return vnet_dev_process_event_send_and_wait (vm, port->dev, ed);
+}
+
+vnet_dev_rv_t
+vnet_dev_process_call_op (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_op_t *op)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_CALL_OP,
+ .call_op.op = op,
+ };
+
+ return vnet_dev_process_event_send_and_wait (vm, dev, ed);
+}
+
+vnet_dev_rv_t
+vnet_dev_process_call_op_no_rv (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_op_no_rv_t *op)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_CALL_OP_NO_RV,
+ .call_op_no_rv.op = op,
+ };
+
+ return vnet_dev_process_event_send_and_wait (vm, dev, ed);
+}
+
+void
+vnet_dev_process_call_op_no_wait (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_op_no_rv_t *op)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_CALL_OP_NO_WAIT,
+ .call_op_no_rv.op = op,
+ };
+
+ vnet_dev_process_event_send (vm, dev, ed);
+}
+
+vnet_dev_rv_t
+vnet_dev_process_call_port_op (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_op_t *op)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_CALL_PORT_OP,
+ .call_port_op = { .op = op, .port = port },
+ };
+
+ return vnet_dev_process_event_send_and_wait (vm, port->dev, ed);
+}
+
+vnet_dev_rv_t
+vnet_dev_process_call_port_op_no_rv (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_op_no_rv_t *op)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_CALL_PORT_OP_NO_RV,
+ .call_port_op_no_rv = { .op = op, .port = port },
+ };
+
+ return vnet_dev_process_event_send_and_wait (vm, port->dev, ed);
+}
+
+void
+vnet_dev_process_call_port_op_no_wait (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_op_no_rv_t *op)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT,
+ .call_port_op_no_wait = { .op = op, .port = port },
+ };
+
+ vnet_dev_process_event_send (vm, port->dev, ed);
+}
diff --git a/src/vnet/dev/process.h b/src/vnet/dev/process.h
new file mode 100644
index 00000000000..9223973dffc
--- /dev/null
+++ b/src/vnet/dev/process.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_PROCESS_H_
+#define _VNET_DEV_PROCESS_H_
+
+#include <vppinfra/clib.h>
+
+#endif /* _VNET_DEV_PROCESS_H_ */
diff --git a/src/vnet/dev/queue.c b/src/vnet/dev/queue.c
new file mode 100644
index 00000000000..9a016a626fb
--- /dev/null
+++ b/src/vnet/dev/queue.c
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "queue", /* was "error"; match sibling files (runtime.c -> "runtime") */
+};
+
+void
+vnet_dev_rx_queue_free (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_port_t *port = rxq->port;
+ vnet_dev_t *dev = port->dev;
+ log_debug (dev, "queue %u", rxq->queue_id);
+ if (port->rx_queue_ops.free)
+ port->rx_queue_ops.free (vm, rxq);
+
+ vnet_dev_rx_queue_free_counters (vm, rxq);
+ pool_put_index (port->rx_queues, rxq->index);
+ clib_mem_free (rxq);
+}
+
+vnet_dev_rv_t
+vnet_dev_rx_queue_alloc (vlib_main_t *vm, vnet_dev_port_t *port,
+ u16 queue_size)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_rx_queue_t *rxq, **qp;
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ u16 n_threads = vlib_get_n_threads ();
+ u8 buffer_pool_index;
+
+ vnet_dev_port_validate (vm, port);
+
+ log_debug (dev, "port %u queue_size %u", port->port_id, queue_size);
+
+ if (pool_elts (port->rx_queues) == port->attr.max_rx_queues)
+ return VNET_DEV_ERR_NO_AVAIL_QUEUES;
+
+ rxq = vnet_dev_alloc_with_data (sizeof (vnet_dev_rx_queue_t),
+ port->rx_queue_config.data_size);
+ pool_get (port->rx_queues, qp);
+ qp[0] = rxq;
+ rxq->enabled = 1;
+ rxq->port = port;
+ rxq->size = queue_size;
+ rxq->index = qp - port->rx_queues;
+
+ /* default queue id - can be changed by driver */
+ rxq->queue_id = qp - port->rx_queues;
+ ASSERT (rxq->queue_id < port->attr.max_rx_queues);
+
+ if (n_threads > 1)
+ {
+ rxq->rx_thread_index = dm->next_rx_queue_thread++;
+ if (dm->next_rx_queue_thread >= n_threads)
+ dm->next_rx_queue_thread = 1;
+ }
+
+ buffer_pool_index =
+ vlib_buffer_pool_get_default_for_numa (vm, dev->numa_node);
+ vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index);
+
+ rxq->buffer_template = bp->buffer_template;
+ vnet_buffer (&rxq->buffer_template)->sw_if_index[VLIB_TX] = ~0;
+
+ rxq->next_index = vnet_dev_default_next_index_by_port_type[port->attr.type];
+
+ if (port->rx_queue_ops.alloc)
+ rv = port->rx_queue_ops.alloc (vm, rxq);
+
+ if (rv != VNET_DEV_OK)
+ {
+ log_err (dev, "driver rejected rx queue add with rv %d", rv);
+ vnet_dev_rx_queue_free (vm, rxq);
+ }
+ else
+ log_debug (dev, "queue %u added, assigned to thread %u", rxq->queue_id,
+ rxq->rx_thread_index);
+
+ return rv;
+}
+
+vnet_dev_rv_t
+vnet_dev_rx_queue_start (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ if (rxq->port->rx_queue_ops.start)
+ rv = rxq->port->rx_queue_ops.start (vm, rxq);
+
+ if (rv == VNET_DEV_OK)
+ rxq->started = 1;
+
+ return rv;
+}
+
+void
+vnet_dev_rx_queue_stop (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ if (rxq->port->rx_queue_ops.stop)
+ rxq->port->rx_queue_ops.stop (vm, rxq);
+ vlib_node_set_state (vm, rxq->port->intf.rx_node_index,
+ VLIB_NODE_STATE_DISABLED);
+ rxq->started = 0;
+}
+
+void
+vnet_dev_tx_queue_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ vnet_dev_port_t *port = txq->port;
+ vnet_dev_t *dev = port->dev;
+
+ vnet_dev_port_validate (vm, port);
+
+ log_debug (dev, "queue %u", txq->queue_id);
+ if (port->tx_queue_ops.free)
+ port->tx_queue_ops.free (vm, txq);
+
+ clib_bitmap_free (txq->assigned_threads);
+ vnet_dev_tx_queue_free_counters (vm, txq);
+ pool_put_index (port->tx_queues, txq->index);
+ clib_mem_free (txq);
+}
+
+vnet_dev_rv_t
+vnet_dev_tx_queue_alloc (vlib_main_t *vm, vnet_dev_port_t *port,
+ u16 queue_size)
+{
+ vnet_dev_tx_queue_t *txq, **qp;
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ log_debug (dev, "port %u size %u", port->port_id, queue_size);
+
+ if (pool_elts (port->tx_queues) == port->attr.max_tx_queues)
+ return VNET_DEV_ERR_NO_AVAIL_QUEUES;
+
+ txq = vnet_dev_alloc_with_data (sizeof (vnet_dev_tx_queue_t),
+ port->tx_queue_config.data_size);
+ pool_get (port->tx_queues, qp);
+ qp[0] = txq;
+ txq->enabled = 1;
+ txq->port = port;
+ txq->size = queue_size;
+ txq->index = qp - port->tx_queues;
+
+ /* default queue id - can be changed by driver */
+ txq->queue_id = qp - port->tx_queues;
+ ASSERT (txq->queue_id < port->attr.max_tx_queues);
+
+ if (port->tx_queue_ops.alloc)
+ rv = port->tx_queue_ops.alloc (vm, txq);
+
+ if (rv != VNET_DEV_OK)
+ {
+ log_err (dev, "driver rejected tx queue alloc with rv %d", rv);
+ vnet_dev_tx_queue_free (vm, txq);
+ }
+ else
+ log_debug (dev, "queue %u added", txq->queue_id);
+
+ return rv;
+}
+
+vnet_dev_rv_t
+vnet_dev_tx_queue_start (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ if (txq->port->tx_queue_ops.start)
+ rv = txq->port->tx_queue_ops.start (vm, txq);
+
+ if (rv == VNET_DEV_OK)
+ txq->started = 1;
+
+ return rv;
+}
+
+void
+vnet_dev_tx_queue_stop (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ if (txq->port->tx_queue_ops.stop)
+ txq->port->tx_queue_ops.stop (vm, txq);
+ txq->started = 0;
+}
+
+void
+vnet_dev_rx_queue_add_counters (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq,
+ vnet_dev_counter_t *counters, u16 n_counters)
+{
+ rxq->counter_main = vnet_dev_counters_alloc (
+ vm, counters, n_counters, "%s port %u rx-queue %u counters",
+ rxq->port->dev->device_id, rxq->port->port_id, rxq->queue_id);
+}
+
+void
+vnet_dev_rx_queue_free_counters (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ if (rxq->counter_main)
+ vnet_dev_counters_free (vm, rxq->counter_main);
+}
+
+void
+vnet_dev_tx_queue_add_counters (vlib_main_t *vm, vnet_dev_tx_queue_t *txq,
+ vnet_dev_counter_t *counters, u16 n_counters)
+{
+ txq->counter_main = vnet_dev_counters_alloc (
+ vm, counters, n_counters, "%s port %u tx-queue %u counters",
+ txq->port->dev->device_id, txq->port->port_id, txq->queue_id);
+}
+
+void
+vnet_dev_tx_queue_free_counters (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ if (!txq->counter_main)
+ return;
+
+ log_debug (txq->port->dev, "free");
+ vnet_dev_counters_free (vm, txq->counter_main);
+}
diff --git a/src/vnet/dev/runtime.c b/src/vnet/dev/runtime.c
new file mode 100644
index 00000000000..79c55cfbd53
--- /dev/null
+++ b/src/vnet/dev/runtime.c
@@ -0,0 +1,180 @@
+
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/bitmap.h"
+#include "vppinfra/lock.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/log.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "runtime",
+};
+
+static vnet_dev_rt_op_t *rt_ops;
+
+static void
+_vnet_dev_rt_exec_op (vlib_main_t *vm, vnet_dev_rt_op_t *op)
+{
+ vnet_dev_port_t *port = op->port;
+ vnet_dev_rx_queue_t *previous = 0, *first = 0;
+ vnet_dev_rx_node_runtime_t *rtd;
+ vlib_node_state_t state = VLIB_NODE_STATE_DISABLED;
+ u32 node_index = port->intf.rx_node_index;
+
+ rtd = vlib_node_get_runtime_data (vm, node_index);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ if (q->rx_thread_index != vm->thread_index)
+ continue;
+
+ if (q->interrupt_mode == 0)
+ state = VLIB_NODE_STATE_POLLING;
+ else if (state != VLIB_NODE_STATE_POLLING)
+ state = VLIB_NODE_STATE_INTERRUPT;
+
+ q->next_on_thread = 0;
+ if (previous == 0)
+ first = q;
+ else
+ previous->next_on_thread = q;
+
+ previous = q;
+ }
+
+ rtd->first_rx_queue = first;
+ vlib_node_set_state (vm, port->intf.rx_node_index, state);
+ __atomic_store_n (&op->completed, 1, __ATOMIC_RELEASE);
+}
+
+static uword
+vnet_dev_rt_mgmt_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ u16 thread_index = vm->thread_index;
+ vnet_dev_rt_op_t *op, *ops = __atomic_load_n (&rt_ops, __ATOMIC_ACQUIRE);
+ u32 n_pending = 0;
+ uword rv = 0;
+
+ vec_foreach (op, ops)
+ {
+ if (!op->completed && op->thread_index == thread_index)
+ {
+ if (op->in_order == 1 && n_pending)
+ {
+ vlib_node_set_interrupt_pending (vm, node->node_index);
+ return rv;
+ }
+ _vnet_dev_rt_exec_op (vm, op);
+ rv++;
+ }
+
+ if (op->completed == 0)
+ n_pending++;
+ }
+
+ return rv;
+}
+
+VLIB_REGISTER_NODE (vnet_dev_rt_mgmt_node, static) = {
+ .function = vnet_dev_rt_mgmt_node_fn,
+ .name = "dev-rt-mgmt",
+ .type = VLIB_NODE_TYPE_PRE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+};
+
+vnet_dev_rv_t
+vnet_dev_rt_exec_ops (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_rt_op_t *ops,
+ u32 n_ops)
+{
+ vnet_dev_rt_op_t *op = ops;
+ vnet_dev_rt_op_t *remote_ops = 0;
+ clib_bitmap_t *remote_bmp = 0;
+ u32 i;
+
+ ASSERT (rt_ops == 0);
+
+ if (vlib_worker_thread_barrier_held ())
+ {
+ for (op = ops; op < (ops + n_ops); op++)
+ {
+ vlib_main_t *tvm = vlib_get_main_by_index (op->thread_index);
+ _vnet_dev_rt_exec_op (tvm, op);
+ log_debug (
+ dev,
+ "port %u rx node runtime update on thread %u executed locally",
+ op->port->port_id, op->thread_index);
+ }
+ return VNET_DEV_OK;
+ }
+
+ while (n_ops)
+ {
+ if (op->thread_index != vm->thread_index)
+ break;
+
+ _vnet_dev_rt_exec_op (vm, op);
+ log_debug (
+ dev, "port %u rx node runtime update on thread %u executed locally",
+ op->port->port_id, op->thread_index);
+ op++;
+ n_ops--;
+ }
+
+ if (n_ops == 0)
+ return VNET_DEV_OK;
+
+ /* iterate only the n_ops not yet consumed above, starting at 'op' */
+ for (ops = op; op < (ops + n_ops); op++)
+ {
+ if (op->thread_index == vm->thread_index &&
+ (op->in_order == 0 || vec_len (remote_ops) == 0))
+ {
+ _vnet_dev_rt_exec_op (vm, op);
+ log_debug (dev,
+ "port %u rx node runtime update on thread "
+ "%u executed locally",
+ op->port->port_id, op->thread_index);
+ }
+ else
+ {
+ vec_add1 (remote_ops, *op);
+ log_debug (dev,
+ "port %u rx node runtime update on thread %u "
+ "enqueued for remote execution",
+ op->port->port_id, op->thread_index);
+ remote_bmp = clib_bitmap_set (remote_bmp, op->thread_index, 1);
+ }
+ }
+
+ if (remote_ops == 0)
+ return VNET_DEV_OK;
+
+ __atomic_store_n (&rt_ops, remote_ops, __ATOMIC_RELEASE);
+
+ clib_bitmap_foreach (i, remote_bmp)
+ {
+ vlib_node_set_interrupt_pending (vlib_get_main_by_index (i),
+ vnet_dev_rt_mgmt_node.index);
+ log_debug (dev, "interrupt sent to %s node on thread %u",
+ vnet_dev_rt_mgmt_node.name, i);
+ }
+
+ vec_foreach (op, remote_ops)
+ {
+ /* acquire load pairs with the RELEASE store in _vnet_dev_rt_exec_op */
+ while (__atomic_load_n (&op->completed, __ATOMIC_ACQUIRE) == 0)
+ vlib_process_suspend (vm, 5e-5);
+
+ log_debug (
+ dev, "port %u rx node runtime update on thread %u completed remotely",
+ op->port->port_id, op->thread_index);
+ }
+
+ __atomic_store_n (&rt_ops, 0, __ATOMIC_RELAXED);
+ vec_free (remote_ops);
+ clib_bitmap_free (remote_bmp);
+ return VNET_DEV_OK;
+}
diff --git a/src/vnet/dev/types.h b/src/vnet/dev/types.h
new file mode 100644
index 00000000000..006d18e5bc5
--- /dev/null
+++ b/src/vnet/dev/types.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_TYPES_H_
+#define _VNET_DEV_TYPES_H_
+
+#include <vppinfra/types.h>
+#include <vnet/dev/errors.h>
+
+typedef char vnet_dev_device_id_t[48];
+typedef char vnet_dev_if_name_t[32];
+typedef char vnet_dev_driver_name_t[16];
+typedef char vnet_dev_bus_name_t[8];
+typedef u16 vnet_dev_port_id_t;
+typedef struct vnet_dev vnet_dev_t;
+typedef struct vnet_dev_port vnet_dev_port_t;
+typedef struct vnet_dev_rx_queue vnet_dev_rx_queue_t;
+typedef struct vnet_dev_tx_queue vnet_dev_tx_queue_t;
+
+typedef enum
+{
+ VNET_DEV_MINUS_OK = 0,
+#define _(n, d) VNET_DEV_ERR_MINUS_##n,
+ foreach_vnet_dev_rv_type
+#undef _
+} vnet_dev_minus_rv_t;
+
+typedef enum
+{
+ VNET_DEV_OK = 0,
+#define _(n, d) VNET_DEV_ERR_##n = -(VNET_DEV_ERR_MINUS_##n),
+ foreach_vnet_dev_rv_type
+#undef _
+} vnet_dev_rv_t;
+
+/* do not change bit assignments - API dependency */
+#define foreach_vnet_dev_flag _ (0, NO_STATS, "don't poll device stats")
+
+typedef union
+{
+ enum
+ {
+#define _(b, n, d) VNET_DEV_F_##n = 1ull << (b),
+ foreach_vnet_dev_flag
+#undef _
+ } e;
+ u32 n;
+} vnet_dev_flags_t;
+
+/* do not change bit assignments - API dependency */
+#define foreach_vnet_dev_port_flag \
+ _ (0, INTERRUPT_MODE, "enable interrupt mode")
+
+typedef union
+{
+ enum
+ {
+#define _(b, n, d) VNET_DEV_PORT_F_##n = 1ull << (b),
+ foreach_vnet_dev_port_flag
+#undef _
+ } e;
+ u32 n;
+} vnet_dev_port_flags_t;
+
+#endif /* _VNET_DEV_TYPES_H_ */
diff --git a/src/vnet/devices/af_packet/FEATURE.yaml b/src/vnet/devices/af_packet/FEATURE.yaml
deleted file mode 100644
index 4a11ea2beb5..00000000000
--- a/src/vnet/devices/af_packet/FEATURE.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
----
-name: host-interface Device AF_PACKET
-maintainer: Damjan Marion <damarion@cisco.com>
-features:
- - L4 checksum offload
- - GSO offload
-description: "Create a host interface that will attach to a linux AF_PACKET
- interface, one side of a veth pair. The veth pair must
- already exist. Once created, a new host interface will
- exist in VPP with the name 'host-<ifname>', where '<ifname>'
- is the name of the specified veth pair. Use the 'show interface'
- command to display host interface details."
-missing:
- - API dump details beyond sw_if_index and name
-state: production
-properties: [API, CLI, STATS, MULTITHREAD]
diff --git a/src/vnet/devices/af_packet/af_packet.api b/src/vnet/devices/af_packet/af_packet.api
deleted file mode 100644
index 4a5cfb0fc3a..00000000000
--- a/src/vnet/devices/af_packet/af_packet.api
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright (c) 2015-2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-option version = "2.0.0";
-
-import "vnet/interface_types.api";
-import "vnet/ethernet/ethernet_types.api";
-
-/** \brief Create host-interface
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param hw_addr - interface MAC
- @param use_random_hw_addr - use random generated MAC
- @param host_if_name - interface name
-*/
-define af_packet_create
-{
- u32 client_index;
- u32 context;
-
- vl_api_mac_address_t hw_addr;
- bool use_random_hw_addr;
- string host_if_name[64];
-};
-
-/** \brief Create host-interface response
- @param context - sender context, to match reply w/ request
- @param retval - return value for request
-*/
-define af_packet_create_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-
-/** \brief Create host-interface
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param hw_addr - interface MAC
- @param use_random_hw_addr - use random generated MAC
- @param host_if_name - interface name
- @param rx_frame_size - frame size for RX
- @param tx_frame_size - frame size for TX
- @param rx_frames_per_block - frames per block for RX
- @param tx_frames_per_block - frames per block for TX
- @param flags - flags for the af_packet interface creation
- @param num_rx_queues - number of rx queues
-*/
-define af_packet_create_v2
-{
- u32 client_index;
- u32 context;
-
- vl_api_mac_address_t hw_addr;
- bool use_random_hw_addr;
- string host_if_name[64];
- u32 rx_frame_size;
- u32 tx_frame_size;
- u32 rx_frames_per_block;
- u32 tx_frames_per_block;
- u32 flags;
- u16 num_rx_queues [default=1];
-};
-
-/** \brief Create host-interface response
- @param context - sender context, to match reply w/ request
- @param retval - return value for request
-*/
-define af_packet_create_v2_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-
-enum af_packet_mode {
- AF_PACKET_API_MODE_ETHERNET = 1, /* mode ethernet */
- AF_PACKET_API_MODE_IP = 2, /* mode ip */
-};
-
-enum af_packet_flags {
- AF_PACKET_API_FLAG_QDISC_BYPASS = 1, /* enable the qdisc bypass */
- AF_PACKET_API_FLAG_CKSUM_GSO = 2, /* enable checksum/gso */
-};
-
-/** \brief Create host-interface
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param mode - 1 - Ethernet, 2 - IP
- @param hw_addr - interface MAC
- @param use_random_hw_addr - use random generated MAC
- @param host_if_name - interface name
- @param rx_frame_size - frame size for RX
- @param tx_frame_size - frame size for TX
- @param rx_frames_per_block - frames per block for RX
- @param tx_frames_per_block - frames per block for TX
- @param flags - flags for the af_packet interface creation
- @param num_rx_queues - number of rx queues
- @param num_tx_queues - number of tx queues
-*/
-define af_packet_create_v3
-{
- u32 client_index;
- u32 context;
-
- vl_api_af_packet_mode_t mode;
- vl_api_mac_address_t hw_addr;
- bool use_random_hw_addr;
- string host_if_name[64];
- u32 rx_frame_size;
- u32 tx_frame_size;
- u32 rx_frames_per_block;
- u32 tx_frames_per_block;
- vl_api_af_packet_flags_t flags;
- u16 num_rx_queues [default=1];
- u16 num_tx_queues [default=1];
-};
-
-/** \brief Create host-interface response
- @param context - sender context, to match reply w/ request
- @param retval - return value for request
-*/
-define af_packet_create_v3_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-
-/** \brief Delete host-interface
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param host_if_name - interface name
-*/
-autoreply define af_packet_delete
-{
- u32 client_index;
- u32 context;
-
- string host_if_name[64];
-};
-
-/** \brief Set l4 offload checksum calculation
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
-*/
-autoreply define af_packet_set_l4_cksum_offload
-{
- u32 client_index;
- u32 context;
-
- vl_api_interface_index_t sw_if_index;
- bool set;
-};
-
-/** \brief Dump af_packet interfaces request */
-define af_packet_dump
-{
- u32 client_index;
- u32 context;
-};
-
-/** \brief Reply for af_packet dump request
- @param sw_if_index - software index of af_packet interface
- @param host_if_name - interface name
-*/
-define af_packet_details
-{
- u32 context;
- vl_api_interface_index_t sw_if_index;
- string host_if_name[64];
-};
-
-/*
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c
deleted file mode 100644
index ec65bf6d493..00000000000
--- a/src/vnet/devices/af_packet/af_packet.c
+++ /dev/null
@@ -1,849 +0,0 @@
-/*
- *------------------------------------------------------------------
- * af_packet.c - linux kernel packet interface
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <sys/ioctl.h>
-#include <net/if.h>
-#include <dirent.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <fcntl.h>
-
-#include <vppinfra/linux/sysfs.h>
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vnet/ip/ip.h>
-#include <vnet/devices/netlink.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/interface/rx_queue_funcs.h>
-#include <vnet/interface/tx_queue_funcs.h>
-
-#include <vnet/devices/af_packet/af_packet.h>
-
-af_packet_main_t af_packet_main;
-
-VNET_HW_INTERFACE_CLASS (af_packet_ip_device_hw_interface_class, static) = {
- .name = "af-packet-ip-device",
- .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
-};
-
-#define AF_PACKET_DEFAULT_TX_FRAMES_PER_BLOCK 1024
-#define AF_PACKET_DEFAULT_TX_FRAME_SIZE (2048 * 33) // GSO packet of 64KB
-#define AF_PACKET_TX_BLOCK_NR 1
-
-#define AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK 32
-#define AF_PACKET_DEFAULT_RX_FRAME_SIZE 2048
-#define AF_PACKET_RX_BLOCK_NR 160
-
-/*defined in net/if.h but clashes with dpdk headers */
-unsigned int if_nametoindex (const char *ifname);
-
-static clib_error_t *
-af_packet_eth_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hi,
- u32 frame_size)
-{
- clib_error_t *error, *rv;
- af_packet_main_t *apm = &af_packet_main;
- af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, hi->dev_instance);
-
- error = vnet_netlink_set_link_mtu (apif->host_if_index,
- frame_size + hi->frame_overhead);
-
- if (error)
- {
- vlib_log_err (apm->log_class, "netlink failed to change MTU: %U",
- format_clib_error, error);
- rv = vnet_error (VNET_ERR_SYSCALL_ERROR_1, "netlink error: %U",
- format_clib_error, error);
- clib_error_free (error);
- return rv;
- }
- else
- apif->host_mtu = frame_size + hi->frame_overhead;
- return 0;
-}
-
-static int
-af_packet_read_mtu (af_packet_if_t *apif)
-{
- af_packet_main_t *apm = &af_packet_main;
- clib_error_t *error;
- error = vnet_netlink_get_link_mtu (apif->host_if_index, &apif->host_mtu);
- if (error)
- {
- vlib_log_err (apm->log_class, "netlink failed to get MTU: %U",
- format_clib_error, error);
- clib_error_free (error);
- return VNET_API_ERROR_SYSCALL_ERROR_1;
- }
- return 0;
-}
-
-static clib_error_t *
-af_packet_fd_read_ready (clib_file_t * uf)
-{
- vnet_main_t *vnm = vnet_get_main ();
-
- /* Schedule the rx node */
- vnet_hw_if_rx_queue_set_int_pending (vnm, uf->private_data);
- return 0;
-}
-
-static int
-is_bridge (const u8 * host_if_name)
-{
- u8 *s;
- DIR *dir = NULL;
-
- s = format (0, "/sys/class/net/%s/bridge%c", host_if_name, 0);
- dir = opendir ((char *) s);
- vec_free (s);
-
- if (dir)
- {
- closedir (dir);
- return 0;
- }
-
- return -1;
-}
-
-static void
-af_packet_set_rx_queues (vlib_main_t *vm, af_packet_if_t *apif)
-{
- vnet_main_t *vnm = vnet_get_main ();
- af_packet_queue_t *rx_queue;
-
- vnet_hw_if_set_input_node (vnm, apif->hw_if_index,
- af_packet_input_node.index);
-
- vec_foreach (rx_queue, apif->rx_queues)
- {
- rx_queue->queue_index = vnet_hw_if_register_rx_queue (
- vnm, apif->hw_if_index, rx_queue->queue_id, VNET_HW_IF_RXQ_THREAD_ANY);
-
- {
- clib_file_t template = { 0 };
- template.read_function = af_packet_fd_read_ready;
- template.file_descriptor = rx_queue->fd;
- template.private_data = rx_queue->queue_index;
- template.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED;
- template.description =
- format (0, "%U queue %u", format_af_packet_device_name,
- apif->dev_instance, rx_queue->queue_id);
- rx_queue->clib_file_index = clib_file_add (&file_main, &template);
- }
- vnet_hw_if_set_rx_queue_file_index (vnm, rx_queue->queue_index,
- rx_queue->clib_file_index);
- vnet_hw_if_set_rx_queue_mode (vnm, rx_queue->queue_index,
- VNET_HW_IF_RX_MODE_INTERRUPT);
- rx_queue->mode = VNET_HW_IF_RX_MODE_INTERRUPT;
- }
- vnet_hw_if_update_runtime_data (vnm, apif->hw_if_index);
-}
-
-static void
-af_packet_set_tx_queues (vlib_main_t *vm, af_packet_if_t *apif)
-{
- vnet_main_t *vnm = vnet_get_main ();
- af_packet_main_t *apm = &af_packet_main;
- af_packet_queue_t *tx_queue;
-
- vec_foreach (tx_queue, apif->tx_queues)
- {
- tx_queue->queue_index = vnet_hw_if_register_tx_queue (
- vnm, apif->hw_if_index, tx_queue->queue_id);
- }
-
- if (apif->num_txqs == 0)
- {
- vlib_log_err (apm->log_class, "Interface %U has 0 txq",
- format_vnet_hw_if_index_name, vnm, apif->hw_if_index);
- return;
- }
-
- for (u32 j = 0; j < vlib_get_n_threads (); j++)
- {
- u32 qi = apif->tx_queues[j % apif->num_txqs].queue_index;
- vnet_hw_if_tx_queue_assign_thread (vnm, qi, j);
- }
-
- vnet_hw_if_update_runtime_data (vnm, apif->hw_if_index);
-}
-
-static int
-create_packet_v3_sock (int host_if_index, tpacket_req3_t *rx_req,
- tpacket_req3_t *tx_req, int *fd, af_packet_ring_t *ring,
- u32 fanout_id, af_packet_if_flags_t *flags)
-{
- af_packet_main_t *apm = &af_packet_main;
- struct sockaddr_ll sll;
- socklen_t req_sz = sizeof (tpacket_req3_t);
- int ret;
- int ver = TPACKET_V3;
- u32 ring_sz = 0;
-
- if (rx_req)
- ring_sz += rx_req->tp_block_size * rx_req->tp_block_nr;
-
- if (tx_req)
- ring_sz += tx_req->tp_block_size * tx_req->tp_block_nr;
-
- if ((*fd = socket (AF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 0)
- {
- vlib_log_err (apm->log_class,
- "Failed to create AF_PACKET socket: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- /* bind before rx ring is cfged so we don't receive packets from other interfaces */
- clib_memset (&sll, 0, sizeof (sll));
- sll.sll_family = PF_PACKET;
- sll.sll_protocol = htons (ETH_P_ALL);
- sll.sll_ifindex = host_if_index;
- if (bind (*fd, (struct sockaddr *) &sll, sizeof (sll)) < 0)
- {
- vlib_log_err (apm->log_class,
- "Failed to bind rx packet socket: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- if (setsockopt (*fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof (ver)) < 0)
- {
- vlib_log_err (apm->log_class,
- "Failed to set rx packet interface version: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- int opt = 1;
- if (setsockopt (*fd, SOL_PACKET, PACKET_LOSS, &opt, sizeof (opt)) < 0)
- {
- vlib_log_err (
- apm->log_class,
- "Failed to set packet tx ring error handling option: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- if (*flags & AF_PACKET_IF_FLAGS_CKSUM_GSO)
- {
-
- int opt2 = 1;
- if (setsockopt (*fd, SOL_PACKET, PACKET_VNET_HDR, &opt2, sizeof (opt2)) <
- 0)
- {
- // remove the flag
- *flags &= ~AF_PACKET_IF_FLAGS_CKSUM_GSO;
- vlib_log_debug (apm->log_class,
- "Failed to set packet vnet hdr error handling "
- "option: %s (errno %d)",
- strerror (errno), errno);
- }
- }
-
-#if defined(PACKET_QDISC_BYPASS)
- if (*flags & AF_PACKET_IF_FLAGS_QDISC_BYPASS)
- /* Introduced with Linux 3.14 so the ifdef should eventually be removed */
- if (setsockopt (*fd, SOL_PACKET, PACKET_QDISC_BYPASS, &opt, sizeof (opt)) <
- 0)
- {
- // remove the flag
- *flags &= ~AF_PACKET_IF_FLAGS_QDISC_BYPASS;
- vlib_log_debug (apm->log_class,
- "Failed to set qdisc bypass error "
- "handling option: %s (errno %d)",
- strerror (errno), errno);
- }
-#endif
-
- if (rx_req)
- {
- if (*flags & AF_PACKET_IF_FLAGS_FANOUT)
- {
- int fanout = ((fanout_id & 0xffff) | ((PACKET_FANOUT_HASH) << 16));
- if (setsockopt (*fd, SOL_PACKET, PACKET_FANOUT, &fanout,
- sizeof (fanout)) < 0)
- {
- // remove the flag
- *flags &= ~AF_PACKET_IF_FLAGS_FANOUT;
- vlib_log_err (apm->log_class,
- "Failed to set fanout options: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
- }
-
- if (setsockopt (*fd, SOL_PACKET, PACKET_RX_RING, rx_req, req_sz) < 0)
- {
- vlib_log_err (apm->log_class,
- "Failed to set packet rx ring options: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
- }
-
- if (tx_req)
- if (setsockopt (*fd, SOL_PACKET, PACKET_TX_RING, tx_req, req_sz) < 0)
- {
- vlib_log_err (apm->log_class,
- "Failed to set packet tx ring options: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- ring->ring_start_addr = mmap (NULL, ring_sz, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_LOCKED, *fd, 0);
- if (ring->ring_start_addr == MAP_FAILED)
- {
- vlib_log_err (apm->log_class, "mmap failure: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- ring->ring_size = ring_sz;
-
- return 0;
-error:
- if (*fd >= 0)
- {
- close (*fd);
- *fd = -1;
- }
- return ret;
-}
-
-int
-af_packet_queue_init (vlib_main_t *vm, af_packet_if_t *apif,
- af_packet_create_if_arg_t *arg,
- af_packet_queue_t *rx_queue, af_packet_queue_t *tx_queue,
- u8 queue_id)
-{
- af_packet_main_t *apm = &af_packet_main;
- tpacket_req3_t *rx_req = 0;
- tpacket_req3_t *tx_req = 0;
- int ret, fd = -1;
- af_packet_ring_t ring = { 0 };
- u8 *ring_addr = 0;
- u32 rx_frames_per_block, tx_frames_per_block;
- u32 rx_frame_size, tx_frame_size;
- u32 i = 0;
-
- if (rx_queue)
- {
- rx_frames_per_block = arg->rx_frames_per_block ?
- arg->rx_frames_per_block :
- AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK;
-
- rx_frame_size = arg->rx_frame_size ? arg->rx_frame_size :
- AF_PACKET_DEFAULT_RX_FRAME_SIZE;
- vec_validate (rx_queue->rx_req, 0);
- rx_queue->rx_req->tp_block_size = rx_frame_size * rx_frames_per_block;
- rx_queue->rx_req->tp_frame_size = rx_frame_size;
- rx_queue->rx_req->tp_block_nr = AF_PACKET_RX_BLOCK_NR;
- rx_queue->rx_req->tp_frame_nr =
- AF_PACKET_RX_BLOCK_NR * rx_frames_per_block;
- rx_queue->rx_req->tp_retire_blk_tov = 1; // 1 ms block timout
- rx_queue->rx_req->tp_feature_req_word = 0;
- rx_queue->rx_req->tp_sizeof_priv = 0;
- rx_req = rx_queue->rx_req;
- }
-
- if (tx_queue)
- {
- tx_frames_per_block = arg->tx_frames_per_block ?
- arg->tx_frames_per_block :
- AF_PACKET_DEFAULT_TX_FRAMES_PER_BLOCK;
- tx_frame_size = arg->tx_frame_size ? arg->tx_frame_size :
- AF_PACKET_DEFAULT_TX_FRAME_SIZE;
-
- vec_validate (tx_queue->tx_req, 0);
- tx_queue->tx_req->tp_block_size = tx_frame_size * tx_frames_per_block;
- tx_queue->tx_req->tp_frame_size = tx_frame_size;
- tx_queue->tx_req->tp_block_nr = AF_PACKET_TX_BLOCK_NR;
- tx_queue->tx_req->tp_frame_nr =
- AF_PACKET_TX_BLOCK_NR * tx_frames_per_block;
- tx_queue->tx_req->tp_retire_blk_tov = 0;
- tx_queue->tx_req->tp_sizeof_priv = 0;
- tx_queue->tx_req->tp_feature_req_word = 0;
- tx_req = tx_queue->tx_req;
- }
-
- if (rx_queue || tx_queue)
- {
- ret = create_packet_v3_sock (apif->host_if_index, rx_req, tx_req, &fd,
- &ring, apif->dev_instance, &arg->flags);
-
- if (ret != 0)
- goto error;
-
- vec_add1 (apif->rings, ring);
- ring_addr = ring.ring_start_addr;
- }
-
- if (rx_queue)
- {
- rx_queue->fd = fd;
- vec_validate (rx_queue->rx_ring, rx_queue->rx_req->tp_block_nr - 1);
- vec_foreach_index (i, rx_queue->rx_ring)
- {
- rx_queue->rx_ring[i] =
- ring_addr + i * rx_queue->rx_req->tp_block_size;
- }
-
- rx_queue->next_rx_block = 0;
- rx_queue->queue_id = queue_id;
- rx_queue->is_rx_pending = 0;
- ring_addr = ring_addr + rx_queue->rx_req->tp_block_size *
- rx_queue->rx_req->tp_block_nr;
- }
-
- if (tx_queue)
- {
- tx_queue->fd = fd;
- vec_validate (tx_queue->tx_ring, tx_queue->tx_req->tp_block_nr - 1);
- vec_foreach_index (i, tx_queue->tx_ring)
- {
- tx_queue->tx_ring[i] =
- ring_addr + i * tx_queue->tx_req->tp_block_size;
- }
-
- tx_queue->next_tx_frame = 0;
- tx_queue->queue_id = queue_id;
- tx_queue->is_tx_pending = 0;
- clib_spinlock_init (&tx_queue->lockp);
- }
-
- return 0;
-error:
- vlib_log_err (apm->log_class, "Failed to set queue %u error", queue_id);
- if (rx_queue)
- vec_free (rx_queue->rx_req);
- if (tx_queue)
- vec_free (tx_queue->tx_req);
- return ret;
-}
-
-int
-af_packet_device_init (vlib_main_t *vm, af_packet_if_t *apif,
- af_packet_create_if_arg_t *args)
-{
- af_packet_main_t *apm = &af_packet_main;
- af_packet_queue_t *rx_queue = 0;
- af_packet_queue_t *tx_queue = 0;
- u16 nq = clib_min (args->num_rxqs, args->num_txqs);
- u16 i = 0;
- int ret = 0;
-
- // enable fanout feature for multi-rxqs
- if (args->num_rxqs > 1)
- args->flags |= AF_PACKET_IF_FLAGS_FANOUT;
-
- vec_validate (apif->rx_queues, args->num_rxqs - 1);
- vec_validate (apif->tx_queues, args->num_txqs - 1);
-
- for (; i < nq; i++)
- {
- rx_queue = vec_elt_at_index (apif->rx_queues, i);
- tx_queue = vec_elt_at_index (apif->tx_queues, i);
- ret = af_packet_queue_init (vm, apif, args, rx_queue, tx_queue, i);
- if (ret != 0)
- goto error;
- }
-
- if (args->num_rxqs > args->num_txqs)
- {
- for (; i < args->num_rxqs; i++)
- {
- rx_queue = vec_elt_at_index (apif->rx_queues, i);
- ret = af_packet_queue_init (vm, apif, args, rx_queue, 0, i);
- if (ret != 0)
- goto error;
- }
- }
- else if (args->num_txqs > args->num_rxqs)
- {
- for (; i < args->num_txqs; i++)
- {
- tx_queue = vec_elt_at_index (apif->tx_queues, i);
- ret = af_packet_queue_init (vm, apif, args, 0, tx_queue, i);
- if (ret != 0)
- goto error;
- }
- }
-
- apif->num_rxqs = args->num_rxqs;
- apif->num_txqs = args->num_txqs;
-
- return 0;
-error:
- vlib_log_err (apm->log_class, "Failed to init device error");
- return ret;
-}
-
-int
-af_packet_create_if (af_packet_create_if_arg_t *arg)
-{
- af_packet_main_t *apm = &af_packet_main;
- vlib_main_t *vm = vlib_get_main ();
- int fd2 = -1;
- struct ifreq ifr;
- af_packet_if_t *apif = 0;
- u8 hw_addr[6];
- vnet_sw_interface_t *sw;
- vnet_main_t *vnm = vnet_get_main ();
- vnet_hw_if_caps_t caps = VNET_HW_IF_CAP_INT_MODE;
- uword *p;
- uword if_index;
- u8 *host_if_name_dup = 0;
- int host_if_index = -1;
- int ret = 0;
-
- p = mhash_get (&apm->if_index_by_host_if_name, arg->host_if_name);
- if (p)
- {
- apif = vec_elt_at_index (apm->interfaces, p[0]);
- arg->sw_if_index = apif->sw_if_index;
- return VNET_API_ERROR_IF_ALREADY_EXISTS;
- }
-
- host_if_name_dup = vec_dup (arg->host_if_name);
-
- /*
- * make sure host side of interface is 'UP' before binding AF_PACKET
- * socket on it.
- */
- if ((fd2 = socket (AF_UNIX, SOCK_DGRAM, 0)) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to create AF_UNIX socket: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- clib_memcpy (ifr.ifr_name, (const char *) arg->host_if_name,
- vec_len (arg->host_if_name));
- if (ioctl (fd2, SIOCGIFINDEX, &ifr) < 0)
- {
- vlib_log_debug (
- apm->log_class,
- "Failed to retrieve the interface (%s) index: %s (errno %d)",
- arg->host_if_name, strerror (errno), errno);
- ret = VNET_API_ERROR_INVALID_INTERFACE;
- goto error;
- }
-
- host_if_index = ifr.ifr_ifindex;
- if (ioctl (fd2, SIOCGIFFLAGS, &ifr) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to get the active flag: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- if (!(ifr.ifr_flags & IFF_UP))
- {
- ifr.ifr_flags |= IFF_UP;
- if (ioctl (fd2, SIOCSIFFLAGS, &ifr) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to set the active flag: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
- }
-
- if (fd2 > -1)
- {
- close (fd2);
- fd2 = -1;
- }
-
- ret = is_bridge (arg->host_if_name);
- if (ret == 0) /* is a bridge, ignore state */
- host_if_index = -1;
-
- /* So far everything looks good, let's create interface */
- pool_get (apm->interfaces, apif);
- if_index = apif - apm->interfaces;
-
- apif->dev_instance = if_index;
- apif->host_if_index = host_if_index;
- apif->host_if_name = host_if_name_dup;
- apif->per_interface_next_index = ~0;
- apif->mode = arg->mode;
-
- ret = af_packet_device_init (vm, apif, arg);
- if (ret != 0)
- goto error;
-
- ret = af_packet_read_mtu (apif);
- if (ret != 0)
- goto error;
-
-
- if (apif->mode != AF_PACKET_IF_MODE_IP)
- {
- vnet_eth_interface_registration_t eir = {};
- /*use configured or generate random MAC address */
- if (arg->hw_addr)
- clib_memcpy (hw_addr, arg->hw_addr, 6);
- else
- {
- f64 now = vlib_time_now (vm);
- u32 rnd;
- rnd = (u32) (now * 1e6);
- rnd = random_u32 (&rnd);
-
- clib_memcpy (hw_addr + 2, &rnd, sizeof (rnd));
- hw_addr[0] = 2;
- hw_addr[1] = 0xfe;
- }
-
- eir.dev_class_index = af_packet_device_class.index;
- eir.dev_instance = apif->dev_instance;
- eir.address = hw_addr;
- eir.cb.set_max_frame_size = af_packet_eth_set_max_frame_size;
- apif->hw_if_index = vnet_eth_register_interface (vnm, &eir);
- }
- else
- {
- apif->hw_if_index = vnet_register_interface (
- vnm, af_packet_device_class.index, apif->dev_instance,
- af_packet_ip_device_hw_interface_class.index, apif->dev_instance);
- }
-
- sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index);
- apif->sw_if_index = sw->sw_if_index;
-
- af_packet_set_rx_queues (vm, apif);
- af_packet_set_tx_queues (vm, apif);
-
- if (arg->flags & AF_PACKET_IF_FLAGS_FANOUT)
- apif->is_fanout_enabled = 1;
-
- apif->is_qdisc_bypass_enabled =
- (arg->flags & AF_PACKET_IF_FLAGS_QDISC_BYPASS);
-
- if (arg->flags & AF_PACKET_IF_FLAGS_CKSUM_GSO)
- apif->is_cksum_gso_enabled = 1;
-
- if (apif->is_cksum_gso_enabled)
- caps |= VNET_HW_IF_CAP_TCP_GSO | VNET_HW_IF_CAP_TX_IP4_CKSUM |
- VNET_HW_IF_CAP_TX_TCP_CKSUM | VNET_HW_IF_CAP_TX_UDP_CKSUM;
-
- vnet_hw_if_set_caps (vnm, apif->hw_if_index, caps);
- vnet_hw_interface_set_flags (vnm, apif->hw_if_index,
- VNET_HW_INTERFACE_FLAG_LINK_UP);
-
- mhash_set_mem (&apm->if_index_by_host_if_name, host_if_name_dup, &if_index,
- 0);
- arg->sw_if_index = apif->sw_if_index;
-
- return 0;
-
-error:
- if (fd2 > -1)
- {
- close (fd2);
- fd2 = -1;
- }
- vec_free (host_if_name_dup);
- if (apif)
- {
- memset (apif, 0, sizeof (*apif));
- pool_put (apm->interfaces, apif);
- }
- return ret;
-}
-
-static int
-af_packet_rx_queue_free (af_packet_if_t *apif, af_packet_queue_t *rx_queue)
-{
- clib_file_del_by_index (&file_main, rx_queue->clib_file_index);
- close (rx_queue->fd);
- rx_queue->fd = -1;
- rx_queue->rx_ring = NULL;
- vec_free (rx_queue->rx_req);
- rx_queue->rx_req = NULL;
- return 0;
-}
-
-static int
-af_packet_tx_queue_free (af_packet_if_t *apif, af_packet_queue_t *tx_queue)
-{
- close (tx_queue->fd);
- tx_queue->fd = -1;
- clib_spinlock_free (&tx_queue->lockp);
- tx_queue->tx_ring = NULL;
- vec_free (tx_queue->tx_req);
- tx_queue->tx_req = NULL;
- return 0;
-}
-
-static int
-af_packet_ring_free (af_packet_if_t *apif, af_packet_ring_t *ring)
-{
- af_packet_main_t *apm = &af_packet_main;
-
- if (ring)
- {
- // FIXME: unmap the memory
- if (munmap (ring->ring_start_addr, ring->ring_size))
- vlib_log_warn (apm->log_class,
- "Host interface %s could not free ring %p of size %u",
- apif->host_if_name, ring->ring_start_addr,
- ring->ring_size);
- else
- ring->ring_start_addr = 0;
- }
-
- return 0;
-}
-
-int
-af_packet_delete_if (u8 *host_if_name)
-{
- vnet_main_t *vnm = vnet_get_main ();
- af_packet_main_t *apm = &af_packet_main;
- af_packet_if_t *apif;
- af_packet_queue_t *rx_queue;
- af_packet_queue_t *tx_queue;
- af_packet_ring_t *ring;
- uword *p;
-
- p = mhash_get (&apm->if_index_by_host_if_name, host_if_name);
- if (p == NULL)
- {
- vlib_log_warn (apm->log_class, "Host interface %s does not exist",
- host_if_name);
- return VNET_API_ERROR_SYSCALL_ERROR_1;
- }
- apif = pool_elt_at_index (apm->interfaces, p[0]);
-
- /* bring down the interface */
- vnet_hw_interface_set_flags (vnm, apif->hw_if_index, 0);
-
- /* clean up */
- vec_foreach (rx_queue, apif->rx_queues)
- af_packet_rx_queue_free (apif, rx_queue);
- vec_foreach (tx_queue, apif->tx_queues)
- af_packet_tx_queue_free (apif, tx_queue);
- vec_foreach (ring, apif->rings)
- af_packet_ring_free (apif, ring);
-
- vec_free (apif->rx_queues);
- apif->rx_queues = NULL;
- vec_free (apif->tx_queues);
- apif->tx_queues = NULL;
- vec_free (apif->rings);
- apif->rings = NULL;
-
- vec_free (apif->host_if_name);
- apif->host_if_name = NULL;
- apif->host_if_index = -1;
-
- mhash_unset (&apm->if_index_by_host_if_name, host_if_name, p);
-
- if (apif->mode != AF_PACKET_IF_MODE_IP)
- ethernet_delete_interface (vnm, apif->hw_if_index);
- else
- vnet_delete_hw_interface (vnm, apif->hw_if_index);
-
- memset (apif, 0, sizeof (*apif));
- pool_put (apm->interfaces, apif);
-
- return 0;
-}
-
-int
-af_packet_set_l4_cksum_offload (u32 sw_if_index, u8 set)
-{
- // deprecated ...
- return 0;
-}
-
-int
-af_packet_dump_ifs (af_packet_if_detail_t ** out_af_packet_ifs)
-{
- af_packet_main_t *apm = &af_packet_main;
- af_packet_if_t *apif;
- af_packet_if_detail_t *r_af_packet_ifs = NULL;
- af_packet_if_detail_t *af_packet_if = NULL;
-
- pool_foreach (apif, apm->interfaces)
- {
- vec_add2 (r_af_packet_ifs, af_packet_if, 1);
- af_packet_if->sw_if_index = apif->sw_if_index;
- if (apif->host_if_name)
- {
- clib_memcpy (af_packet_if->host_if_name, apif->host_if_name,
- MIN (ARRAY_LEN (af_packet_if->host_if_name) - 1,
- strlen ((const char *) apif->host_if_name)));
- }
- }
-
- *out_af_packet_ifs = r_af_packet_ifs;
-
- return 0;
-}
-
-static clib_error_t *
-af_packet_init (vlib_main_t * vm)
-{
- af_packet_main_t *apm = &af_packet_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
-
- clib_memset (apm, 0, sizeof (af_packet_main_t));
-
- mhash_init_vec_string (&apm->if_index_by_host_if_name, sizeof (uword));
-
- vec_validate_aligned (apm->rx_buffers, tm->n_vlib_mains - 1,
- CLIB_CACHE_LINE_BYTES);
-
- apm->log_class = vlib_log_register_class ("af_packet", 0);
- vlib_log_debug (apm->log_class, "initialized");
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (af_packet_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h
deleted file mode 100644
index 940acbb1372..00000000000
--- a/src/vnet/devices/af_packet/af_packet.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- *------------------------------------------------------------------
- * af_packet.h - linux kernel packet interface header file
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <linux/if_packet.h>
-
-#include <vppinfra/lock.h>
-#include <vlib/log.h>
-
-typedef struct tpacket_block_desc block_desc_t;
-typedef struct tpacket_req3 tpacket_req3_t;
-typedef struct tpacket3_hdr tpacket3_hdr_t;
-
-typedef enum
-{
- AF_PACKET_IF_MODE_ETHERNET = 1,
- AF_PACKET_IF_MODE_IP = 2
-} af_packet_if_mode_t;
-
-typedef enum
-{
- AF_PACKET_IF_FLAGS_QDISC_BYPASS = 1,
- AF_PACKET_IF_FLAGS_CKSUM_GSO = 2,
- AF_PACKET_IF_FLAGS_FANOUT = 4,
-} af_packet_if_flags_t;
-
-typedef struct
-{
- u32 sw_if_index;
- u8 host_if_name[64];
-} af_packet_if_detail_t;
-
-typedef struct
-{
- u8 *ring_start_addr;
- u32 ring_size;
-} af_packet_ring_t;
-
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- clib_spinlock_t lockp;
- int fd;
- union
- {
- tpacket_req3_t *rx_req;
- tpacket_req3_t *tx_req;
- };
-
- union
- {
- u8 **rx_ring;
- u8 **tx_ring;
- };
-
- union
- {
- u32 next_rx_block;
- u32 next_tx_frame;
- };
-
- u16 queue_id;
- u32 queue_index;
-
- u32 clib_file_index;
-
- u32 rx_frame_offset;
- u16 num_rx_pkts;
- u8 is_rx_pending;
- u8 is_tx_pending;
- vnet_hw_if_rx_mode mode;
-} af_packet_queue_t;
-
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- u32 hw_if_index;
- u32 sw_if_index;
- u32 per_interface_next_index;
- af_packet_if_mode_t mode;
- u8 is_admin_up;
- u8 is_cksum_gso_enabled;
-
- af_packet_queue_t *rx_queues;
- af_packet_queue_t *tx_queues;
-
- u8 num_rxqs;
- u8 num_txqs;
-
- u8 *host_if_name;
- int host_if_index;
-
- u32 host_mtu;
- u32 dev_instance;
-
- af_packet_ring_t *rings;
- u8 is_qdisc_bypass_enabled;
- u8 is_fanout_enabled;
-} af_packet_if_t;
-
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- af_packet_if_t *interfaces;
-
- u32 polling_count;
- /* rx buffer cache */
- u32 **rx_buffers;
-
- /* hash of host interface names */
- mhash_t if_index_by_host_if_name;
-
- /** log class */
- vlib_log_class_t log_class;
-} af_packet_main_t;
-
-typedef struct
-{
- u8 *host_if_name;
- u8 *hw_addr;
- u32 rx_frame_size;
- u32 tx_frame_size;
- u32 rx_frames_per_block;
- u32 tx_frames_per_block;
- u8 num_rxqs;
- u8 num_txqs;
- af_packet_if_mode_t mode;
- af_packet_if_flags_t flags;
-
- /* return */
- u32 sw_if_index;
-} af_packet_create_if_arg_t;
-
-extern af_packet_main_t af_packet_main;
-extern vnet_device_class_t af_packet_device_class;
-extern vlib_node_registration_t af_packet_input_node;
-
-int af_packet_create_if (af_packet_create_if_arg_t *arg);
-int af_packet_delete_if (u8 *host_if_name);
-int af_packet_set_l4_cksum_offload (u32 sw_if_index, u8 set);
-int af_packet_dump_ifs (af_packet_if_detail_t ** out_af_packet_ifs);
-
-format_function_t format_af_packet_device_name;
-
-#define MIN(x,y) (((x)<(y))?(x):(y))
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/af_packet/af_packet_api.c b/src/vnet/devices/af_packet/af_packet_api.c
deleted file mode 100644
index 21f2c381809..00000000000
--- a/src/vnet/devices/af_packet/af_packet_api.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- *------------------------------------------------------------------
- * af_packet_api.c - af-packet api
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vnet/vnet.h>
-#include <vlibmemory/api.h>
-
-#include <vnet/interface.h>
-#include <vnet/api_errno.h>
-#include <vnet/devices/af_packet/af_packet.h>
-
-#include <vnet/format_fns.h>
-#include <vnet/devices/af_packet/af_packet.api_enum.h>
-#include <vnet/devices/af_packet/af_packet.api_types.h>
-
-#define REPLY_MSG_ID_BASE msg_id_base
-#include <vlibapi/api_helper_macros.h>
-
-static u16 msg_id_base;
-
-static void
-vl_api_af_packet_create_t_handler (vl_api_af_packet_create_t * mp)
-{
- af_packet_create_if_arg_t _arg, *arg = &_arg;
- vl_api_af_packet_create_reply_t *rmp;
- int rv = 0;
-
- clib_memset (arg, 0, sizeof (*arg));
-
- arg->host_if_name = format (0, "%s", mp->host_if_name);
- vec_add1 (arg->host_if_name, 0);
-
- arg->hw_addr = mp->use_random_hw_addr ? 0 : mp->hw_addr;
- arg->mode = AF_PACKET_IF_MODE_ETHERNET;
- // Default flags
- arg->flags = AF_PACKET_IF_FLAGS_QDISC_BYPASS | AF_PACKET_IF_FLAGS_CKSUM_GSO;
- rv = af_packet_create_if (arg);
-
- vec_free (arg->host_if_name);
-
- REPLY_MACRO2 (VL_API_AF_PACKET_CREATE_REPLY, ({
- rmp->sw_if_index = clib_host_to_net_u32 (arg->sw_if_index);
- }));
-}
-
-static void
-vl_api_af_packet_create_v2_t_handler (vl_api_af_packet_create_v2_t *mp)
-{
- af_packet_create_if_arg_t _arg, *arg = &_arg;
- vl_api_af_packet_create_v2_reply_t *rmp;
- int rv = 0;
-
- clib_memset (arg, 0, sizeof (*arg));
-
- arg->host_if_name = format (0, "%s", mp->host_if_name);
- vec_add1 (arg->host_if_name, 0);
-
- // Default number of rx/tx queue(s)
- arg->num_rxqs = 1;
- arg->num_txqs = 1;
- arg->rx_frame_size = clib_net_to_host_u32 (mp->rx_frame_size);
- arg->tx_frame_size = clib_net_to_host_u32 (mp->tx_frame_size);
- arg->rx_frames_per_block = clib_net_to_host_u32 (mp->rx_frames_per_block);
- arg->tx_frames_per_block = clib_net_to_host_u32 (mp->tx_frames_per_block);
- arg->hw_addr = mp->use_random_hw_addr ? 0 : mp->hw_addr;
- arg->mode = AF_PACKET_IF_MODE_ETHERNET;
- // Default flags
- arg->flags = AF_PACKET_IF_FLAGS_QDISC_BYPASS | AF_PACKET_IF_FLAGS_CKSUM_GSO;
-
- if (mp->num_rx_queues > 1)
- arg->num_rxqs = clib_net_to_host_u16 (mp->num_rx_queues);
-
- rv = af_packet_create_if (arg);
-
- vec_free (arg->host_if_name);
- REPLY_MACRO2 (VL_API_AF_PACKET_CREATE_V2_REPLY, ({
- rmp->sw_if_index = clib_host_to_net_u32 (arg->sw_if_index);
- }));
-}
-
-static void
-vl_api_af_packet_create_v3_t_handler (vl_api_af_packet_create_v3_t *mp)
-{
- af_packet_create_if_arg_t _arg, *arg = &_arg;
- vl_api_af_packet_create_v3_reply_t *rmp;
- int rv = 0;
-
- clib_memset (arg, 0, sizeof (*arg));
-
- arg->host_if_name = format (0, "%s", mp->host_if_name);
- vec_add1 (arg->host_if_name, 0);
-
- // Default number of rx/tx queue(s)
- arg->num_rxqs = 1;
- arg->num_txqs = 1;
- arg->rx_frame_size = clib_net_to_host_u32 (mp->rx_frame_size);
- arg->tx_frame_size = clib_net_to_host_u32 (mp->tx_frame_size);
- arg->rx_frames_per_block = clib_net_to_host_u32 (mp->rx_frames_per_block);
- arg->tx_frames_per_block = clib_net_to_host_u32 (mp->tx_frames_per_block);
- arg->hw_addr = mp->use_random_hw_addr ? 0 : mp->hw_addr;
-
- switch (clib_net_to_host_u32 (mp->mode))
- {
- case AF_PACKET_API_MODE_ETHERNET:
- arg->mode = AF_PACKET_IF_MODE_ETHERNET;
- break;
- case AF_PACKET_API_MODE_IP:
- arg->mode = AF_PACKET_IF_MODE_IP;
- break;
- default:
- arg->sw_if_index = ~0;
- rv = VNET_ERR_INVALID_VALUE;
- goto error;
- }
-
- STATIC_ASSERT (((int) AF_PACKET_API_FLAG_QDISC_BYPASS ==
- (int) AF_PACKET_IF_FLAGS_QDISC_BYPASS),
- "af-packet qdisc-bypass api flag mismatch");
- STATIC_ASSERT (
- ((int) AF_PACKET_API_FLAG_CKSUM_GSO == (int) AF_PACKET_IF_FLAGS_CKSUM_GSO),
- "af-packet checksum/gso offload api flag mismatch");
-
- // Default flags
- arg->flags = clib_net_to_host_u32 (mp->flags);
-
- if (clib_net_to_host_u16 (mp->num_rx_queues) > 1)
- arg->num_rxqs = clib_net_to_host_u16 (mp->num_rx_queues);
-
- if (clib_net_to_host_u16 (mp->num_tx_queues) > 1)
- arg->num_txqs = clib_net_to_host_u16 (mp->num_tx_queues);
-
- rv = af_packet_create_if (arg);
-
-error:
- vec_free (arg->host_if_name);
- REPLY_MACRO2 (VL_API_AF_PACKET_CREATE_V3_REPLY, ({
- rmp->sw_if_index = clib_host_to_net_u32 (arg->sw_if_index);
- }));
-}
-
-static void
-vl_api_af_packet_delete_t_handler (vl_api_af_packet_delete_t * mp)
-{
- vl_api_af_packet_delete_reply_t *rmp;
- int rv = 0;
- u8 *host_if_name = NULL;
-
- host_if_name = format (0, "%s", mp->host_if_name);
- vec_add1 (host_if_name, 0);
-
- rv = af_packet_delete_if (host_if_name);
-
- vec_free (host_if_name);
-
- REPLY_MACRO (VL_API_AF_PACKET_DELETE_REPLY);
-}
-
-static void
- vl_api_af_packet_set_l4_cksum_offload_t_handler
- (vl_api_af_packet_set_l4_cksum_offload_t * mp)
-{
- vl_api_af_packet_delete_reply_t *rmp;
- int rv = 0;
-
- rv = af_packet_set_l4_cksum_offload (ntohl (mp->sw_if_index), mp->set);
- REPLY_MACRO (VL_API_AF_PACKET_SET_L4_CKSUM_OFFLOAD_REPLY);
-}
-
-static void
-af_packet_send_details (vpe_api_main_t * am,
- vl_api_registration_t * reg,
- af_packet_if_detail_t * af_packet_if, u32 context)
-{
- vl_api_af_packet_details_t *mp;
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = htons (REPLY_MSG_ID_BASE + VL_API_AF_PACKET_DETAILS);
- mp->sw_if_index = htonl (af_packet_if->sw_if_index);
- clib_memcpy (mp->host_if_name, af_packet_if->host_if_name,
- MIN (ARRAY_LEN (mp->host_if_name) - 1,
- strlen ((const char *) af_packet_if->host_if_name)));
-
- mp->context = context;
- vl_api_send_msg (reg, (u8 *) mp);
-}
-
-
-static void
-vl_api_af_packet_dump_t_handler (vl_api_af_packet_dump_t * mp)
-{
- int rv;
- vpe_api_main_t *am = &vpe_api_main;
- vl_api_registration_t *reg;
- af_packet_if_detail_t *out_af_packet_ifs = NULL;
- af_packet_if_detail_t *af_packet_if = NULL;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- rv = af_packet_dump_ifs (&out_af_packet_ifs);
- if (rv)
- return;
-
- vec_foreach (af_packet_if, out_af_packet_ifs)
- {
- af_packet_send_details (am, reg, af_packet_if, mp->context);
- }
-
- vec_free (out_af_packet_ifs);
-}
-
-#include <vnet/devices/af_packet/af_packet.api.c>
-static clib_error_t *
-af_packet_api_hookup (vlib_main_t * vm)
-{
- /*
- * Set up the (msg_name, crc, message-id) table
- */
- REPLY_MSG_ID_BASE = setup_message_id_table ();
-
- return 0;
-}
-
-VLIB_API_INIT_FUNCTION (af_packet_api_hookup);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/af_packet/cli.c b/src/vnet/devices/af_packet/cli.c
deleted file mode 100644
index e730659bfcd..00000000000
--- a/src/vnet/devices/af_packet/cli.c
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- *------------------------------------------------------------------
- * af_packet.c - linux kernel packet interface
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <fcntl.h> /* for open */
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/uio.h> /* for iovec */
-#include <netinet/in.h>
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
-
-#include <vnet/devices/af_packet/af_packet.h>
-
-/**
- * @file
- * @brief CLI for Host Interface Device Driver.
- *
- * This file contains the source code for CLI for the host interface.
- */
-
-static clib_error_t *
-af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- af_packet_create_if_arg_t _arg, *arg = &_arg;
- clib_error_t *error = NULL;
- u8 hwaddr[6];
- int r;
-
- clib_memset (arg, 0, sizeof (*arg));
-
- // Default mode
- arg->mode = AF_PACKET_IF_MODE_ETHERNET;
-
- // Default number of rx/tx queue(s)
- arg->num_rxqs = 1;
- arg->num_txqs = 1;
-
- // Default flags
- arg->flags = AF_PACKET_IF_FLAGS_QDISC_BYPASS | AF_PACKET_IF_FLAGS_CKSUM_GSO;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "name %s", &arg->host_if_name))
- ;
- else if (unformat (line_input, "rx-size %u", &arg->rx_frame_size))
- ;
- else if (unformat (line_input, "tx-size %u", &arg->tx_frame_size))
- ;
- else if (unformat (line_input, "rx-per-block %u",
- &arg->rx_frames_per_block))
- ;
- else if (unformat (line_input, "tx-per-block %u",
- &arg->tx_frames_per_block))
- ;
- else if (unformat (line_input, "num-rx-queues %u", &arg->num_rxqs))
- ;
- else if (unformat (line_input, "num-tx-queues %u", &arg->num_txqs))
- ;
- else if (unformat (line_input, "qdisc-bypass-disable"))
- arg->flags &= ~AF_PACKET_IF_FLAGS_QDISC_BYPASS;
- else if (unformat (line_input, "cksum-gso-disable"))
- arg->flags &= ~AF_PACKET_IF_FLAGS_CKSUM_GSO;
- else if (unformat (line_input, "mode ip"))
- arg->mode = AF_PACKET_IF_MODE_IP;
- else if (unformat (line_input, "hw-addr %U", unformat_ethernet_address,
- hwaddr))
- arg->hw_addr = hwaddr;
- else
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- goto done;
- }
- }
-
- if (arg->host_if_name == NULL)
- {
- error = clib_error_return (0, "missing host interface name");
- goto done;
- }
-
- r = af_packet_create_if (arg);
-
- if (r == VNET_API_ERROR_SYSCALL_ERROR_1)
- {
- error = clib_error_return (0, "%s (errno %d)", strerror (errno), errno);
- goto done;
- }
-
- if (r == VNET_API_ERROR_INVALID_INTERFACE)
- {
- error = clib_error_return (0, "Invalid interface name");
- goto done;
- }
-
- if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS)
- {
- error = clib_error_return (0, "Interface already exists");
- goto done;
- }
-
- vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (),
- arg->sw_if_index);
-
-done:
- vec_free (arg->host_if_name);
- unformat_free (line_input);
-
- return error;
-}
-
-/*?
- * Create a host interface that will attach to a linux AF_PACKET
- * interface, one side of a veth pair. The veth pair must already
- * exist. Once created, a new host interface will exist in VPP
- * with the name '<em>host-<ifname></em>', where '<em><ifname></em>'
- * is the name of the specified veth pair. Use the
- * '<em>show interface</em>' command to display host interface details.
- *
- * This command has the following optional parameters:
- *
- * - <b>hw-addr <mac-addr></b> - Optional ethernet address, can be in either
- * X:X:X:X:X:X unix or X.X.X cisco format.
- *
- * @cliexpar
- * Example of how to create a host interface tied to one side of an
- * existing linux veth pair named vpp1:
- * @cliexstart{create host-interface name vpp1}
- * host-vpp1
- * @cliexend
- * Once the host interface is created, enable the interface using:
- * @cliexcmd{set interface state host-vpp1 up}
-?*/
-VLIB_CLI_COMMAND (af_packet_create_command, static) = {
- .path = "create host-interface",
- .short_help = "create host-interface name <ifname> [num-rx-queues <n>] "
- "[num-tx-queues <n>] [hw-addr <mac-addr>] [mode ip] "
- "[qdisc-bypass-disable] [cksum-gso-disable]",
- .function = af_packet_create_command_fn,
-};
-
-static clib_error_t *
-af_packet_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- u8 *host_if_name = NULL;
- clib_error_t *error = NULL;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "name %s", &host_if_name))
- ;
- else
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- goto done;
- }
- }
-
- if (host_if_name == NULL)
- {
- error = clib_error_return (0, "missing host interface name");
- goto done;
- }
-
- af_packet_delete_if (host_if_name);
-
-done:
- vec_free (host_if_name);
- unformat_free (line_input);
-
- return error;
-}
-
-/*?
- * Delete a host interface. Use the linux interface name to identify
- * the host interface to be deleted. In VPP, host interfaces are
- * named as '<em>host-<ifname></em>', where '<em><ifname></em>'
- * is the name of the linux interface.
- *
- * @cliexpar
- * Example of how to delete a host interface named host-vpp1:
- * @cliexcmd{delete host-interface name vpp1}
-?*/
-VLIB_CLI_COMMAND (af_packet_delete_command, static) = {
- .path = "delete host-interface",
- .short_help = "delete host-interface name <ifname>",
- .function = af_packet_delete_command_fn,
-};
-
-static clib_error_t *
-af_packet_set_l4_cksum_offload_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- u8 set = 0;
- clib_error_t *error = NULL;
- vnet_main_t *vnm = vnet_get_main ();
- u32 sw_if_index;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
- &sw_if_index))
- ;
- else if (unformat (line_input, "on"))
- set = 1;
- else if (unformat (line_input, "off"))
- set = 0;
- else
- {
- error = clib_error_return (0, "unknown input '%U'",
- format_unformat_error, line_input);
- goto done;
- }
- }
-
- if (af_packet_set_l4_cksum_offload (sw_if_index, set) < 0)
- error = clib_error_return (0, "not an af_packet interface");
-
-done:
- unformat_free (line_input);
- return error;
-}
-
-/*?
- * Set TCP/UDP offload checksum calculation. Use interface
- * name to identify the interface to set TCP/UDP offload checksum
- * calculation.
- *
- * @cliexpar
- * Example of how to set TCP/UDP offload checksum calculation on host-vpp0:
- * @cliexcmd{set host-interface l4-cksum-offload host-vpp0 off}
- * @cliexcmd{set host-interface l4-cksum-offload host-vpp0 on}
-?*/
-VLIB_CLI_COMMAND (af_packet_set_l4_cksum_offload_command, static) = {
- .path = "set host-interface l4-cksum-offload",
- .short_help = "set host-interface l4-cksum-offload <host-if-name> <on|off>",
- .function = af_packet_set_l4_cksum_offload_command_fn,
-};
-
-clib_error_t *
-af_packet_cli_init (vlib_main_t * vm)
-{
- return 0;
-}
-
-VLIB_INIT_FUNCTION (af_packet_cli_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c
deleted file mode 100644
index 74bc1c8c42c..00000000000
--- a/src/vnet/devices/af_packet/device.c
+++ /dev/null
@@ -1,690 +0,0 @@
-/*
- *------------------------------------------------------------------
- * af_packet.c - linux kernel packet interface
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <linux/if_packet.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <net/if.h>
-#include <net/if_arp.h>
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/ip/ip4_packet.h>
-#include <vnet/ip/ip6_packet.h>
-#include <vnet/ip/ip_psh_cksum.h>
-#include <vnet/tcp/tcp_packet.h>
-#include <vnet/udp/udp_packet.h>
-
-#include <vnet/devices/af_packet/af_packet.h>
-#include <vnet/devices/virtio/virtio_std.h>
-
-#define foreach_af_packet_tx_func_error \
-_(FRAME_NOT_READY, "tx frame not ready") \
-_(TXRING_EAGAIN, "tx sendto temporary failure") \
-_(TXRING_FATAL, "tx sendto fatal failure") \
-_(TXRING_OVERRUN, "tx ring overrun")
-
-typedef enum
-{
-#define _(f,s) AF_PACKET_TX_ERROR_##f,
- foreach_af_packet_tx_func_error
-#undef _
- AF_PACKET_TX_N_ERROR,
-} af_packet_tx_func_error_t;
-
-static char *af_packet_tx_func_error_strings[] = {
-#define _(n,s) s,
- foreach_af_packet_tx_func_error
-#undef _
-};
-
-typedef struct
-{
- u32 buffer_index;
- u32 hw_if_index;
- u16 queue_id;
- tpacket3_hdr_t tph;
- vnet_virtio_net_hdr_t vnet_hdr;
- vlib_buffer_t buffer;
-} af_packet_tx_trace_t;
-
-#ifndef CLIB_MARCH_VARIANT
-u8 *
-format_af_packet_device_name (u8 * s, va_list * args)
-{
- u32 i = va_arg (*args, u32);
- af_packet_main_t *apm = &af_packet_main;
- af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, i);
-
- s = format (s, "host-%s", apif->host_if_name);
- return s;
-}
-#endif /* CLIB_MARCH_VARIANT */
-
-static u8 *
-format_af_packet_device (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- u32 indent = format_get_indent (s);
- int __clib_unused verbose = va_arg (*args, int);
-
- af_packet_main_t *apm = &af_packet_main;
- af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, dev_instance);
- af_packet_queue_t *rx_queue = 0;
- af_packet_queue_t *tx_queue = 0;
-
- s = format (s, "Linux PACKET socket interface");
- s = format (s, "\n%UFEATURES:", format_white_space, indent);
- if (apif->is_qdisc_bypass_enabled)
- s = format (s, "\n%Uqdisc-bpass-enabled", format_white_space, indent + 2);
- if (apif->is_cksum_gso_enabled)
- s = format (s, "\n%Ucksum-gso-enabled", format_white_space, indent + 2);
- if (apif->is_fanout_enabled)
- s = format (s, "\n%Ufanout-enabled", format_white_space, indent + 2);
-
- vec_foreach (rx_queue, apif->rx_queues)
- {
- u32 rx_block_size = rx_queue->rx_req->tp_block_size;
- u32 rx_frame_size = rx_queue->rx_req->tp_frame_size;
- u32 rx_frame_nr = rx_queue->rx_req->tp_frame_nr;
- u32 rx_block_nr = rx_queue->rx_req->tp_block_nr;
-
- s = format (s, "\n%URX Queue %u:", format_white_space, indent,
- rx_queue->queue_id);
- s = format (s, "\n%Ublock size:%d nr:%d frame size:%d nr:%d",
- format_white_space, indent + 2, rx_block_size, rx_block_nr,
- rx_frame_size, rx_frame_nr);
- s = format (s, " next block:%d", rx_queue->next_rx_block);
- if (rx_queue->is_rx_pending)
- {
- s = format (
- s, "\n%UPending Request: num-rx-pkts:%d next-frame-offset:%d",
- format_white_space, indent + 2, rx_queue->num_rx_pkts,
- rx_queue->rx_frame_offset);
- }
- }
-
- vec_foreach (tx_queue, apif->tx_queues)
- {
- clib_spinlock_lock (&tx_queue->lockp);
- u32 tx_block_sz = tx_queue->tx_req->tp_block_size;
- u32 tx_frame_sz = tx_queue->tx_req->tp_frame_size;
- u32 tx_frame_nr = tx_queue->tx_req->tp_frame_nr;
- u32 tx_block_nr = tx_queue->tx_req->tp_block_nr;
- int block = 0;
- int n_send_req = 0, n_avail = 0, n_sending = 0, n_tot = 0, n_wrong = 0;
- u8 *tx_block_start = tx_queue->tx_ring[block];
- u32 tx_frame = tx_queue->next_tx_frame;
- tpacket3_hdr_t *tph;
-
- s = format (s, "\n%UTX Queue %u:", format_white_space, indent,
- tx_queue->queue_id);
- s = format (s, "\n%Ublock size:%d nr:%d frame size:%d nr:%d",
- format_white_space, indent + 2, tx_block_sz, tx_block_nr,
- tx_frame_sz, tx_frame_nr);
- s = format (s, " next frame:%d", tx_queue->next_tx_frame);
-
- do
- {
- tph = (tpacket3_hdr_t *) (tx_block_start + tx_frame * tx_frame_sz);
- tx_frame = (tx_frame + 1) % tx_frame_nr;
- if (tph->tp_status == 0)
- n_avail++;
- else if (tph->tp_status & TP_STATUS_SEND_REQUEST)
- n_send_req++;
- else if (tph->tp_status & TP_STATUS_SENDING)
- n_sending++;
- else
- n_wrong++;
- n_tot++;
- }
- while (tx_frame != tx_queue->next_tx_frame);
- s =
- format (s, "\n%Uavailable:%d request:%d sending:%d wrong:%d total:%d",
- format_white_space, indent + 2, n_avail, n_send_req, n_sending,
- n_wrong, n_tot);
- clib_spinlock_unlock (&tx_queue->lockp);
- }
- return s;
-}
-
-static u8 *
-format_af_packet_tx_trace (u8 *s, va_list *va)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
- af_packet_tx_trace_t *t = va_arg (*va, af_packet_tx_trace_t *);
- u32 indent = format_get_indent (s);
-
- s = format (s, "af_packet: hw_if_index %u tx-queue %u", t->hw_if_index,
- t->queue_id);
-
- s =
- format (s,
- "\n%Utpacket3_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
- "\n%Usec 0x%x nsec 0x%x vlan %U"
-#ifdef TP_STATUS_VLAN_TPID_VALID
- " vlan_tpid %u"
-#endif
- ,
- format_white_space, indent + 2, format_white_space, indent + 4,
- t->tph.tp_status, t->tph.tp_len, t->tph.tp_snaplen, t->tph.tp_mac,
- t->tph.tp_net, format_white_space, indent + 4, t->tph.tp_sec,
- t->tph.tp_nsec, format_ethernet_vlan_tci, t->tph.hv1.tp_vlan_tci
-#ifdef TP_STATUS_VLAN_TPID_VALID
- ,
- t->tph.hv1.tp_vlan_tpid
-#endif
- );
-
- s = format (s,
- "\n%Uvnet-hdr:\n%Uflags 0x%02x gso_type 0x%02x hdr_len %u"
- "\n%Ugso_size %u csum_start %u csum_offset %u",
- format_white_space, indent + 2, format_white_space, indent + 4,
- t->vnet_hdr.flags, t->vnet_hdr.gso_type, t->vnet_hdr.hdr_len,
- format_white_space, indent + 4, t->vnet_hdr.gso_size,
- t->vnet_hdr.csum_start, t->vnet_hdr.csum_offset);
-
- s = format (s, "\n%Ubuffer 0x%x:\n%U%U", format_white_space, indent + 2,
- t->buffer_index, format_white_space, indent + 4,
- format_vnet_buffer_no_chain, &t->buffer);
- s = format (s, "\n%U%U", format_white_space, indent + 2,
- format_ethernet_header_with_length, t->buffer.pre_data,
- sizeof (t->buffer.pre_data));
- return s;
-}
-
-static void
-af_packet_tx_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
- vlib_buffer_t *b0, u32 bi, tpacket3_hdr_t *tph,
- vnet_virtio_net_hdr_t *vnet_hdr, u32 hw_if_index,
- u16 queue_id)
-{
- af_packet_tx_trace_t *t;
- t = vlib_add_trace (vm, node, b0, sizeof (t[0]));
- t->hw_if_index = hw_if_index;
- t->queue_id = queue_id;
- t->buffer_index = bi;
-
- clib_memcpy_fast (&t->tph, tph, sizeof (*tph));
- clib_memcpy_fast (&t->vnet_hdr, vnet_hdr, sizeof (*vnet_hdr));
- clib_memcpy_fast (&t->buffer, b0, sizeof (*b0) - sizeof (b0->pre_data));
- clib_memcpy_fast (t->buffer.pre_data, vlib_buffer_get_current (b0),
- sizeof (t->buffer.pre_data));
-}
-
-static_always_inline void
-fill_gso_offload (vlib_buffer_t *b0, vnet_virtio_net_hdr_t *vnet_hdr)
-{
- vnet_buffer_oflags_t oflags = vnet_buffer (b0)->oflags;
- if (b0->flags & VNET_BUFFER_F_IS_IP4)
- {
- ip4_header_t *ip4;
- vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
- vnet_hdr->gso_size = vnet_buffer2 (b0)->gso_size;
- vnet_hdr->hdr_len =
- vnet_buffer (b0)->l4_hdr_offset + vnet_buffer2 (b0)->gso_l4_hdr_sz;
- vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- vnet_hdr->csum_start = vnet_buffer (b0)->l4_hdr_offset; // 0x22;
- vnet_hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
- ip4 = (ip4_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
- if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
- ip4->checksum = ip4_header_checksum (ip4);
- }
- else if (b0->flags & VNET_BUFFER_F_IS_IP6)
- {
- vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
- vnet_hdr->gso_size = vnet_buffer2 (b0)->gso_size;
- vnet_hdr->hdr_len =
- vnet_buffer (b0)->l4_hdr_offset + vnet_buffer2 (b0)->gso_l4_hdr_sz;
- vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- vnet_hdr->csum_start = vnet_buffer (b0)->l4_hdr_offset; // 0x36;
- vnet_hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
- }
-}
-
-static_always_inline void
-fill_cksum_offload (vlib_buffer_t *b0, vnet_virtio_net_hdr_t *vnet_hdr)
-{
- vnet_buffer_oflags_t oflags = vnet_buffer (b0)->oflags;
- if (b0->flags & VNET_BUFFER_F_IS_IP4)
- {
- ip4_header_t *ip4;
- ip4 = (ip4_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
- if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
- ip4->checksum = ip4_header_checksum (ip4);
- vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- vnet_hdr->csum_start = 0x22;
- if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
- {
- tcp_header_t *tcp =
- (tcp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
- tcp->checksum = ip4_pseudo_header_cksum (ip4);
- vnet_hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
- }
- else if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
- {
- udp_header_t *udp =
- (udp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
- udp->checksum = ip4_pseudo_header_cksum (ip4);
- vnet_hdr->csum_offset = STRUCT_OFFSET_OF (udp_header_t, checksum);
- }
- }
- else if (b0->flags & VNET_BUFFER_F_IS_IP6)
- {
- ip6_header_t *ip6;
- vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- vnet_hdr->csum_start = 0x36;
- ip6 = (ip6_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
- if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
- {
- tcp_header_t *tcp =
- (tcp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
- tcp->checksum = ip6_pseudo_header_cksum (ip6);
- vnet_hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
- }
- else if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
- {
- udp_header_t *udp =
- (udp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
- udp->checksum = ip6_pseudo_header_cksum (ip6);
- vnet_hdr->csum_offset = STRUCT_OFFSET_OF (udp_header_t, checksum);
- }
- }
-}
-
-VNET_DEVICE_CLASS_TX_FN (af_packet_device_class) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- af_packet_main_t *apm = &af_packet_main;
- vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
- u32 *buffers = vlib_frame_vector_args (frame);
- u32 n_left = frame->n_vectors;
- u32 n_sent = 0;
- vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
- af_packet_if_t *apif =
- pool_elt_at_index (apm->interfaces, rd->dev_instance);
- u16 queue_id = tf->queue_id;
- af_packet_queue_t *tx_queue = vec_elt_at_index (apif->tx_queues, queue_id);
- u32 block = 0, frame_size = 0, frame_num = 0, tx_frame = 0;
- u8 *block_start = 0;
- tpacket3_hdr_t *tph = 0;
- u32 frame_not_ready = 0;
- u8 is_cksum_gso_enabled = (apif->is_cksum_gso_enabled == 1) ? 1 : 0;
-
- if (tf->shared_queue)
- clib_spinlock_lock (&tx_queue->lockp);
-
- frame_size = tx_queue->tx_req->tp_frame_size;
- frame_num = tx_queue->tx_req->tp_frame_nr;
- block_start = tx_queue->tx_ring[block];
- tx_frame = tx_queue->next_tx_frame;
-
- while (n_left)
- {
- u32 len;
- vnet_virtio_net_hdr_t *vnet_hdr = 0;
- u32 offset = 0;
- vlib_buffer_t *b0 = 0, *b0_first = 0;
- u32 bi, bi_first;
-
- bi = bi_first = buffers[0];
- n_left--;
- buffers++;
-
- tph = (tpacket3_hdr_t *) (block_start + tx_frame * frame_size);
- if (PREDICT_FALSE (tph->tp_status &
- (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)))
- {
- frame_not_ready++;
- goto next;
- }
-
- b0_first = b0 = vlib_get_buffer (vm, bi);
-
- if (PREDICT_TRUE (is_cksum_gso_enabled))
- {
- vnet_hdr =
- (vnet_virtio_net_hdr_t *) ((u8 *) tph + TPACKET_ALIGN (sizeof (
- tpacket3_hdr_t)));
-
- clib_memset_u8 (vnet_hdr, 0, sizeof (vnet_virtio_net_hdr_t));
- offset = sizeof (vnet_virtio_net_hdr_t);
-
- if (b0->flags & VNET_BUFFER_F_GSO)
- fill_gso_offload (b0, vnet_hdr);
- else if (b0->flags & VNET_BUFFER_F_OFFLOAD)
- fill_cksum_offload (b0, vnet_hdr);
- }
-
- len = b0->current_length;
- clib_memcpy_fast ((u8 *) tph + TPACKET_ALIGN (sizeof (tpacket3_hdr_t)) +
- offset,
- vlib_buffer_get_current (b0), len);
- offset += len;
-
- while (b0->flags & VLIB_BUFFER_NEXT_PRESENT)
- {
- b0 = vlib_get_buffer (vm, b0->next_buffer);
- len = b0->current_length;
- clib_memcpy_fast ((u8 *) tph +
- TPACKET_ALIGN (sizeof (tpacket3_hdr_t)) + offset,
- vlib_buffer_get_current (b0), len);
- offset += len;
- }
-
- tph->tp_len = tph->tp_snaplen = offset;
- tph->tp_status = TP_STATUS_SEND_REQUEST;
- n_sent++;
-
- if (PREDICT_FALSE (b0_first->flags & VLIB_BUFFER_IS_TRACED))
- {
- if (PREDICT_TRUE (is_cksum_gso_enabled))
- af_packet_tx_trace (vm, node, b0_first, bi_first, tph, vnet_hdr,
- apif->hw_if_index, queue_id);
- else
- {
- vnet_virtio_net_hdr_t vnet_hdr2 = {};
- af_packet_tx_trace (vm, node, b0_first, bi_first, tph,
- &vnet_hdr2, apif->hw_if_index, queue_id);
- }
- }
- tx_frame = (tx_frame + 1) % frame_num;
-
- next:
- /* check if we've exhausted the ring */
- if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num))
- break;
- }
-
- CLIB_MEMORY_BARRIER ();
-
- if (PREDICT_TRUE (n_sent || tx_queue->is_tx_pending))
- {
- tx_queue->next_tx_frame = tx_frame;
- tx_queue->is_tx_pending = 0;
-
- if (PREDICT_FALSE (
- sendto (tx_queue->fd, NULL, 0, MSG_DONTWAIT, NULL, 0) == -1))
- {
- /* Uh-oh, drop & move on, but count whether it was fatal or not.
- * Note that we have no reliable way to properly determine the
- * disposition of the packets we just enqueued for delivery.
- */
- uword counter;
-
- if (unix_error_is_fatal (errno))
- {
- counter = AF_PACKET_TX_ERROR_TXRING_FATAL;
- }
- else
- {
- counter = AF_PACKET_TX_ERROR_TXRING_EAGAIN;
- /* non-fatal error: kick again next time
- * note that you could still end up in a deadlock: if you do not
- * try to send new packets (ie reschedule this tx node), eg.
- * because your peer is waiting for the unsent packets to reply
- * to you but your waiting for its reply etc., you are not going
- * to kick again, and everybody is waiting for the other to talk
- * 1st... */
- tx_queue->is_tx_pending = 1;
- }
-
- vlib_error_count (vm, node->node_index, counter, 1);
- }
- }
-
- if (tf->shared_queue)
- clib_spinlock_unlock (&tx_queue->lockp);
-
- if (PREDICT_FALSE (frame_not_ready))
- vlib_error_count (vm, node->node_index,
- AF_PACKET_TX_ERROR_FRAME_NOT_READY, frame_not_ready);
-
- if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num))
- vlib_error_count (vm, node->node_index, AF_PACKET_TX_ERROR_TXRING_OVERRUN,
- n_left);
-
- vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
- return frame->n_vectors;
-}
-
-static void
-af_packet_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
- u32 node_index)
-{
- af_packet_main_t *apm = &af_packet_main;
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
- af_packet_if_t *apif =
- pool_elt_at_index (apm->interfaces, hw->dev_instance);
-
- /* Shut off redirection */
- if (node_index == ~0)
- {
- apif->per_interface_next_index = node_index;
- return;
- }
-
- apif->per_interface_next_index =
- vlib_node_add_next (vlib_get_main (), af_packet_input_node.index,
- node_index);
-}
-
-static void
-af_packet_clear_hw_interface_counters (u32 instance)
-{
- /* Nothing for now */
-}
-
-static clib_error_t *
-af_packet_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
- u32 flags)
-{
- af_packet_main_t *apm = &af_packet_main;
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
- af_packet_if_t *apif =
- pool_elt_at_index (apm->interfaces, hw->dev_instance);
- u32 hw_flags;
- int rv, fd = socket (AF_UNIX, SOCK_DGRAM, 0);
- struct ifreq ifr;
-
- if (0 > fd)
- {
- vlib_log_warn (apm->log_class, "af_packet_%s could not open socket",
- apif->host_if_name);
- return 0;
- }
-
- /* if interface is a bridge ignore */
- if (apif->host_if_index < 0)
- goto error; /* no error */
-
- /* use host_if_index in case host name has changed */
- ifr.ifr_ifindex = apif->host_if_index;
- if ((rv = ioctl (fd, SIOCGIFNAME, &ifr)) < 0)
- {
- vlib_log_warn (apm->log_class,
- "af_packet_%s ioctl could not retrieve eth name",
- apif->host_if_name);
- goto error;
- }
-
- apif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
-
- if ((rv = ioctl (fd, SIOCGIFFLAGS, &ifr)) < 0)
- {
- vlib_log_warn (apm->log_class, "af_packet_%s error: %d",
- apif->is_admin_up ? "up" : "down", rv);
- goto error;
- }
-
- if (apif->is_admin_up)
- {
- hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP;
- ifr.ifr_flags |= IFF_UP;
- }
- else
- {
- hw_flags = 0;
- ifr.ifr_flags &= ~IFF_UP;
- }
-
- if ((rv = ioctl (fd, SIOCSIFFLAGS, &ifr)) < 0)
- {
- vlib_log_warn (apm->log_class, "af_packet_%s error: %d",
- apif->is_admin_up ? "up" : "down", rv);
- goto error;
- }
-
- vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
-
-error:
- if (0 <= fd)
- close (fd);
-
- return 0; /* no error */
-}
-
-static clib_error_t *
-af_packet_subif_add_del_function (vnet_main_t * vnm,
- u32 hw_if_index,
- struct vnet_sw_interface_t *st, int is_add)
-{
- /* Nothing for now */
- return 0;
-}
-
-static clib_error_t *af_packet_set_mac_address_function
- (struct vnet_hw_interface_t *hi, const u8 * old_address, const u8 * address)
-{
- af_packet_main_t *apm = &af_packet_main;
- af_packet_if_t *apif =
- pool_elt_at_index (apm->interfaces, hi->dev_instance);
- int rv, fd;
- struct ifreq ifr;
-
- if (apif->mode == AF_PACKET_IF_MODE_IP)
- {
- vlib_log_warn (apm->log_class, "af_packet_%s interface is in IP mode",
- apif->host_if_name);
- return clib_error_return (0,
- " MAC update failed, interface is in IP mode");
- }
-
- fd = socket (AF_UNIX, SOCK_DGRAM, 0);
- if (0 > fd)
- {
- vlib_log_warn (apm->log_class, "af_packet_%s could not open socket",
- apif->host_if_name);
- return 0;
- }
-
- /* if interface is a bridge ignore */
- if (apif->host_if_index < 0)
- goto error; /* no error */
-
- /* use host_if_index in case host name has changed */
- ifr.ifr_ifindex = apif->host_if_index;
- if ((rv = ioctl (fd, SIOCGIFNAME, &ifr)) < 0)
- {
- vlib_log_warn
- (apm->log_class,
- "af_packet_%s ioctl could not retrieve eth name, error: %d",
- apif->host_if_name, rv);
- goto error;
- }
-
- clib_memcpy (ifr.ifr_hwaddr.sa_data, address, 6);
- ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
-
- if ((rv = ioctl (fd, SIOCSIFHWADDR, &ifr)) < 0)
- {
- vlib_log_warn (apm->log_class,
- "af_packet_%s ioctl could not set mac, error: %d",
- apif->host_if_name, rv);
- goto error;
- }
-
-error:
-
- if (0 <= fd)
- close (fd);
-
- return 0; /* no error */
-}
-
-static clib_error_t *
-af_packet_interface_rx_mode_change (vnet_main_t *vnm, u32 hw_if_index, u32 qid,
- vnet_hw_if_rx_mode mode)
-{
- af_packet_main_t *apm = &af_packet_main;
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
- af_packet_if_t *apif;
-
- apif = vec_elt_at_index (apm->interfaces, hw->dev_instance);
-
- if (mode == VNET_HW_IF_RX_MODE_ADAPTIVE)
- {
- vlib_log_err (apm->log_class,
- "af_packet_%s adaptive mode is not supported",
- apif->host_if_name);
- return clib_error_return (
- 0, "af_packet_%s adaptive mode is not supported", apif->host_if_name);
- }
-
- af_packet_queue_t *rx_queue = vec_elt_at_index (apif->rx_queues, qid);
-
- if (rx_queue->mode != mode)
- {
- rx_queue->mode = mode;
-
- if (mode == VNET_HW_IF_RX_MODE_POLLING)
- apm->polling_count++;
- else if (mode == VNET_HW_IF_RX_MODE_INTERRUPT && apm->polling_count > 0)
- apm->polling_count--;
- }
-
- return 0;
-}
-
-VNET_DEVICE_CLASS (af_packet_device_class) = {
- .name = "af-packet",
- .format_device_name = format_af_packet_device_name,
- .format_device = format_af_packet_device,
- .format_tx_trace = format_af_packet_tx_trace,
- .tx_function_n_errors = AF_PACKET_TX_N_ERROR,
- .tx_function_error_strings = af_packet_tx_func_error_strings,
- .rx_redirect_to_node = af_packet_set_interface_next_node,
- .clear_counters = af_packet_clear_hw_interface_counters,
- .admin_up_down_function = af_packet_interface_admin_up_down,
- .subif_add_del_function = af_packet_subif_add_del_function,
- .mac_addr_change_function = af_packet_set_mac_address_function,
- .rx_mode_change_function = af_packet_interface_rx_mode_change,
-};
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/af_packet/dir.dox b/src/vnet/devices/af_packet/dir.dox
deleted file mode 100644
index 78991c6d97f..00000000000
--- a/src/vnet/devices/af_packet/dir.dox
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Doxygen directory documentation */
-
-/**
-@dir
-@brief Host Interface Implementation.
-
-This directory contains the source code for Host Interface driver. The
-Host Interface driver leverages the DPDK AF_PACKET driver.
-
-
-*/
-/*? %%clicmd:group_label Host Interface %% ?*/
-/*? %%syscfg:group_label Host Interface %% ?*/
diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c
deleted file mode 100644
index 8c72afb2456..00000000000
--- a/src/vnet/devices/af_packet/node.c
+++ /dev/null
@@ -1,574 +0,0 @@
-/*
- *------------------------------------------------------------------
- * af_packet.c - linux kernel packet interface
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <linux/if_packet.h>
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/interface/rx_queue_funcs.h>
-#include <vnet/feature/feature.h>
-#include <vnet/ethernet/packet.h>
-
-#include <vnet/devices/af_packet/af_packet.h>
-#include <vnet/devices/virtio/virtio_std.h>
-
-#define foreach_af_packet_input_error \
- _ (PARTIAL_PKT, "partial packet") \
- _ (TIMEDOUT_BLK, "timed out block") \
- _ (TOTAL_RECV_BLK, "total received block")
-typedef enum
-{
-#define _(f,s) AF_PACKET_INPUT_ERROR_##f,
- foreach_af_packet_input_error
-#undef _
- AF_PACKET_INPUT_N_ERROR,
-} af_packet_input_error_t;
-
-static char *af_packet_input_error_strings[] = {
-#define _(n,s) s,
- foreach_af_packet_input_error
-#undef _
-};
-
-typedef struct
-{
- u32 next_index;
- u32 hw_if_index;
- u16 queue_id;
- int block;
- u32 pkt_num;
- void *block_start;
- block_desc_t bd;
- tpacket3_hdr_t tph;
- vnet_virtio_net_hdr_t vnet_hdr;
-} af_packet_input_trace_t;
-
-static u8 *
-format_af_packet_input_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- af_packet_input_trace_t *t = va_arg (*args, af_packet_input_trace_t *);
- u32 indent = format_get_indent (s);
-
- s = format (s, "af_packet: hw_if_index %d rx-queue %u next-index %d",
- t->hw_if_index, t->queue_id, t->next_index);
-
- s = format (
- s, "\n%Ublock %u:\n%Uaddress %p version %u seq_num %lu pkt_num %u",
- format_white_space, indent + 2, t->block, format_white_space, indent + 4,
- t->block_start, t->bd.version, t->bd.hdr.bh1.seq_num, t->pkt_num);
- s =
- format (s,
- "\n%Utpacket3_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
- "\n%Usec 0x%x nsec 0x%x vlan %U"
-#ifdef TP_STATUS_VLAN_TPID_VALID
- " vlan_tpid %u"
-#endif
- ,
- format_white_space, indent + 2, format_white_space, indent + 4,
- t->tph.tp_status, t->tph.tp_len, t->tph.tp_snaplen, t->tph.tp_mac,
- t->tph.tp_net, format_white_space, indent + 4, t->tph.tp_sec,
- t->tph.tp_nsec, format_ethernet_vlan_tci, t->tph.hv1.tp_vlan_tci
-#ifdef TP_STATUS_VLAN_TPID_VALID
- ,
- t->tph.hv1.tp_vlan_tpid
-#endif
- );
-
- s = format (s,
- "\n%Uvnet-hdr:\n%Uflags 0x%02x gso_type 0x%02x hdr_len %u"
- "\n%Ugso_size %u csum_start %u csum_offset %u",
- format_white_space, indent + 2, format_white_space, indent + 4,
- t->vnet_hdr.flags, t->vnet_hdr.gso_type, t->vnet_hdr.hdr_len,
- format_white_space, indent + 4, t->vnet_hdr.gso_size,
- t->vnet_hdr.csum_start, t->vnet_hdr.csum_offset);
- return s;
-}
-
-always_inline void
-buffer_add_to_chain (vlib_buffer_t *b, vlib_buffer_t *first_b,
- vlib_buffer_t *prev_b, u32 bi)
-{
- /* update first buffer */
- first_b->total_length_not_including_first_buffer += b->current_length;
-
- /* update previous buffer */
- prev_b->next_buffer = bi;
- prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
-
- /* update current buffer */
- b->next_buffer = ~0;
-}
-
-static_always_inline void
-fill_gso_offload (vlib_buffer_t *b, u32 gso_size, u8 l4_hdr_sz)
-{
- b->flags |= VNET_BUFFER_F_GSO;
- vnet_buffer2 (b)->gso_size = gso_size;
- vnet_buffer2 (b)->gso_l4_hdr_sz = l4_hdr_sz;
-}
-
-static_always_inline void
-fill_cksum_offload (vlib_buffer_t *b, u8 *l4_hdr_sz, u8 is_ip)
-{
- vnet_buffer_oflags_t oflags = 0;
- u16 l2hdr_sz = 0;
- u16 ethertype = 0;
- u8 l4_proto = 0;
-
- if (is_ip)
- {
- switch (b->data[0] & 0xf0)
- {
- case 0x40:
- ethertype = ETHERNET_TYPE_IP4;
- break;
- case 0x60:
- ethertype = ETHERNET_TYPE_IP6;
- break;
- }
- }
- else
- {
- ethernet_header_t *eth = (ethernet_header_t *) b->data;
- ethertype = clib_net_to_host_u16 (eth->type);
- l2hdr_sz = sizeof (ethernet_header_t);
- if (ethernet_frame_is_tagged (ethertype))
- {
- ethernet_vlan_header_t *vlan = (ethernet_vlan_header_t *) (eth + 1);
-
- ethertype = clib_net_to_host_u16 (vlan->type);
- l2hdr_sz += sizeof (*vlan);
- if (ethertype == ETHERNET_TYPE_VLAN)
- {
- vlan++;
- ethertype = clib_net_to_host_u16 (vlan->type);
- l2hdr_sz += sizeof (*vlan);
- }
- }
- }
-
- vnet_buffer (b)->l2_hdr_offset = 0;
- vnet_buffer (b)->l3_hdr_offset = l2hdr_sz;
-
- if (ethertype == ETHERNET_TYPE_IP4)
- {
- ip4_header_t *ip4 = (ip4_header_t *) (b->data + l2hdr_sz);
- vnet_buffer (b)->l4_hdr_offset = l2hdr_sz + ip4_header_bytes (ip4);
- b->flags |= (VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
-
- l4_proto = ip4->protocol;
- }
- else if (ethertype == ETHERNET_TYPE_IP6)
- {
- ip6_header_t *ip6 = (ip6_header_t *) (b->data + l2hdr_sz);
- b->flags |= (VNET_BUFFER_F_IS_IP6 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
- u16 ip6_hdr_len = sizeof (ip6_header_t);
-
- if (ip6_ext_hdr (ip6->protocol))
- {
- ip6_ext_header_t *p = (void *) (ip6 + 1);
- ip6_hdr_len += ip6_ext_header_len (p);
- while (ip6_ext_hdr (p->next_hdr))
- {
- ip6_hdr_len += ip6_ext_header_len (p);
- p = ip6_ext_next_header (p);
- }
- l4_proto = p->next_hdr;
- }
- else
- l4_proto = ip6->protocol;
- vnet_buffer (b)->l4_hdr_offset = l2hdr_sz + ip6_hdr_len;
- }
-
- if (l4_proto == IP_PROTOCOL_TCP)
- {
- oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
- tcp_header_t *tcp =
- (tcp_header_t *) (b->data + vnet_buffer (b)->l4_hdr_offset);
- *l4_hdr_sz = tcp_header_bytes (tcp);
- }
- else if (l4_proto == IP_PROTOCOL_UDP)
- {
- oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
- *l4_hdr_sz = sizeof (udp_header_t);
- }
-
- if (oflags)
- vnet_buffer_offload_flags_set (b, oflags);
-}
-
-always_inline uword
-af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
- vlib_frame_t *frame, af_packet_if_t *apif,
- u16 queue_id, u8 is_cksum_gso_enabled)
-{
- af_packet_main_t *apm = &af_packet_main;
- af_packet_queue_t *rx_queue = vec_elt_at_index (apif->rx_queues, queue_id);
- tpacket3_hdr_t *tph;
- u32 next_index;
- u32 n_free_bufs;
- u32 n_rx_packets = 0;
- u32 n_rx_bytes = 0;
- u32 timedout_blk = 0;
- u32 total = 0;
- u32 *to_next = 0;
- u32 block = rx_queue->next_rx_block;
- u32 block_nr = rx_queue->rx_req->tp_block_nr;
- u8 *block_start = 0;
- uword n_trace = vlib_get_trace_count (vm, node);
- u32 thread_index = vm->thread_index;
- u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm);
- u32 min_bufs = rx_queue->rx_req->tp_frame_size / n_buffer_bytes;
- u32 num_pkts = 0;
- u32 rx_frame_offset = 0;
- block_desc_t *bd = 0;
- vlib_buffer_t bt = {};
- u8 is_ip = (apif->mode == AF_PACKET_IF_MODE_IP);
-
- if (is_ip)
- next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
- else
- {
- next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
- next_index = apif->per_interface_next_index;
-
- /* redirect if feature path enabled */
- vnet_feature_start_device_input_x1 (apif->sw_if_index, &next_index, &bt);
- }
-
- if ((((block_desc_t *) (block_start = rx_queue->rx_ring[block]))
- ->hdr.bh1.block_status &
- TP_STATUS_USER) != 0)
- {
- u32 n_required = 0;
- bd = (block_desc_t *) block_start;
-
- if (PREDICT_FALSE (rx_queue->is_rx_pending))
- {
- num_pkts = rx_queue->num_rx_pkts;
- rx_frame_offset = rx_queue->rx_frame_offset;
- rx_queue->is_rx_pending = 0;
- }
- else
- {
- num_pkts = bd->hdr.bh1.num_pkts;
- rx_frame_offset = sizeof (block_desc_t);
- total++;
-
- if (TP_STATUS_BLK_TMO & bd->hdr.bh1.block_status)
- timedout_blk++;
- }
-
- n_required = clib_max (num_pkts, VLIB_FRAME_SIZE);
- n_free_bufs = vec_len (apm->rx_buffers[thread_index]);
- if (PREDICT_FALSE (n_free_bufs < n_required))
- {
- vec_validate (apm->rx_buffers[thread_index],
- n_required + n_free_bufs - 1);
- n_free_bufs += vlib_buffer_alloc (
- vm, &apm->rx_buffers[thread_index][n_free_bufs], n_required);
- vec_set_len (apm->rx_buffers[thread_index], n_free_bufs);
- }
-
- while (num_pkts && (n_free_bufs >= min_bufs))
- {
- u32 next0 = next_index;
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (num_pkts && n_left_to_next && (n_free_bufs >= min_bufs))
- {
- tph = (tpacket3_hdr_t *) (block_start + rx_frame_offset);
-
- if (num_pkts > 1)
- CLIB_PREFETCH (block_start + rx_frame_offset +
- tph->tp_next_offset,
- 2 * CLIB_CACHE_LINE_BYTES, LOAD);
-
- vlib_buffer_t *b0 = 0, *first_b0 = 0, *prev_b0 = 0;
- vnet_virtio_net_hdr_t *vnet_hdr = 0;
- u32 data_len = tph->tp_snaplen;
- u32 offset = 0;
- u32 bi0 = ~0, first_bi0 = ~0;
- u8 l4_hdr_sz = 0;
-
- if (is_cksum_gso_enabled)
- vnet_hdr =
- (vnet_virtio_net_hdr_t *) ((u8 *) tph + tph->tp_mac -
- sizeof (vnet_virtio_net_hdr_t));
-
- // save current state and return
- if (PREDICT_FALSE (((data_len / n_buffer_bytes) + 1) >
- vec_len (apm->rx_buffers[thread_index])))
- {
- rx_queue->rx_frame_offset = rx_frame_offset;
- rx_queue->num_rx_pkts = num_pkts;
- rx_queue->is_rx_pending = 1;
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- goto done;
- }
-
- while (data_len)
- {
- /* grab free buffer */
- u32 last_empty_buffer =
- vec_len (apm->rx_buffers[thread_index]) - 1;
- bi0 = apm->rx_buffers[thread_index][last_empty_buffer];
- vec_set_len (apm->rx_buffers[thread_index],
- last_empty_buffer);
- n_free_bufs--;
-
- /* copy data */
- u32 bytes_to_copy =
- data_len > n_buffer_bytes ? n_buffer_bytes : data_len;
- u32 vlan_len = 0;
- u32 bytes_copied = 0;
-
- b0 = vlib_get_buffer (vm, bi0);
- b0->current_data = 0;
-
- /* Kernel removes VLAN headers, so reconstruct VLAN */
- if (PREDICT_FALSE (tph->tp_status & TP_STATUS_VLAN_VALID))
- {
- if (PREDICT_TRUE (offset == 0))
- {
- clib_memcpy_fast (vlib_buffer_get_current (b0),
- (u8 *) tph + tph->tp_mac,
- sizeof (ethernet_header_t));
- ethernet_header_t *eth =
- vlib_buffer_get_current (b0);
- ethernet_vlan_header_t *vlan =
- (ethernet_vlan_header_t *) (eth + 1);
- vlan->priority_cfi_and_id =
- clib_host_to_net_u16 (tph->hv1.tp_vlan_tci);
- vlan->type = eth->type;
- eth->type =
- clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
- vlan_len = sizeof (ethernet_vlan_header_t);
- bytes_copied = sizeof (ethernet_header_t);
- }
- }
- clib_memcpy_fast (((u8 *) vlib_buffer_get_current (b0)) +
- bytes_copied + vlan_len,
- (u8 *) tph + tph->tp_mac + offset +
- bytes_copied,
- (bytes_to_copy - bytes_copied));
-
- /* fill buffer header */
- b0->current_length = bytes_to_copy + vlan_len;
-
- if (offset == 0)
- {
- b0->total_length_not_including_first_buffer = 0;
- b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
- vnet_buffer (b0)->sw_if_index[VLIB_RX] =
- apif->sw_if_index;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~0;
- first_b0 = b0;
- first_bi0 = bi0;
- if (is_cksum_gso_enabled)
- {
- if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
- fill_cksum_offload (first_b0, &l4_hdr_sz, is_ip);
- if (vnet_hdr->gso_type & (VIRTIO_NET_HDR_GSO_TCPV4 |
- VIRTIO_NET_HDR_GSO_TCPV6))
- fill_gso_offload (first_b0, vnet_hdr->gso_size,
- l4_hdr_sz);
- }
- }
- else
- buffer_add_to_chain (b0, first_b0, prev_b0, bi0);
-
- prev_b0 = b0;
- offset += bytes_to_copy;
- data_len -= bytes_to_copy;
- }
- n_rx_packets++;
- n_rx_bytes += tph->tp_snaplen;
- to_next[0] = first_bi0;
- to_next += 1;
- n_left_to_next--;
-
- /* drop partial packets */
- if (PREDICT_FALSE (tph->tp_len != tph->tp_snaplen))
- {
- next0 = VNET_DEVICE_INPUT_NEXT_DROP;
- first_b0->error =
- node->errors[AF_PACKET_INPUT_ERROR_PARTIAL_PKT];
- }
- else
- {
- if (PREDICT_FALSE (apif->mode == AF_PACKET_IF_MODE_IP))
- {
- switch (first_b0->data[0] & 0xf0)
- {
- case 0x40:
- next0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
- break;
- case 0x60:
- next0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
- break;
- default:
- next0 = VNET_DEVICE_INPUT_NEXT_DROP;
- break;
- }
- if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
- next0 = apif->per_interface_next_index;
- }
- else
- {
- /* copy feature arc data from template */
- first_b0->current_config_index = bt.current_config_index;
- vnet_buffer (first_b0)->feature_arc_index =
- vnet_buffer (&bt)->feature_arc_index;
- }
- }
-
- /* trace */
- if (PREDICT_FALSE (n_trace > 0 &&
- vlib_trace_buffer (vm, node, next0, first_b0,
- /* follow_chain */ 0)))
- {
- af_packet_input_trace_t *tr;
- vlib_set_trace_count (vm, node, --n_trace);
- tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr));
- tr->next_index = next0;
- tr->hw_if_index = apif->hw_if_index;
- tr->queue_id = queue_id;
- tr->block = block;
- tr->block_start = bd;
- tr->pkt_num = bd->hdr.bh1.num_pkts - num_pkts;
- clib_memcpy_fast (&tr->bd, bd, sizeof (block_desc_t));
- clib_memcpy_fast (&tr->tph, tph, sizeof (tpacket3_hdr_t));
- if (is_cksum_gso_enabled)
- clib_memcpy_fast (&tr->vnet_hdr, vnet_hdr,
- sizeof (vnet_virtio_net_hdr_t));
- else
- clib_memset_u8 (&tr->vnet_hdr, 0,
- sizeof (vnet_virtio_net_hdr_t));
- }
-
- /* enque and take next packet */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, first_bi0,
- next0);
-
- /* next packet */
- num_pkts--;
- rx_frame_offset += tph->tp_next_offset;
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- if (PREDICT_TRUE (num_pkts == 0))
- {
- bd->hdr.bh1.block_status = TP_STATUS_KERNEL;
- block = (block + 1) % block_nr;
- }
- else
- {
- rx_queue->rx_frame_offset = rx_frame_offset;
- rx_queue->num_rx_pkts = num_pkts;
- rx_queue->is_rx_pending = 1;
- }
- }
-
- rx_queue->next_rx_block = block;
-
-done:
-
- if (apm->polling_count == 0)
- {
- if ((((block_desc_t *) (block_start = rx_queue->rx_ring[block]))
- ->hdr.bh1.block_status &
- TP_STATUS_USER) != 0)
- vlib_node_set_state (vm, node->node_index, VLIB_NODE_STATE_POLLING);
- else
- vlib_node_set_state (vm, node->node_index, VLIB_NODE_STATE_INTERRUPT);
- }
-
- vlib_error_count (vm, node->node_index, AF_PACKET_INPUT_ERROR_TOTAL_RECV_BLK,
- total);
- vlib_error_count (vm, node->node_index, AF_PACKET_INPUT_ERROR_TIMEDOUT_BLK,
- timedout_blk);
-
- vlib_increment_combined_counter
- (vnet_get_main ()->interface_main.combined_sw_if_counters
- + VNET_INTERFACE_COUNTER_RX,
- vlib_get_thread_index (), apif->hw_if_index, n_rx_packets, n_rx_bytes);
-
- vnet_device_increment_rx_packets (thread_index, n_rx_packets);
- return n_rx_packets;
-}
-
-VLIB_NODE_FN (af_packet_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- u32 n_rx_packets = 0;
- af_packet_main_t *apm = &af_packet_main;
- vnet_hw_if_rxq_poll_vector_t *pv;
- pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
- for (int i = 0; i < vec_len (pv); i++)
- {
- af_packet_if_t *apif;
- apif = vec_elt_at_index (apm->interfaces, pv[i].dev_instance);
- if (apif->is_admin_up)
- {
- if (apif->is_cksum_gso_enabled)
- n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif,
- pv[i].queue_id, 1);
- else
- n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif,
- pv[i].queue_id, 0);
- }
- }
- return n_rx_packets;
-}
-
-VLIB_REGISTER_NODE (af_packet_input_node) = {
- .name = "af-packet-input",
- .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
- .sibling_of = "device-input",
- .format_trace = format_af_packet_input_trace,
- .type = VLIB_NODE_TYPE_INPUT,
- .state = VLIB_NODE_STATE_INTERRUPT,
- .n_errors = AF_PACKET_INPUT_N_ERROR,
- .error_strings = af_packet_input_error_strings,
-};
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c
index 1a4f02df6a8..ee380bebbde 100644
--- a/src/vnet/devices/devices.c
+++ b/src/vnet/devices/devices.c
@@ -29,7 +29,6 @@ device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (device_input_node) = {
.function = device_input_fn,
.name = "device-input",
@@ -40,29 +39,6 @@ VLIB_REGISTER_NODE (device_input_node) = {
.next_nodes = VNET_DEVICE_INPUT_NEXT_NODES,
};
-/* Table defines how much we need to advance current data pointer
- in the buffer if we shortcut to l3 nodes */
-
-const u32 __attribute__((aligned (CLIB_CACHE_LINE_BYTES)))
-device_input_next_node_advance[((VNET_DEVICE_INPUT_N_NEXT_NODES /
- CLIB_CACHE_LINE_BYTES) +1) * CLIB_CACHE_LINE_BYTES] =
-{
- [VNET_DEVICE_INPUT_NEXT_IP4_INPUT] = sizeof (ethernet_header_t),
- [VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT] = sizeof (ethernet_header_t),
- [VNET_DEVICE_INPUT_NEXT_IP6_INPUT] = sizeof (ethernet_header_t),
- [VNET_DEVICE_INPUT_NEXT_MPLS_INPUT] = sizeof (ethernet_header_t),
-};
-
-const u32 __attribute__((aligned (CLIB_CACHE_LINE_BYTES)))
-device_input_next_node_flags[((VNET_DEVICE_INPUT_N_NEXT_NODES /
- CLIB_CACHE_LINE_BYTES) +1) * CLIB_CACHE_LINE_BYTES] =
-{
- [VNET_DEVICE_INPUT_NEXT_IP4_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID,
- [VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID,
- [VNET_DEVICE_INPUT_NEXT_IP6_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID,
- [VNET_DEVICE_INPUT_NEXT_MPLS_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID,
-};
-
VNET_FEATURE_ARC_INIT (device_input, static) =
{
.arc_name = "device-input",
@@ -100,7 +76,6 @@ VNET_FEATURE_INIT (ethernet_input, static) = {
.node_name = "ethernet-input",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
static void
input_rate_collector_fn (vlib_stats_collector_data_t *d)
diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h
index e54c7a29130..cadf1f857a6 100644
--- a/src/vnet/devices/devices.h
+++ b/src/vnet/devices/devices.h
@@ -67,8 +67,6 @@ typedef struct
extern vnet_device_main_t vnet_device_main;
extern vlib_node_registration_t device_input_node;
-extern const u32 device_input_next_node_advance[];
-extern const u32 device_input_next_node_flags[];
static inline u64
vnet_get_aggregate_rx_packets (void)
diff --git a/src/vnet/devices/netlink.c b/src/vnet/devices/netlink.c
index da21e9adea1..3fd3e13bf77 100644
--- a/src/vnet/devices/netlink.c
+++ b/src/vnet/devices/netlink.c
@@ -20,8 +20,13 @@
#include <fcntl.h>
#include <net/if.h>
+#ifdef __linux__
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
+#elif __FreeBSD__
+#include <netlink/netlink.h>
+#include <netlink/netlink_route.h>
+#endif
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
diff --git a/src/vnet/devices/pipe/pipe.c b/src/vnet/devices/pipe/pipe.c
index 26b01970b6c..9caee2a55cb 100644
--- a/src/vnet/devices/pipe/pipe.c
+++ b/src/vnet/devices/pipe/pipe.c
@@ -83,13 +83,11 @@ pipe_build_rewrite (vnet_main_t * vnm,
return (rewrite);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (pipe_hw_interface_class) = {
.name = "Pipe",
.build_rewrite = pipe_build_rewrite,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
pipe_t *
pipe_get (u32 sw_if_index)
@@ -131,7 +129,7 @@ pipe_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
u32 n_left_from, n_left_to_next, n_copy, *from, *to_next;
u32 next_index = VNET_PIPE_TX_NEXT_ETHERNET_INPUT;
- u32 i, sw_if_index = 0, n_pkts = 0;
+ u32 i, sw_if_index = 0;
vlib_buffer_t *b;
pipe_t *pipe;
@@ -159,7 +157,6 @@ pipe_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
i++;
- n_pkts++;
}
from += n_copy;
@@ -186,25 +183,21 @@ pipe_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
- /* *INDENT-OFF* */
hi = vnet_get_hw_interface (vnm, hw_if_index);
hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
({
vnet_sw_interface_set_flags (vnm, sw_if_index, flags);
}));
- /* *INDENT-ON* */
return (NULL);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (pipe_device_class) = {
.name = "Pipe",
.format_device_name = format_pipe_name,
.tx_function = pipe_tx,
.admin_up_down_function = pipe_admin_up_down,
};
-/* *INDENT-ON* */
#define foreach_pipe_rx_next \
_ (DROP, "error-drop")
@@ -433,7 +426,6 @@ pipe_rx (vlib_main_t * vm,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (pipe_rx_node) = {
.function = pipe_rx,
.name = "pipe-rx",
@@ -443,7 +435,6 @@ VLIB_REGISTER_NODE (pipe_rx_node) = {
.sibling_of = "ethernet-input",
};
-/* *INDENT-ON* */
/*
* Maintain a bitmap of allocated pipe instance numbers.
@@ -626,13 +617,11 @@ pipe_hw_walk (vnet_main_t * vnm, u32 hw_if_index, void *args)
{
u32 pipe_sw_if_index[2], id, sw_if_index;
- /* *INDENT-OFF* */
hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
({
ASSERT(id < 2);
pipe_sw_if_index[id] = sw_if_index;
}));
- /* *INDENT-ON* */
ctx->cb (hi->sw_if_index, pipe_sw_if_index, hi->dev_instance, ctx->ctx);
}
@@ -691,13 +680,11 @@ create_pipe_interfaces (vlib_main_t * vm,
* Example of how to create a pipe interface:
* @cliexcmd{pipe create}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (pipe_create_interface_command, static) = {
.path = "pipe create",
.short_help = "pipe create [instance <instance>]",
.function = create_pipe_interfaces,
};
-/* *INDENT-ON* */
int
vnet_delete_pipe_interface (u32 sw_if_index)
@@ -721,13 +708,11 @@ vnet_delete_pipe_interface (u32 sw_if_index)
return VNET_API_ERROR_INVALID_SW_IF_INDEX;
}
- /* *INDENT-OFF* */
hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
({
vnet_delete_sub_interface(sw_if_index);
pipe_main.pipes[sw_if_index] = PIPE_INVALID;
}));
- /* *INDENT-ON* */
ethernet_delete_interface (vnm, hw_if_index);
@@ -771,13 +756,11 @@ delete_pipe_interfaces (vlib_main_t * vm,
* Example of how to delete a pipe interface:
* @cliexcmd{pipe delete-interface intfc loop0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (pipe_delete_interface_command, static) = {
.path = "pipe delete",
.short_help = "pipe delete <interface>",
.function = delete_pipe_interfaces,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/devices/pipe/pipe_api.c b/src/vnet/devices/pipe/pipe_api.c
index 1f0faef7c1e..79a4377de83 100644
--- a/src/vnet/devices/pipe/pipe_api.c
+++ b/src/vnet/devices/pipe/pipe_api.c
@@ -42,14 +42,12 @@ vl_api_pipe_create_t_handler (vl_api_pipe_create_t * mp)
rv = vnet_create_pipe_interface (is_specified, user_instance,
&parent_sw_if_index, pipe_sw_if_index);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_PIPE_CREATE_REPLY,
({
rmp->sw_if_index = ntohl (parent_sw_if_index);
rmp->pipe_sw_if_index[0] = ntohl (pipe_sw_if_index[0]);
rmp->pipe_sw_if_index[1] = ntohl (pipe_sw_if_index[1]);
}));
- /* *INDENT-ON* */
}
static void
diff --git a/src/vnet/devices/tap/FEATURE.yaml b/src/vnet/devices/tap/FEATURE.yaml
index 35ee4885b02..1a774fb0e74 100644
--- a/src/vnet/devices/tap/FEATURE.yaml
+++ b/src/vnet/devices/tap/FEATURE.yaml
@@ -1,6 +1,6 @@
---
name: Tap Device
-maintainer: damarion@cisco.com sluong@cisco.com sykazmi@cisco.com
+maintainer: damarion@cisco.com sluong@cisco.com mohsin.kazmi14@gmail.com
features:
- Virtio
- Persistence
diff --git a/src/vnet/devices/tap/cli.c b/src/vnet/devices/tap/cli.c
index 096a2c46970..5c676d32d60 100644
--- a/src/vnet/devices/tap/cli.c
+++ b/src/vnet/devices/tap/cli.c
@@ -136,7 +136,6 @@ tap_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tap_create_command, static) = {
.path = "create tap",
.short_help =
@@ -150,7 +149,6 @@ VLIB_CLI_COMMAND (tap_create_command, static) = {
"[persist] [attach] [tun] [packed] [in-order]",
.function = tap_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
tap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -191,14 +189,12 @@ tap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tap_delete__command, static) =
{
.path = "delete tap",
.short_help = "delete tap {<interface> | sw_if_index <sw_idx>}",
.function = tap_delete_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
tap_offload_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -261,7 +257,6 @@ tap_offload_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tap_offload_command, static) =
{
.path = "set tap offload",
@@ -270,7 +265,6 @@ VLIB_CLI_COMMAND (tap_offload_command, static) =
"csum-offload-disable>",
.function = tap_offload_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
tap_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -300,10 +294,8 @@ tap_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (vec_len (hw_if_indices) == 0)
{
- /* *INDENT-OFF* */
pool_foreach (vif, mm->interfaces)
vec_add1 (hw_if_indices, vif->hw_if_index);
- /* *INDENT-ON* */
}
virtio_show (vm, hw_if_indices, show_descr, VIRTIO_IF_TYPE_TAP);
@@ -313,13 +305,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tap_show_command, static) = {
.path = "show tap",
.short_help = "show tap {<interface>] [descriptors]",
.function = tap_show_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
tun_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -349,10 +339,8 @@ tun_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (vec_len (hw_if_indices) == 0)
{
- /* *INDENT-OFF* */
pool_foreach (vif, mm->interfaces)
vec_add1 (hw_if_indices, vif->hw_if_index);
- /* *INDENT-ON* */
}
virtio_show (vm, hw_if_indices, show_descr, VIRTIO_IF_TYPE_TUN);
@@ -362,13 +350,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tun_show_command, static) = {
.path = "show tun",
.short_help = "show tun {<interface>] [descriptors]",
.function = tun_show_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
tap_cli_init (vlib_main_t * vm)
diff --git a/src/vnet/devices/tap/tap.c b/src/vnet/devices/tap/tap.c
index 360c001b168..1e2ee87041d 100644
--- a/src/vnet/devices/tap/tap.c
+++ b/src/vnet/devices/tap/tap.c
@@ -97,14 +97,12 @@ tap_free (vlib_main_t * vm, virtio_if_t * vif)
virtio_pre_input_node_disable (vm, vif);
- /* *INDENT-OFF* */
vec_foreach_index (i, vif->vhost_fds) if (vif->vhost_fds[i] != -1)
close (vif->vhost_fds[i]);
vec_foreach_index (i, vif->rxq_vrings)
virtio_vring_free_rx (vm, vif, RX_QUEUE (i));
vec_foreach_index (i, vif->txq_vrings)
virtio_vring_free_tx (vm, vif, TX_QUEUE (i));
- /* *INDENT-ON* */
if (vif->tap_fds)
{
@@ -885,7 +883,6 @@ tap_dump_ifs (tap_interface_details_t ** out_tapids)
tap_interface_details_t *r_tapids = NULL;
tap_interface_details_t *tapid = NULL;
- /* *INDENT-OFF* */
pool_foreach (vif, mm->interfaces) {
if ((vif->type != VIRTIO_IF_TYPE_TAP)
&& (vif->type != VIRTIO_IF_TYPE_TUN))
@@ -929,7 +926,6 @@ tap_dump_ifs (tap_interface_details_t ** out_tapids)
tapid->host_ip6_prefix_len = vif->host_ip6_prefix_len;
tapid->host_mtu_size = vif->host_mtu_size;
}
- /* *INDENT-ON* */
*out_tapids = r_tapids;
diff --git a/src/vnet/devices/tap/tapv2.api b/src/vnet/devices/tap/tapv2.api
index 0ee14511529..bf53d1bc6fe 100644
--- a/src/vnet/devices/tap/tapv2.api
+++ b/src/vnet/devices/tap/tapv2.api
@@ -144,6 +144,8 @@ autoendian define tap_create_v3_reply
*/
define tap_create_v2
{
+ option deprecated;
+
u32 client_index;
u32 context;
u32 id [default=0xffffffff];
@@ -181,6 +183,8 @@ define tap_create_v2
*/
define tap_create_v2_reply
{
+ option deprecated;
+
u32 context;
i32 retval;
vl_api_interface_index_t sw_if_index;
diff --git a/src/vnet/devices/virtio/FEATURE.yaml b/src/vnet/devices/virtio/FEATURE.yaml
index 7b2fb59e1ad..446a45b61a3 100644
--- a/src/vnet/devices/virtio/FEATURE.yaml
+++ b/src/vnet/devices/virtio/FEATURE.yaml
@@ -1,6 +1,6 @@
---
name: Virtio PCI Device
-maintainer: sykazmi@cisco.com sluong@cisco.com
+maintainer: mohsin.kazmi14@gmail.com sluong@cisco.com
features:
- Driver mode to emulate PCI interface presented to VPP from
the host interface.
@@ -11,6 +11,8 @@ features:
- Support multi-queue, GSO, checksum offload, indirect descriptor,
jumbo frame, and packed ring.
- Support virtio 1.1 packed ring in vhost
+ - Support for tx queue size configuration (tested on host kernel 5.15
+ and qemu version 6.2.0)
description: "Virtio implementation"
missing:
- API dump filtering by sw_if_index
diff --git a/src/vnet/devices/virtio/cli.c b/src/vnet/devices/virtio/cli.c
index a78336997e2..c1b6c8be065 100644
--- a/src/vnet/devices/virtio/cli.c
+++ b/src/vnet/devices/virtio/cli.c
@@ -31,6 +31,7 @@ virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
virtio_pci_create_if_args_t args;
u64 feature_mask = (u64) ~ (0ULL);
u32 buffering_size = 0;
+ u32 txq_size = 0;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
@@ -43,6 +44,8 @@ virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
;
else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask))
args.features = feature_mask;
+ else if (unformat (line_input, "tx-queue-size %u", &txq_size))
+ args.tx_queue_size = txq_size;
else if (unformat (line_input, "gso-enabled"))
args.gso_enabled = 1;
else if (unformat (line_input, "csum-enabled"))
@@ -55,6 +58,10 @@ virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
}
else if (unformat (line_input, "packed"))
args.virtio_flags |= VIRTIO_FLAG_PACKED;
+ else if (unformat (line_input, "bind force"))
+ args.bind = VIRTIO_BIND_FORCE;
+ else if (unformat (line_input, "bind"))
+ args.bind = VIRTIO_BIND_DEFAULT;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
@@ -66,15 +73,14 @@ virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (virtio_pci_create_command, static) = {
.path = "create interface virtio",
.short_help = "create interface virtio <pci-address> "
- "[feature-mask <hex-mask>] [gso-enabled] [csum-enabled] "
- "[buffering [size <buffering-szie>]] [packed]",
+ "[feature-mask <hex-mask>] [tx-queue-size <size>] "
+ "[gso-enabled] [csum-enabled] "
+ "[buffering [size <buffering-szie>]] [packed] [bind [force]]",
.function = virtio_pci_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
virtio_pci_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -120,14 +126,12 @@ virtio_pci_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (virtio_pci_delete_command, static) = {
.path = "delete interface virtio",
.short_help = "delete interface virtio "
"{<interface> | sw_if_index <sw_idx>}",
.function = virtio_pci_delete_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
virtio_pci_enable_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -182,14 +186,12 @@ virtio_pci_enable_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (virtio_pci_enable_command, static) = {
.path = "set virtio pci",
.short_help = "set virtio pci {<interface> | sw_if_index <sw_idx>}"
" [gso-enabled | csum-offload-enabled | offloads-disabled]",
.function = virtio_pci_enable_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_virtio_pci_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -248,13 +250,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_virtio_pci_command, static) = {
.path = "show virtio pci",
.short_help = "show virtio pci [<interface>] [descriptors | desc] [debug-device]",
.function = show_virtio_pci_fn,
};
-/* *INDENT-ON* */
clib_error_t *
virtio_pci_cli_init (vlib_main_t * vm)
diff --git a/src/vnet/devices/virtio/device.c b/src/vnet/devices/virtio/device.c
index ec5cdbd7bdd..112f77e7065 100644
--- a/src/vnet/devices/virtio/device.c
+++ b/src/vnet/devices/virtio/device.c
@@ -309,12 +309,12 @@ set_checksum_offsets (vlib_buffer_t *b, vnet_virtio_net_hdr_v1_t *hdr,
const int is_l2)
{
vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
-
+ i16 l4_hdr_offset = vnet_buffer (b)->l4_hdr_offset - b->current_data;
if (b->flags & VNET_BUFFER_F_IS_IP4)
{
ip4_header_t *ip4;
hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = vnet_buffer (b)->l4_hdr_offset; // 0x22;
+ hdr->csum_start = l4_hdr_offset; // 0x22;
/*
* virtio devices do not support IP4 checksum offload. So driver takes
@@ -347,7 +347,7 @@ set_checksum_offsets (vlib_buffer_t *b, vnet_virtio_net_hdr_v1_t *hdr,
{
ip6_header_t *ip6;
hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = vnet_buffer (b)->l4_hdr_offset; // 0x36;
+ hdr->csum_start = l4_hdr_offset; // 0x36;
ip6 = (ip6_header_t *) (b->data + vnet_buffer (b)->l3_hdr_offset);
/*
@@ -376,17 +376,16 @@ set_gso_offsets (vlib_buffer_t *b, vnet_virtio_net_hdr_v1_t *hdr,
const int is_l2)
{
vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
+ i16 l4_hdr_offset = vnet_buffer (b)->l4_hdr_offset - b->current_data;
if (b->flags & VNET_BUFFER_F_IS_IP4)
{
ip4_header_t *ip4;
hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
hdr->gso_size = vnet_buffer2 (b)->gso_size;
- hdr->hdr_len = vnet_buffer (b)->l4_hdr_offset -
- vnet_buffer (b)->l2_hdr_offset +
- vnet_buffer2 (b)->gso_l4_hdr_sz;
+ hdr->hdr_len = l4_hdr_offset + vnet_buffer2 (b)->gso_l4_hdr_sz;
hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = vnet_buffer (b)->l4_hdr_offset; // 0x22;
+ hdr->csum_start = l4_hdr_offset; // 0x22;
hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
ip4 = (ip4_header_t *) (b->data + vnet_buffer (b)->l3_hdr_offset);
/*
@@ -400,11 +399,9 @@ set_gso_offsets (vlib_buffer_t *b, vnet_virtio_net_hdr_v1_t *hdr,
{
hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
hdr->gso_size = vnet_buffer2 (b)->gso_size;
- hdr->hdr_len = vnet_buffer (b)->l4_hdr_offset -
- vnet_buffer (b)->l2_hdr_offset +
- vnet_buffer2 (b)->gso_l4_hdr_sz;
+ hdr->hdr_len = l4_hdr_offset + vnet_buffer2 (b)->gso_l4_hdr_sz;
hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = vnet_buffer (b)->l4_hdr_offset; // 0x36;
+ hdr->csum_start = l4_hdr_offset; // 0x36;
hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
}
}
@@ -1205,16 +1202,6 @@ virtio_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
return 0;
}
-static clib_error_t *
-virtio_subif_add_del_function (vnet_main_t * vnm,
- u32 hw_if_index,
- struct vnet_sw_interface_t *st, int is_add)
-{
- /* Nothing for now */
- return 0;
-}
-
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (virtio_device_class) = {
.name = "virtio",
.format_device_name = format_virtio_device_name,
@@ -1225,11 +1212,9 @@ VNET_DEVICE_CLASS (virtio_device_class) = {
.rx_redirect_to_node = virtio_set_interface_next_node,
.clear_counters = virtio_clear_hw_interface_counters,
.admin_up_down_function = virtio_interface_admin_up_down,
- .subif_add_del_function = virtio_subif_add_del_function,
.rx_mode_change_function = virtio_interface_rx_mode_change,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c
index a022ee5eacc..8c837575cf8 100644
--- a/src/vnet/devices/virtio/node.c
+++ b/src/vnet/devices/virtio/node.c
@@ -19,7 +19,11 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <net/if.h>
+#ifdef __linux__
#include <linux/if_tun.h>
+#elif __FreeBSD__
+#include <net/if_tun.h>
+#endif /* __linux */
#include <sys/ioctl.h>
#include <sys/eventfd.h>
@@ -202,6 +206,19 @@ virtio_get_len (vnet_virtio_vring_t *vring, const int packed, const int hdr_sz,
return vring->used->ring[last & mask].len - hdr_sz;
}
+#define virtio_packed_check_n_left(vring, last) \
+ do \
+ { \
+ vnet_virtio_vring_packed_desc_t *d = &vring->packed_desc[last]; \
+ u16 flags = d->flags; \
+ if ((flags & VRING_DESC_F_AVAIL) != (vring->used_wrap_counter << 7) || \
+ (flags & VRING_DESC_F_USED) != (vring->used_wrap_counter << 15)) \
+ { \
+ n_left = 0; \
+ } \
+ } \
+ while (0)
+
#define increment_last(last, packed, vring) \
do \
{ \
@@ -214,6 +231,29 @@ virtio_get_len (vnet_virtio_vring_t *vring, const int packed, const int hdr_sz,
} \
while (0)
+static_always_inline void
+virtio_device_input_ethernet (vlib_main_t *vm, vlib_node_runtime_t *node,
+ const u32 next_index, const u32 sw_if_index,
+ const u32 hw_if_index)
+{
+ vlib_next_frame_t *nf;
+ vlib_frame_t *f;
+ ethernet_input_frame_t *ef;
+
+ if (PREDICT_FALSE (VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT != next_index))
+ return;
+
+ nf = vlib_node_runtime_get_next_frame (
+ vm, node, VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT);
+ f = vlib_get_frame (vm, nf->frame);
+ f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+ ef = vlib_frame_scalar_args (f);
+ ef->sw_if_index = sw_if_index;
+ ef->hw_if_index = hw_if_index;
+ vlib_frame_no_append (f);
+}
+
static_always_inline uword
virtio_device_input_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame, virtio_if_t *vif,
@@ -234,6 +274,11 @@ virtio_device_input_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u16 n_left = virtio_n_left_to_process (vring, packed);
vlib_buffer_t bt = {};
+ if (packed)
+ {
+ virtio_packed_check_n_left (vring, last);
+ }
+
if (n_left == 0)
return 0;
@@ -248,7 +293,7 @@ virtio_device_input_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
next_index = vif->per_interface_next_index;
/* only for l2, redirect if feature path enabled */
- vnet_feature_start_device_input_x1 (vif->sw_if_index, &next_index, &bt);
+ vnet_feature_start_device_input (vif->sw_if_index, &next_index, &bt);
}
while (n_left)
@@ -256,7 +301,7 @@ virtio_device_input_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 n_left_to_next;
u32 next0 = next_index;
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
while (n_left && n_left_to_next)
{
@@ -386,6 +431,8 @@ virtio_device_input_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
n_rx_packets++;
n_rx_bytes += len;
}
+ virtio_device_input_ethernet (vm, node, next_index, vif->sw_if_index,
+ vif->hw_if_index);
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
vring->last_used_idx = last;
@@ -477,7 +524,6 @@ VLIB_NODE_FN (virtio_input_node) (vlib_main_t * vm,
return n_rx;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (virtio_input_node) = {
.name = "virtio-input",
.sibling_of = "device-input",
@@ -488,7 +534,6 @@ VLIB_REGISTER_NODE (virtio_input_node) = {
.n_errors = VIRTIO_INPUT_N_ERROR,
.error_strings = virtio_input_error_strings,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/devices/virtio/pci.c b/src/vnet/devices/virtio/pci.c
index f678c3960bf..6234f64fcfb 100644
--- a/src/vnet/devices/virtio/pci.c
+++ b/src/vnet/devices/virtio/pci.c
@@ -721,8 +721,8 @@ virtio_pci_control_vring_init (vlib_main_t * vm, virtio_if_t * vif,
}
clib_error_t *
-virtio_pci_vring_split_init (vlib_main_t * vm, virtio_if_t * vif,
- u16 queue_num)
+virtio_pci_vring_split_init (vlib_main_t *vm, virtio_if_t *vif, u16 queue_num,
+ u16 txq_size)
{
clib_error_t *error = 0;
u16 queue_size = 0;
@@ -745,6 +745,16 @@ virtio_pci_vring_split_init (vlib_main_t * vm, virtio_if_t * vif,
if (queue_num % 2)
{
+ if (txq_size)
+ {
+ virtio_log_debug (vif, "tx-queue: number %u, default-size %u",
+ queue_num, queue_size);
+ vif->virtio_pci_func->set_queue_size (vm, vif, queue_num, txq_size);
+ queue_size =
+ vif->virtio_pci_func->get_queue_size (vm, vif, queue_num);
+ virtio_log_debug (vif, "tx-queue: number %u, new size %u", queue_num,
+ queue_size);
+ }
vec_validate_aligned (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num),
CLIB_CACHE_LINE_BYTES);
vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num));
@@ -886,12 +896,13 @@ virtio_pci_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
}
clib_error_t *
-virtio_pci_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 queue_num)
+virtio_pci_vring_init (vlib_main_t *vm, virtio_if_t *vif, u16 queue_num,
+ u16 txq_size)
{
if (vif->is_packed)
return virtio_pci_vring_packed_init (vm, vif, queue_num);
else
- return virtio_pci_vring_split_init (vm, vif, queue_num);
+ return virtio_pci_vring_split_init (vm, vif, queue_num, txq_size);
}
static void
@@ -1229,7 +1240,7 @@ virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif,
for (int i = 0; i < vif->max_queue_pairs; i++)
{
- if ((error = virtio_pci_vring_init (vm, vif, RX_QUEUE (i))))
+ if ((error = virtio_pci_vring_init (vm, vif, RX_QUEUE (i), 0)))
{
args->rv = VNET_API_ERROR_INIT_FAILED;
virtio_log_error (vif, "%s (%u) %s", "error in rxq-queue",
@@ -1244,7 +1255,8 @@ virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif,
vif->num_rxqs++;
}
- if ((error = virtio_pci_vring_init (vm, vif, TX_QUEUE (i))))
+ if ((error = virtio_pci_vring_init (vm, vif, TX_QUEUE (i),
+ args->tx_queue_size)))
{
args->rv = VNET_API_ERROR_INIT_FAILED;
virtio_log_error (vif, "%s (%u) %s", "error in txq-queue",
@@ -1328,7 +1340,6 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args)
clib_error_t *error = 0;
u32 interrupt_count = 0;
- /* *INDENT-OFF* */
pool_foreach (vif, vim->interfaces) {
if (vif->pci_addr.as_u32 == args->addr)
{
@@ -1341,7 +1352,24 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args)
return;
}
}
- /* *INDENT-ON* */
+
+ if (args->bind)
+ {
+ vlib_pci_addr_t pci = { .as_u32 = args->addr };
+ error = vlib_pci_bind_to_uio (vm, &pci, (char *) "auto",
+ VIRTIO_BIND_FORCE == args->bind);
+ if (error)
+ {
+ args->rv = VNET_API_ERROR_INVALID_INTERFACE;
+ args->error =
+ clib_error_return (error, "%U: %s", format_vlib_pci_addr, &pci,
+ "error encountered on binding pci device");
+ vlib_log (VLIB_LOG_LEVEL_ERR, vim->log_default, "%U: %s",
+ format_vlib_pci_addr, &pci,
+ "error encountered on binding pci devicee");
+ return;
+ }
+ }
pool_get (vim->interfaces, vif);
vif->dev_instance = vif - vim->interfaces;
@@ -1480,9 +1508,17 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args)
"error encountered during packet buffering init");
goto error;
}
+ /*
+ * The packet buffering flag must be set to 1 after the buffering
+ * queues above have been initialized successfully, but before
+ * virtio_pre_input_node_enable is called.
+ * The flag remains 0 if any error occurs during buffering
+ * initialization.
+ */
+ vif->packet_buffering = 1;
+ virtio_pre_input_node_enable (vm, vif);
}
- virtio_pre_input_node_enable (vm, vif);
virtio_vring_set_rx_queues (vm, vif);
virtio_vring_set_tx_queues (vm, vif);
@@ -1524,17 +1560,19 @@ virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif)
vlib_pci_intr_disable (vm, vif->pci_dev_handle);
- for (i = 0; i < vif->max_queue_pairs; i++)
+ if (vif->virtio_pci_func)
{
- vif->virtio_pci_func->del_queue (vm, vif, RX_QUEUE (i));
- vif->virtio_pci_func->del_queue (vm, vif, TX_QUEUE (i));
- }
+ for (i = 0; i < vif->max_queue_pairs; i++)
+ {
+ vif->virtio_pci_func->del_queue (vm, vif, RX_QUEUE (i));
+ vif->virtio_pci_func->del_queue (vm, vif, TX_QUEUE (i));
+ }
- if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ))
- vif->virtio_pci_func->del_queue (vm, vif, vif->max_queue_pairs * 2);
+ if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ))
+ vif->virtio_pci_func->del_queue (vm, vif, vif->max_queue_pairs * 2);
- if (vif->virtio_pci_func)
- vif->virtio_pci_func->device_reset (vm, vif);
+ vif->virtio_pci_func->device_reset (vm, vif);
+ }
if (vif->hw_if_index)
{
@@ -1555,7 +1593,8 @@ virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif)
vlib_physmem_free (vm, vring->desc);
}
- virtio_pre_input_node_disable (vm, vif);
+ if (vif->packet_buffering)
+ virtio_pre_input_node_disable (vm, vif);
vec_foreach_index (i, vif->txq_vrings)
{
diff --git a/src/vnet/devices/virtio/pci.h b/src/vnet/devices/virtio/pci.h
index db20537bc3f..5eb80f823be 100644
--- a/src/vnet/devices/virtio/pci.h
+++ b/src/vnet/devices/virtio/pci.h
@@ -154,13 +154,11 @@ typedef struct
* and an ack/status response in the last entry. Data for the
* command goes in between.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
u8 class;
u8 cmd;
}) virtio_net_ctrl_hdr_t;
-/* *INDENT-ON* */
typedef u8 virtio_net_ctrl_ack_t;
@@ -227,6 +225,13 @@ typedef enum
#undef _
} virtio_flag_t;
+typedef enum
+{
+ VIRTIO_BIND_NONE = 0,
+ VIRTIO_BIND_DEFAULT = 1,
+ VIRTIO_BIND_FORCE = 2,
+} __clib_packed virtio_bind_t;
+
typedef struct
{
u32 addr;
@@ -238,6 +243,8 @@ typedef struct
u64 features;
u8 gso_enabled;
u8 checksum_offload_enabled;
+ u32 tx_queue_size;
+ virtio_bind_t bind;
u32 buffering_size;
u32 virtio_flags;
clib_error_t *error;
diff --git a/src/vnet/devices/virtio/vhost_user.api b/src/vnet/devices/virtio/vhost_user.api
deleted file mode 100644
index b026ba768a9..00000000000
--- a/src/vnet/devices/virtio/vhost_user.api
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Copyright (c) 2015-2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-option version = "4.1.1";
-
-import "vnet/interface_types.api";
-import "vnet/ethernet/ethernet_types.api";
-import "vnet/devices/virtio/virtio_types.api";
-
-/** \brief vhost-user interface create request
- @param client_index - opaque cookie to identify the sender
- @param is_server - our side is socket server
- @param sock_filename - unix socket filename, used to speak with frontend
- @param use_custom_mac - enable or disable the use of the provided hardware address
- @param disable_mrg_rxbuf - disable the use of merge receive buffers
- @param disable_indirect_desc - disable the use of indirect descriptors which driver can use
- @param enable_gso - enable gso support (default 0)
- @param enable_packed - enable packed ring support (default 0)
- @param mac_address - hardware address to use if 'use_custom_mac' is set
-*/
-define create_vhost_user_if
-{
- option deprecated;
- u32 client_index;
- u32 context;
- bool is_server;
- string sock_filename[256];
- bool renumber;
- bool disable_mrg_rxbuf;
- bool disable_indirect_desc;
- bool enable_gso;
- bool enable_packed;
- u32 custom_dev_instance;
- bool use_custom_mac;
- vl_api_mac_address_t mac_address;
- string tag[64];
-};
-
-/** \brief vhost-user interface create response
- @param context - sender context, to match reply w/ request
- @param retval - return code for the request
- @param sw_if_index - interface the operation is applied to
-*/
-define create_vhost_user_if_reply
-{
- option deprecated;
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-
-/** \brief vhost-user interface modify request
- @param client_index - opaque cookie to identify the sender
- @param is_server - our side is socket server
- @param sock_filename - unix socket filename, used to speak with frontend
- @param enable_gso - enable gso support (default 0)
- @param enable_packed - enable packed ring support (default 0)
-*/
-autoreply define modify_vhost_user_if
-{
- option deprecated;
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index;
- bool is_server;
- string sock_filename[256];
- bool renumber;
- bool enable_gso;
- bool enable_packed;
- u32 custom_dev_instance;
-};
-
-/** \brief vhost-user interface create request
- @param client_index - opaque cookie to identify the sender
- @param is_server - our side is socket server
- @param sock_filename - unix socket filename, used to speak with frontend
- @param use_custom_mac - enable or disable the use of the provided hardware address
- @param disable_mrg_rxbuf - disable the use of merge receive buffers
- @param disable_indirect_desc - disable the use of indirect descriptors which driver can use
- @param enable_gso - enable gso support (default 0)
- @param enable_packed - enable packed ring support (default 0)
- @param enable_event_idx - enable event_idx support (default 0)
- @param mac_address - hardware address to use if 'use_custom_mac' is set
- @param renumber - if true, use custom_dev_instance is valid
- @param custom_dev_instance - custom device instance number
-*/
-define create_vhost_user_if_v2
-{
- u32 client_index;
- u32 context;
- bool is_server;
- string sock_filename[256];
- bool renumber;
- bool disable_mrg_rxbuf;
- bool disable_indirect_desc;
- bool enable_gso;
- bool enable_packed;
- bool enable_event_idx;
- u32 custom_dev_instance;
- bool use_custom_mac;
- vl_api_mac_address_t mac_address;
- string tag[64];
-};
-
-/** \brief vhost-user interface create response
- @param context - sender context, to match reply w/ request
- @param retval - return code for the request
- @param sw_if_index - interface the operation is applied to
-*/
-define create_vhost_user_if_v2_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-
-/** \brief vhost-user interface modify request
- @param client_index - opaque cookie to identify the sender
- @param is_server - our side is socket server
- @param sock_filename - unix socket filename, used to speak with frontend
- @param enable_gso - enable gso support (default 0)
- @param enable_packed - enable packed ring support (default 0)
- @param enable_event_idx - enable event idx support (default 0)
- @param renumber - if true, use custom_dev_instance is valid
- @param custom_dev_instance - custom device instance number
-*/
-autoreply define modify_vhost_user_if_v2
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index;
- bool is_server;
- string sock_filename[256];
- bool renumber;
- bool enable_gso;
- bool enable_packed;
- bool enable_event_idx;
- u32 custom_dev_instance;
-};
-
-/** \brief vhost-user interface delete request
- @param client_index - opaque cookie to identify the sender
-*/
-autoreply define delete_vhost_user_if
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index;
-};
-
-/** \brief Vhost-user interface details structure (fix this)
- @param sw_if_index - index of the interface
- @param interface_name - name of interface
- @param virtio_net_hdr_sz - net header size
- @param features_first_32 - interface features, first 32 bits
- @param features_last_32 - interface features, last 32 bits
- @param is_server - vhost-user server socket
- @param sock_filename - socket filename
- @param num_regions - number of used memory regions
- @param sock_errno - socket errno
-*/
-define sw_interface_vhost_user_details
-{
- u32 context;
- vl_api_interface_index_t sw_if_index;
- string interface_name[64];
- u32 virtio_net_hdr_sz;
- vl_api_virtio_net_features_first_32_t features_first_32;
- vl_api_virtio_net_features_last_32_t features_last_32;
- bool is_server;
- string sock_filename[256];
- u32 num_regions;
- i32 sock_errno;
-};
-
-/** \brief Vhost-user interface dump request
- @param sw_if_index - filter by sw_if_index
-*/
-define sw_interface_vhost_user_dump
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index [default=0xffffffff];
-};
-/*
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/virtio/vhost_user.c b/src/vnet/devices/virtio/vhost_user.c
deleted file mode 100644
index b6e0806db90..00000000000
--- a/src/vnet/devices/virtio/vhost_user.c
+++ /dev/null
@@ -1,2613 +0,0 @@
-/*
- *------------------------------------------------------------------
- * vhost.c - vhost-user
- *
- * Copyright (c) 2014-2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <fcntl.h> /* for open */
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/uio.h> /* for iovec */
-#include <netinet/in.h>
-#include <sys/vfs.h>
-
-#include <linux/if_arp.h>
-#include <linux/if_tun.h>
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/devices/devices.h>
-#include <vnet/feature/feature.h>
-#include <vnet/interface/rx_queue_funcs.h>
-#include <vnet/interface/tx_queue_funcs.h>
-
-#include <vnet/devices/virtio/vhost_user.h>
-#include <vnet/devices/virtio/vhost_user_inline.h>
-
-/**
- * @file
- * @brief vHost User Device Driver.
- *
- * This file contains the source code for vHost User interface.
- */
-
-
-vlib_node_registration_t vhost_user_send_interrupt_node;
-
-/* *INDENT-OFF* */
-vhost_user_main_t vhost_user_main = {
- .mtu_bytes = 1518,
-};
-
-VNET_HW_INTERFACE_CLASS (vhost_interface_class, static) = {
- .name = "vhost-user",
-};
-/* *INDENT-ON* */
-
-static long
-get_huge_page_size (int fd)
-{
- struct statfs s;
- fstatfs (fd, &s);
- return s.f_bsize;
-}
-
-static void
-unmap_all_mem_regions (vhost_user_intf_t * vui)
-{
- int i, r, q;
- vhost_user_vring_t *vq;
-
- for (i = 0; i < vui->nregions; i++)
- {
- if (vui->region_mmap_addr[i] != MAP_FAILED)
- {
-
- long page_sz = get_huge_page_size (vui->region_mmap_fd[i]);
-
- ssize_t map_sz = (vui->regions[i].memory_size +
- vui->regions[i].mmap_offset +
- page_sz - 1) & ~(page_sz - 1);
-
- r =
- munmap (vui->region_mmap_addr[i] - vui->regions[i].mmap_offset,
- map_sz);
-
- vu_log_debug (vui, "unmap memory region %d addr 0x%lx len 0x%lx "
- "page_sz 0x%x", i, vui->region_mmap_addr[i], map_sz,
- page_sz);
-
- vui->region_mmap_addr[i] = MAP_FAILED;
-
- if (r == -1)
- {
- vu_log_err (vui, "failed to unmap memory region (errno %d)",
- errno);
- }
- close (vui->region_mmap_fd[i]);
- }
- }
- vui->nregions = 0;
-
- FOR_ALL_VHOST_RX_TXQ (q, vui)
- {
- vq = &vui->vrings[q];
- vq->avail = 0;
- vq->used = 0;
- vq->desc = 0;
- }
-}
-
-static_always_inline void
-vhost_user_tx_thread_placement (vhost_user_intf_t *vui, u32 qid)
-{
- vnet_main_t *vnm = vnet_get_main ();
- vhost_user_vring_t *rxvq = &vui->vrings[qid];
- u32 q = qid >> 1, rxvq_count;
-
- ASSERT ((qid & 1) == 0);
- if (!rxvq->started || !rxvq->enabled)
- return;
-
- rxvq_count = (qid >> 1) + 1;
- if (rxvq->queue_index == ~0)
- {
- rxvq->queue_index =
- vnet_hw_if_register_tx_queue (vnm, vui->hw_if_index, q);
- rxvq->qid = q;
- }
-
- FOR_ALL_VHOST_RXQ (q, vui)
- {
- vhost_user_vring_t *rxvq = &vui->vrings[q];
- u32 qi = rxvq->queue_index;
-
- if (rxvq->queue_index == ~0)
- break;
- for (u32 i = 0; i < vlib_get_n_threads (); i++)
- vnet_hw_if_tx_queue_unassign_thread (vnm, qi, i);
- }
-
- for (u32 i = 0; i < vlib_get_n_threads (); i++)
- {
- vhost_user_vring_t *rxvq =
- &vui->vrings[VHOST_VRING_IDX_RX (i % rxvq_count)];
- u32 qi = rxvq->queue_index;
-
- vnet_hw_if_tx_queue_assign_thread (vnm, qi, i);
- }
-
- vnet_hw_if_update_runtime_data (vnm, vui->hw_if_index);
-}
-
-/**
- * @brief Unassign existing interface/queue to thread mappings and re-assign
- * new interface/queue to thread mappings
- */
-static_always_inline void
-vhost_user_rx_thread_placement (vhost_user_intf_t * vui, u32 qid)
-{
- vhost_user_vring_t *txvq = &vui->vrings[qid];
- vnet_main_t *vnm = vnet_get_main ();
- int rv;
- u32 q = qid >> 1;
- vhost_user_main_t *vum = &vhost_user_main;
-
- ASSERT ((qid & 1) == 1); // should be odd
- // Assign new queue mappings for the interface
- if (txvq->queue_index != ~0)
- return;
- vnet_hw_if_set_input_node (vnm, vui->hw_if_index,
- vhost_user_input_node.index);
- txvq->queue_index = vnet_hw_if_register_rx_queue (vnm, vui->hw_if_index, q,
- VNET_HW_IF_RXQ_THREAD_ANY);
- txvq->thread_index =
- vnet_hw_if_get_rx_queue_thread_index (vnm, txvq->queue_index);
-
- if (txvq->mode == VNET_HW_IF_RX_MODE_UNKNOWN)
- /* Set polling as the default */
- txvq->mode = VNET_HW_IF_RX_MODE_POLLING;
- if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
- {
- vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, txvq->thread_index);
- /* Keep a polling queue count for each thread */
- cpu->polling_q_count++;
- }
- txvq->qid = q;
- rv = vnet_hw_if_set_rx_queue_mode (vnm, txvq->queue_index, txvq->mode);
- if (rv)
- vu_log_warn (vui, "unable to set rx mode for interface %d, "
- "queue %d: rc=%d", vui->hw_if_index, q, rv);
- vnet_hw_if_update_runtime_data (vnm, vui->hw_if_index);
-}
-
-/** @brief Returns whether at least one TX and one RX vring are enabled */
-static_always_inline int
-vhost_user_intf_ready (vhost_user_intf_t * vui)
-{
- int i, found[2] = { }; //RX + TX
-
- for (i = 0; i < vui->num_qid; i++)
- if (vui->vrings[i].started && vui->vrings[i].enabled)
- found[i & 1] = 1;
-
- return found[0] && found[1];
-}
-
-static_always_inline void
-vhost_user_update_iface_state (vhost_user_intf_t * vui)
-{
- /* if we have pointers to descriptor table, go up */
- int is_ready = vhost_user_intf_ready (vui);
- if (is_ready != vui->is_ready)
- {
- vu_log_debug (vui, "interface %d %s", vui->sw_if_index,
- is_ready ? "ready" : "down");
- if (vui->admin_up)
- vnet_hw_interface_set_flags (vnet_get_main (), vui->hw_if_index,
- is_ready ? VNET_HW_INTERFACE_FLAG_LINK_UP
- : 0);
- vui->is_ready = is_ready;
- }
-}
-
-static clib_error_t *
-vhost_user_callfd_read_ready (clib_file_t * uf)
-{
- __attribute__ ((unused)) int n;
- u8 buff[8];
-
- n = read (uf->file_descriptor, ((char *) &buff), 8);
-
- return 0;
-}
-
-static_always_inline void
-vhost_user_thread_placement (vhost_user_intf_t * vui, u32 qid)
-{
- if (qid & 1) // RX is odd, TX is even
- {
- if (vui->vrings[qid].queue_index == ~0)
- vhost_user_rx_thread_placement (vui, qid);
- }
- else
- vhost_user_tx_thread_placement (vui, qid);
-}
-
-static clib_error_t *
-vhost_user_kickfd_read_ready (clib_file_t * uf)
-{
- __attribute__ ((unused)) ssize_t n;
- u8 buff[8];
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui =
- pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data >> 8);
- u32 qid = uf->private_data & 0xff;
- u32 is_txq = qid & 1;
- vhost_user_vring_t *vq = &vui->vrings[qid];
- vnet_main_t *vnm = vnet_get_main ();
-
- n = read (uf->file_descriptor, buff, 8);
- if (vq->started == 0)
- {
- vq->started = 1;
- vhost_user_thread_placement (vui, qid);
- vhost_user_update_iface_state (vui);
- if (is_txq)
- vnet_hw_if_set_rx_queue_file_index (vnm, vq->queue_index,
- vq->kickfd_idx);
- }
-
- if (is_txq && (vq->mode != VNET_HW_IF_RX_MODE_POLLING) &&
- vhost_user_intf_ready (vui))
- {
- vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, vq->thread_index);
- /*
- * If the thread has more than 1 queue and the other queue is in polling
- * mode, there is no need to trigger an interrupt
- */
- if (cpu->polling_q_count == 0)
- vnet_hw_if_rx_queue_set_int_pending (vnm, vq->queue_index);
- }
-
- return 0;
-}
-
-static_always_inline void
-vhost_user_vring_init (vhost_user_intf_t * vui, u32 qid)
-{
- vhost_user_vring_t *vring = &vui->vrings[qid];
-
- clib_memset (vring, 0, sizeof (*vring));
- vring->kickfd_idx = ~0;
- vring->callfd_idx = ~0;
- vring->errfd = -1;
- vring->qid = -1;
- vring->queue_index = ~0;
- vring->thread_index = ~0;
- vring->mode = VNET_HW_IF_RX_MODE_POLLING;
-
- clib_spinlock_init (&vring->vring_lock);
-
- /*
- * We have a bug with some qemu 2.5, and this may be a fix.
- * Feel like interpretation holy text, but this is from vhost-user.txt.
- * "
- * One queue pair is enabled initially. More queues are enabled
- * dynamically, by sending message VHOST_USER_SET_VRING_ENABLE.
- * "
- * Don't know who's right, but this is what DPDK does.
- */
- if (qid == 0 || qid == 1)
- vring->enabled = 1;
-}
-
-static_always_inline void
-vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid)
-{
- vhost_user_vring_t *vring = &vui->vrings[qid];
-
- if (vring->kickfd_idx != ~0)
- {
- clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
- vring->kickfd_idx);
- clib_file_del (&file_main, uf);
- vring->kickfd_idx = ~0;
- }
- if (vring->callfd_idx != ~0)
- {
- clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
- vring->callfd_idx);
- clib_file_del (&file_main, uf);
- vring->callfd_idx = ~0;
- }
- if (vring->errfd != -1)
- {
- close (vring->errfd);
- vring->errfd = -1;
- }
-
- clib_spinlock_free (&vring->vring_lock);
-
- // save the needed information in vrings prior to being wiped out
- u16 q = vui->vrings[qid].qid;
- u32 queue_index = vui->vrings[qid].queue_index;
- u32 mode = vui->vrings[qid].mode;
- u32 thread_index = vui->vrings[qid].thread_index;
- vhost_user_vring_init (vui, qid);
- vui->vrings[qid].qid = q;
- vui->vrings[qid].queue_index = queue_index;
- vui->vrings[qid].mode = mode;
- vui->vrings[qid].thread_index = thread_index;
-}
-
-static_always_inline void
-vhost_user_if_disconnect (vhost_user_intf_t * vui)
-{
- vnet_main_t *vnm = vnet_get_main ();
- int q;
-
- vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
-
- if (vui->clib_file_index != ~0)
- {
- clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index);
- vui->clib_file_index = ~0;
- }
-
- vui->is_ready = 0;
-
- FOR_ALL_VHOST_RX_TXQ (q, vui) { vhost_user_vring_close (vui, q); }
-
- unmap_all_mem_regions (vui);
- vu_log_debug (vui, "interface ifindex %d disconnected", vui->sw_if_index);
-}
-
-void
-vhost_user_set_operation_mode (vhost_user_intf_t *vui,
- vhost_user_vring_t *txvq)
-{
- if (vhost_user_is_packed_ring_supported (vui))
- {
- if (txvq->used_event)
- {
- if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
- txvq->used_event->flags = VRING_EVENT_F_DISABLE;
- else
- txvq->used_event->flags = 0;
- }
- }
- else
- {
- if (txvq->used)
- {
- if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
- txvq->used->flags = VRING_USED_F_NO_NOTIFY;
- else
- txvq->used->flags = 0;
- }
- }
-}
-
-static clib_error_t *
-vhost_user_socket_read (clib_file_t * uf)
-{
- int n, i, j;
- int fd, number_of_fds = 0;
- int fds[VHOST_MEMORY_MAX_NREGIONS];
- vhost_user_msg_t msg;
- struct msghdr mh;
- struct iovec iov[1];
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui;
- struct cmsghdr *cmsg;
- u8 q;
- clib_file_t template = { 0 };
- vnet_main_t *vnm = vnet_get_main ();
- vlib_main_t *vm = vlib_get_main ();
-
- vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
-
- char control[CMSG_SPACE (VHOST_MEMORY_MAX_NREGIONS * sizeof (int))];
-
- clib_memset (&mh, 0, sizeof (mh));
- clib_memset (control, 0, sizeof (control));
-
- for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++)
- fds[i] = -1;
-
- /* set the payload */
- iov[0].iov_base = (void *) &msg;
- iov[0].iov_len = VHOST_USER_MSG_HDR_SZ;
-
- mh.msg_iov = iov;
- mh.msg_iovlen = 1;
- mh.msg_control = control;
- mh.msg_controllen = sizeof (control);
-
- n = recvmsg (uf->file_descriptor, &mh, 0);
-
- if (n != VHOST_USER_MSG_HDR_SZ)
- {
- if (n == -1)
- {
- vu_log_debug (vui, "recvmsg returned error %d %s", errno,
- strerror (errno));
- }
- else
- {
- vu_log_debug (vui, "n (%d) != VHOST_USER_MSG_HDR_SZ (%d)",
- n, VHOST_USER_MSG_HDR_SZ);
- }
- goto close_socket;
- }
-
- if (mh.msg_flags & MSG_CTRUNC)
- {
- vu_log_debug (vui, "MSG_CTRUNC is set");
- goto close_socket;
- }
-
- cmsg = CMSG_FIRSTHDR (&mh);
-
- if (cmsg && (cmsg->cmsg_len > 0) && (cmsg->cmsg_level == SOL_SOCKET) &&
- (cmsg->cmsg_type == SCM_RIGHTS) &&
- (cmsg->cmsg_len - CMSG_LEN (0) <=
- VHOST_MEMORY_MAX_NREGIONS * sizeof (int)))
- {
- number_of_fds = (cmsg->cmsg_len - CMSG_LEN (0)) / sizeof (int);
- clib_memcpy_fast (fds, CMSG_DATA (cmsg), number_of_fds * sizeof (int));
- }
-
- /* version 1, no reply bit set */
- if ((msg.flags & 7) != 1)
- {
- vu_log_debug (vui, "malformed message received. closing socket");
- goto close_socket;
- }
-
- {
- int rv;
- rv =
- read (uf->file_descriptor, ((char *) &msg) + VHOST_USER_MSG_HDR_SZ,
- msg.size);
- if (rv < 0)
- {
- vu_log_debug (vui, "read failed %s", strerror (errno));
- goto close_socket;
- }
- else if (rv != msg.size)
- {
- vu_log_debug (vui, "message too short (read %dB should be %dB)", rv,
- msg.size);
- goto close_socket;
- }
- }
-
- switch (msg.request)
- {
- case VHOST_USER_GET_FEATURES:
- msg.flags |= 4;
- msg.u64 = VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF) |
- VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ) |
- VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT) |
- VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC) |
- VIRTIO_FEATURE (VHOST_F_LOG_ALL) |
- VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_ANNOUNCE) |
- VIRTIO_FEATURE (VIRTIO_NET_F_MQ) |
- VIRTIO_FEATURE (VHOST_USER_F_PROTOCOL_FEATURES) |
- VIRTIO_FEATURE (VIRTIO_F_VERSION_1);
- msg.u64 &= vui->feature_mask;
-
- if (vui->enable_event_idx)
- msg.u64 |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
- if (vui->enable_gso)
- msg.u64 |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS;
- if (vui->enable_packed)
- msg.u64 |= VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
-
- msg.size = sizeof (msg.u64);
- vu_log_debug (vui, "if %d msg VHOST_USER_GET_FEATURES - reply "
- "0x%016llx", vui->hw_if_index, msg.u64);
- n =
- send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
- if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
- {
- vu_log_debug (vui, "could not send message response");
- goto close_socket;
- }
- break;
-
- case VHOST_USER_SET_FEATURES:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_FEATURES features "
- "0x%016llx", vui->hw_if_index, msg.u64);
-
- vui->features = msg.u64;
-
- if (vui->features &
- (VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF) |
- VIRTIO_FEATURE (VIRTIO_F_VERSION_1)))
- vui->virtio_net_hdr_sz = 12;
- else
- vui->virtio_net_hdr_sz = 10;
-
- vui->is_any_layout =
- (vui->features & VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT)) ? 1 : 0;
-
- ASSERT (vui->virtio_net_hdr_sz < VLIB_BUFFER_PRE_DATA_SIZE);
- if (vui->enable_gso &&
- ((vui->features & FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS)
- == FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS))
- {
- vnet_hw_if_set_caps (vnm, vui->hw_if_index,
- VNET_HW_IF_CAP_TCP_GSO |
- VNET_HW_IF_CAP_TX_TCP_CKSUM |
- VNET_HW_IF_CAP_TX_UDP_CKSUM);
- }
- else
- {
- vnet_hw_if_unset_caps (vnm, vui->hw_if_index,
- VNET_HW_IF_CAP_TCP_GSO |
- VNET_HW_IF_CAP_L4_TX_CKSUM);
- }
- vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
- vui->is_ready = 0;
- vhost_user_update_iface_state (vui);
- break;
-
- case VHOST_USER_SET_MEM_TABLE:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_MEM_TABLE nregions %d",
- vui->hw_if_index, msg.memory.nregions);
-
- if ((msg.memory.nregions < 1) ||
- (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS))
- {
- vu_log_debug (vui, "number of mem regions must be between 1 and %i",
- VHOST_MEMORY_MAX_NREGIONS);
- goto close_socket;
- }
-
- if (msg.memory.nregions != number_of_fds)
- {
- vu_log_debug (vui, "each memory region must have FD");
- goto close_socket;
- }
-
- /* Do the mmap without barrier sync */
- void *region_mmap_addr[VHOST_MEMORY_MAX_NREGIONS];
- for (i = 0; i < msg.memory.nregions; i++)
- {
- long page_sz = get_huge_page_size (fds[i]);
-
- /* align size to page */
- ssize_t map_sz = (msg.memory.regions[i].memory_size +
- msg.memory.regions[i].mmap_offset +
- page_sz - 1) & ~(page_sz - 1);
-
- region_mmap_addr[i] = mmap (0, map_sz, PROT_READ | PROT_WRITE,
- MAP_SHARED, fds[i], 0);
- if (region_mmap_addr[i] == MAP_FAILED)
- {
- vu_log_err (vui, "failed to map memory. errno is %d", errno);
- for (j = 0; j < i; j++)
- munmap (region_mmap_addr[j], map_sz);
- goto close_socket;
- }
- vu_log_debug (vui, "map memory region %d addr 0 len 0x%lx fd %d "
- "mapped 0x%lx page_sz 0x%x", i, map_sz, fds[i],
- region_mmap_addr[i], page_sz);
- }
-
- vlib_worker_thread_barrier_sync (vm);
- unmap_all_mem_regions (vui);
- for (i = 0; i < msg.memory.nregions; i++)
- {
- clib_memcpy_fast (&(vui->regions[i]), &msg.memory.regions[i],
- sizeof (vhost_user_memory_region_t));
-
- vui->region_mmap_addr[i] = region_mmap_addr[i];
- vui->region_guest_addr_lo[i] = vui->regions[i].guest_phys_addr;
- vui->region_guest_addr_hi[i] = vui->regions[i].guest_phys_addr +
- vui->regions[i].memory_size;
-
- vui->region_mmap_addr[i] += vui->regions[i].mmap_offset;
- vui->region_mmap_fd[i] = fds[i];
-
- vui->nregions++;
- }
-
- /*
- * Re-compute desc, used, and avail descriptor table if vring address
- * is set.
- */
- FOR_ALL_VHOST_RX_TXQ (q, vui)
- {
- if (vui->vrings[q].desc_user_addr && vui->vrings[q].used_user_addr &&
- vui->vrings[q].avail_user_addr)
- {
- vui->vrings[q].desc =
- map_user_mem (vui, vui->vrings[q].desc_user_addr);
- vui->vrings[q].used =
- map_user_mem (vui, vui->vrings[q].used_user_addr);
- vui->vrings[q].avail =
- map_user_mem (vui, vui->vrings[q].avail_user_addr);
- }
- }
- vlib_worker_thread_barrier_release (vm);
- break;
-
- case VHOST_USER_SET_VRING_NUM:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d",
- vui->hw_if_index, msg.state.index, msg.state.num);
-
- if ((msg.state.num > 32768) || /* maximum ring size is 32768 */
- (msg.state.num == 0) || /* it cannot be zero */
- ((msg.state.num - 1) & msg.state.num) || /* must be power of 2 */
- (msg.state.index >= vui->num_qid))
- {
- vu_log_debug (vui, "invalid VHOST_USER_SET_VRING_NUM: msg.state.num"
- " %d, msg.state.index %d, curruent max q %d",
- msg.state.num, msg.state.index, vui->num_qid);
- goto close_socket;
- }
- vui->vrings[msg.state.index].qsz_mask = msg.state.num - 1;
- break;
-
- case VHOST_USER_SET_VRING_ADDR:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_ADDR idx %d",
- vui->hw_if_index, msg.state.index);
-
- if (msg.state.index >= vui->num_qid)
- {
- vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_ADDR:"
- " %u >= %u", msg.state.index, vui->num_qid);
- goto close_socket;
- }
-
- if (msg.size < sizeof (msg.addr))
- {
- vu_log_debug (vui, "vhost message is too short (%d < %d)",
- msg.size, sizeof (msg.addr));
- goto close_socket;
- }
-
- vnet_virtio_vring_desc_t *desc =
- map_user_mem (vui, msg.addr.desc_user_addr);
- vnet_virtio_vring_used_t *used =
- map_user_mem (vui, msg.addr.used_user_addr);
- vnet_virtio_vring_avail_t *avail =
- map_user_mem (vui, msg.addr.avail_user_addr);
-
- if ((desc == NULL) || (used == NULL) || (avail == NULL))
- {
- vu_log_debug (vui, "failed to map user memory for hw_if_index %d",
- vui->hw_if_index);
- goto close_socket;
- }
-
- vui->vrings[msg.state.index].desc_user_addr = msg.addr.desc_user_addr;
- vui->vrings[msg.state.index].used_user_addr = msg.addr.used_user_addr;
- vui->vrings[msg.state.index].avail_user_addr = msg.addr.avail_user_addr;
-
- vlib_worker_thread_barrier_sync (vm);
- vui->vrings[msg.state.index].desc = desc;
- vui->vrings[msg.state.index].used = used;
- vui->vrings[msg.state.index].avail = avail;
-
- vui->vrings[msg.state.index].log_guest_addr = msg.addr.log_guest_addr;
- vui->vrings[msg.state.index].log_used =
- (msg.addr.flags & (1 << VHOST_VRING_F_LOG)) ? 1 : 0;
-
- /* Spec says: If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated,
- the ring is initialized in an enabled state. */
- if (!(vui->features & VIRTIO_FEATURE (VHOST_USER_F_PROTOCOL_FEATURES)))
- vui->vrings[msg.state.index].enabled = 1;
-
- vui->vrings[msg.state.index].last_used_idx =
- vui->vrings[msg.state.index].last_avail_idx =
- vui->vrings[msg.state.index].used->idx;
- vui->vrings[msg.state.index].last_kick =
- vui->vrings[msg.state.index].last_used_idx;
-
- /* tell driver that we want interrupts or not */
- vhost_user_set_operation_mode (vui, &vui->vrings[msg.state.index]);
- vlib_worker_thread_barrier_release (vm);
- vhost_user_update_iface_state (vui);
- break;
-
- case VHOST_USER_SET_OWNER:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_OWNER", vui->hw_if_index);
- break;
-
- case VHOST_USER_RESET_OWNER:
- vu_log_debug (vui, "if %d msg VHOST_USER_RESET_OWNER",
- vui->hw_if_index);
- break;
-
- case VHOST_USER_SET_VRING_CALL:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_CALL %d",
- vui->hw_if_index, msg.u64);
-
- q = (u8) (msg.u64 & 0xFF);
- if (vui->num_qid > q)
- {
- /* if there is old fd, delete and close it */
- if (vui->vrings[q].callfd_idx != ~0)
- {
- clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
- vui->vrings[q].callfd_idx);
- clib_file_del (&file_main, uf);
- vui->vrings[q].callfd_idx = ~0;
- }
- }
- else if (vec_len (vui->vrings) > q)
- {
- /* grow vrings by pair (RX + TX) */
- vui->num_qid = (q & 1) ? (q + 1) : (q + 2);
- }
- else
- {
- u32 i, new_max_q, old_max_q = vec_len (vui->vrings);
-
- /*
- * Double the array size if it is less than 64 entries.
- * Slow down thereafter.
- */
- if (vec_len (vui->vrings) < (VHOST_VRING_INIT_MQ_PAIR_SZ << 3))
- new_max_q = vec_len (vui->vrings) << 1;
- else
- new_max_q = vec_len (vui->vrings) +
- (VHOST_VRING_INIT_MQ_PAIR_SZ << 2);
- if (new_max_q > (VHOST_VRING_MAX_MQ_PAIR_SZ << 1))
- new_max_q = (VHOST_VRING_MAX_MQ_PAIR_SZ << 1);
-
- /* sync with the worker threads, vrings may move due to realloc */
- vlib_worker_thread_barrier_sync (vm);
- vec_validate_aligned (vui->vrings, new_max_q - 1,
- CLIB_CACHE_LINE_BYTES);
- vlib_worker_thread_barrier_release (vm);
-
- for (i = old_max_q; i < vec_len (vui->vrings); i++)
- vhost_user_vring_init (vui, i);
-
- /* grow vrings by pair (RX + TX) */
- vui->num_qid = (q & 1) ? (q + 1) : (q + 2);
- }
-
- if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
- {
- if (number_of_fds != 1)
- {
- vu_log_debug (vui, "More than one fd received !");
- goto close_socket;
- }
-
- template.read_function = vhost_user_callfd_read_ready;
- template.file_descriptor = fds[0];
- template.private_data =
- ((vui - vhost_user_main.vhost_user_interfaces) << 8) + q;
- template.description = format (0, "vhost user");
- vui->vrings[q].callfd_idx = clib_file_add (&file_main, &template);
- }
- else
- vui->vrings[q].callfd_idx = ~0;
- break;
-
- case VHOST_USER_SET_VRING_KICK:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_KICK %d",
- vui->hw_if_index, msg.u64);
-
- q = (u8) (msg.u64 & 0xFF);
- if (q >= vui->num_qid)
- {
- vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_KICK:"
- " %u >= %u", q, vui->num_qid);
- goto close_socket;
- }
-
- if (vui->vrings[q].kickfd_idx != ~0)
- {
- clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
- vui->vrings[q].kickfd_idx);
- clib_file_del (&file_main, uf);
- vui->vrings[q].kickfd_idx = ~0;
- }
-
- if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
- {
- if (number_of_fds != 1)
- {
- vu_log_debug (vui, "More than one fd received !");
- goto close_socket;
- }
-
- template.read_function = vhost_user_kickfd_read_ready;
- template.file_descriptor = fds[0];
- template.private_data =
- (((uword) (vui - vhost_user_main.vhost_user_interfaces)) << 8) +
- q;
- vui->vrings[q].kickfd_idx = clib_file_add (&file_main, &template);
- }
- else
- {
- //When no kickfd is set, the queue is initialized as started
- vui->vrings[q].kickfd_idx = ~0;
- vui->vrings[q].started = 1;
- vhost_user_thread_placement (vui, q);
- }
- vhost_user_update_iface_state (vui);
- break;
-
- case VHOST_USER_SET_VRING_ERR:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_ERR %d",
- vui->hw_if_index, msg.u64);
-
- q = (u8) (msg.u64 & 0xFF);
- if (q >= vui->num_qid)
- {
- vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_ERR:"
- " %u >= %u", q, vui->num_qid);
- goto close_socket;
- }
-
- if (vui->vrings[q].errfd != -1)
- close (vui->vrings[q].errfd);
-
- if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
- {
- if (number_of_fds != 1)
- goto close_socket;
-
- vui->vrings[q].errfd = fds[0];
- }
- else
- vui->vrings[q].errfd = -1;
- break;
-
- case VHOST_USER_SET_VRING_BASE:
- vu_log_debug (vui,
- "if %d msg VHOST_USER_SET_VRING_BASE idx %d num 0x%x",
- vui->hw_if_index, msg.state.index, msg.state.num);
- if (msg.state.index >= vui->num_qid)
- {
- vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_ADDR:"
- " %u >= %u", msg.state.index, vui->num_qid);
- goto close_socket;
- }
- vlib_worker_thread_barrier_sync (vm);
- vui->vrings[msg.state.index].last_avail_idx = msg.state.num;
- if (vhost_user_is_packed_ring_supported (vui))
- {
- /*
- * 0 1 2 3
- * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | last avail idx | | last used idx | |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * ^ ^
- * | |
- * avail wrap counter used wrap counter
- */
- /* last avail idx at bit 0-14. */
- vui->vrings[msg.state.index].last_avail_idx =
- msg.state.num & 0x7fff;
- /* avail wrap counter at bit 15 */
- vui->vrings[msg.state.index].avail_wrap_counter =
- ! !(msg.state.num & (1 << 15));
-
- /*
- * Although last_used_idx is passed in the upper 16 bits in qemu
- * implementation, in practice, last_avail_idx and last_used_idx are
- * usually the same. As a result, DPDK does not bother to pass us
- * last_used_idx. The spec is not clear on thex coding. I figured it
- * out by reading the qemu code. So let's just read last_avail_idx
- * and set last_used_idx equals to last_avail_idx.
- */
- vui->vrings[msg.state.index].last_used_idx =
- vui->vrings[msg.state.index].last_avail_idx;
- vui->vrings[msg.state.index].last_kick =
- vui->vrings[msg.state.index].last_used_idx;
- vui->vrings[msg.state.index].used_wrap_counter =
- vui->vrings[msg.state.index].avail_wrap_counter;
-
- if (vui->vrings[msg.state.index].avail_wrap_counter == 1)
- vui->vrings[msg.state.index].avail_wrap_counter =
- VRING_DESC_F_AVAIL;
- }
- vlib_worker_thread_barrier_release (vm);
- break;
-
- case VHOST_USER_GET_VRING_BASE:
- if (msg.state.index >= vui->num_qid)
- {
- vu_log_debug (vui, "invalid vring index VHOST_USER_GET_VRING_BASE:"
- " %u >= %u", msg.state.index, vui->num_qid);
- goto close_socket;
- }
-
- /* protection is needed to prevent rx/tx from changing last_avail_idx */
- vlib_worker_thread_barrier_sync (vm);
- /*
- * Copy last_avail_idx from the vring before closing it because
- * closing the vring also initializes the vring last_avail_idx
- */
- msg.state.num = vui->vrings[msg.state.index].last_avail_idx;
- if (vhost_user_is_packed_ring_supported (vui))
- {
- msg.state.num =
- (vui->vrings[msg.state.index].last_avail_idx & 0x7fff) |
- (! !vui->vrings[msg.state.index].avail_wrap_counter << 15);
- msg.state.num |=
- ((vui->vrings[msg.state.index].last_used_idx & 0x7fff) |
- (! !vui->vrings[msg.state.index].used_wrap_counter << 15)) << 16;
- }
- msg.flags |= 4;
- msg.size = sizeof (msg.state);
-
- /*
- * Spec says: Client must [...] stop ring upon receiving
- * VHOST_USER_GET_VRING_BASE
- */
- vhost_user_vring_close (vui, msg.state.index);
- vlib_worker_thread_barrier_release (vm);
- vu_log_debug (vui,
- "if %d msg VHOST_USER_GET_VRING_BASE idx %d num 0x%x",
- vui->hw_if_index, msg.state.index, msg.state.num);
- n =
- send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
- if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
- {
- vu_log_debug (vui, "could not send message response");
- goto close_socket;
- }
- vhost_user_update_iface_state (vui);
- break;
-
- case VHOST_USER_NONE:
- vu_log_debug (vui, "if %d msg VHOST_USER_NONE", vui->hw_if_index);
- break;
-
- case VHOST_USER_SET_LOG_BASE:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_LOG_BASE",
- vui->hw_if_index);
-
- if (msg.size != sizeof (msg.log))
- {
- vu_log_debug (vui, "invalid msg size for VHOST_USER_SET_LOG_BASE:"
- " %d instead of %d", msg.size, sizeof (msg.log));
- goto close_socket;
- }
-
- if (!(vui->protocol_features & (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD)))
- {
- vu_log_debug (vui, "VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but "
- "VHOST_USER_SET_LOG_BASE received");
- goto close_socket;
- }
-
- fd = fds[0];
- /* align size to page */
- long page_sz = get_huge_page_size (fd);
- ssize_t map_sz =
- (msg.log.size + msg.log.offset + page_sz - 1) & ~(page_sz - 1);
-
- void *log_base_addr = mmap (0, map_sz, PROT_READ | PROT_WRITE,
- MAP_SHARED, fd, 0);
-
- vu_log_debug (vui, "map log region addr 0 len 0x%lx off 0x%lx fd %d "
- "mapped 0x%lx", map_sz, msg.log.offset, fd,
- log_base_addr);
-
- if (log_base_addr == MAP_FAILED)
- {
- vu_log_err (vui, "failed to map memory. errno is %d", errno);
- goto close_socket;
- }
-
- vlib_worker_thread_barrier_sync (vm);
- vui->log_base_addr = log_base_addr;
- vui->log_base_addr += msg.log.offset;
- vui->log_size = msg.log.size;
- vlib_worker_thread_barrier_release (vm);
-
- msg.flags |= 4;
- msg.size = sizeof (msg.u64);
- n =
- send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
- if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
- {
- vu_log_debug (vui, "could not send message response");
- goto close_socket;
- }
- break;
-
- case VHOST_USER_SET_LOG_FD:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_LOG_FD", vui->hw_if_index);
- break;
-
- case VHOST_USER_GET_PROTOCOL_FEATURES:
- msg.flags |= 4;
- msg.u64 = (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |
- (1 << VHOST_USER_PROTOCOL_F_MQ);
- msg.size = sizeof (msg.u64);
- vu_log_debug (vui, "if %d msg VHOST_USER_GET_PROTOCOL_FEATURES - "
- "reply 0x%016llx", vui->hw_if_index, msg.u64);
- n =
- send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
- if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
- {
- vu_log_debug (vui, "could not send message response");
- goto close_socket;
- }
- break;
-
- case VHOST_USER_SET_PROTOCOL_FEATURES:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_PROTOCOL_FEATURES "
- "features 0x%016llx", vui->hw_if_index, msg.u64);
- vui->protocol_features = msg.u64;
- break;
-
- case VHOST_USER_GET_QUEUE_NUM:
- msg.flags |= 4;
- msg.u64 = VHOST_VRING_MAX_MQ_PAIR_SZ;
- msg.size = sizeof (msg.u64);
- vu_log_debug (vui, "if %d msg VHOST_USER_GET_QUEUE_NUM - reply %d",
- vui->hw_if_index, msg.u64);
- n =
- send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
- if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
- {
- vu_log_debug (vui, "could not send message response");
- goto close_socket;
- }
- break;
-
- case VHOST_USER_SET_VRING_ENABLE:
- vu_log_debug (vui, "if %d VHOST_USER_SET_VRING_ENABLE: %s queue %d",
- vui->hw_if_index, msg.state.num ? "enable" : "disable",
- msg.state.index);
- if (msg.state.index >= vui->num_qid)
- {
- vu_log_debug (vui, "invalid vring idx VHOST_USER_SET_VRING_ENABLE:"
- " %u >= %u", msg.state.index, vui->num_qid);
- goto close_socket;
- }
-
- vui->vrings[msg.state.index].enabled = msg.state.num;
- vhost_user_thread_placement (vui, msg.state.index);
- vhost_user_update_iface_state (vui);
- break;
-
- default:
- vu_log_debug (vui, "unknown vhost-user message %d received. "
- "closing socket", msg.request);
- goto close_socket;
- }
-
- return 0;
-
-close_socket:
- vlib_worker_thread_barrier_sync (vm);
- vhost_user_if_disconnect (vui);
- vlib_worker_thread_barrier_release (vm);
- vhost_user_update_iface_state (vui);
- return 0;
-}
-
-static clib_error_t *
-vhost_user_socket_error (clib_file_t * uf)
-{
- vlib_main_t *vm = vlib_get_main ();
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui =
- pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
-
- vu_log_debug (vui, "socket error on if %d", vui->sw_if_index);
- vlib_worker_thread_barrier_sync (vm);
- vhost_user_if_disconnect (vui);
- vlib_worker_thread_barrier_release (vm);
- return 0;
-}
-
-static clib_error_t *
-vhost_user_socksvr_accept_ready (clib_file_t * uf)
-{
- int client_fd, client_len;
- struct sockaddr_un client;
- clib_file_t template = { 0 };
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui;
-
- vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
-
- client_len = sizeof (client);
- client_fd = accept (uf->file_descriptor,
- (struct sockaddr *) &client,
- (socklen_t *) & client_len);
-
- if (client_fd < 0)
- return clib_error_return_unix (0, "accept");
-
- if (vui->clib_file_index != ~0)
- {
- vu_log_debug (vui, "Close client socket for vhost interface %d, fd %d",
- vui->sw_if_index, UNIX_GET_FD (vui->clib_file_index));
- clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index);
- }
-
- vu_log_debug (vui, "New client socket for vhost interface %d, fd %d",
- vui->sw_if_index, client_fd);
- template.read_function = vhost_user_socket_read;
- template.error_function = vhost_user_socket_error;
- template.file_descriptor = client_fd;
- template.private_data = vui - vhost_user_main.vhost_user_interfaces;
- template.description = format (0, "vhost interface %d", vui->sw_if_index);
- vui->clib_file_index = clib_file_add (&file_main, &template);
- vui->num_qid = 2;
- return 0;
-}
-
-static clib_error_t *
-vhost_user_init (vlib_main_t * vm)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
-
- vum->log_default = vlib_log_register_class ("vhost-user", 0);
-
- vum->coalesce_frames = 32;
- vum->coalesce_time = 1e-3;
-
- vec_validate (vum->cpus, tm->n_vlib_mains - 1);
-
- vhost_cpu_t *cpu;
- vec_foreach (cpu, vum->cpus)
- {
- /* This is actually not necessary as validate already zeroes it
- * Just keeping the loop here for later because I am lazy. */
- cpu->rx_buffers_len = 0;
- }
-
- vum->random = random_default_seed ();
-
- mhash_init_c_string (&vum->if_index_by_sock_name, sizeof (uword));
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_INIT_FUNCTION (vhost_user_init) =
-{
- .runs_after = VLIB_INITS("ip4_init"),
-};
-/* *INDENT-ON* */
-
-static uword
-vhost_user_send_interrupt_process (vlib_main_t * vm,
- vlib_node_runtime_t * rt, vlib_frame_t * f)
-{
- vhost_user_intf_t *vui;
- f64 timeout = 3153600000.0 /* 100 years */ ;
- uword event_type, *event_data = 0;
- vhost_user_main_t *vum = &vhost_user_main;
- u16 qid;
- f64 now, poll_time_remaining;
- f64 next_timeout;
- u8 stop_timer = 0;
-
- while (1)
- {
- poll_time_remaining =
- vlib_process_wait_for_event_or_clock (vm, timeout);
- event_type = vlib_process_get_events (vm, &event_data);
- vec_reset_length (event_data);
-
- /*
- * Use the remaining timeout if it is less than coalesce time to avoid
- * resetting the existing timer in the middle of expiration
- */
- timeout = poll_time_remaining;
- if (vlib_process_suspend_time_is_zero (timeout) ||
- (timeout > vum->coalesce_time))
- timeout = vum->coalesce_time;
-
- now = vlib_time_now (vm);
- switch (event_type)
- {
- case VHOST_USER_EVENT_STOP_TIMER:
- stop_timer = 1;
- break;
-
- case VHOST_USER_EVENT_START_TIMER:
- stop_timer = 0;
- timeout = 1e-3;
- if (!vlib_process_suspend_time_is_zero (poll_time_remaining))
- break;
- /* fall through */
-
- case ~0:
- /* *INDENT-OFF* */
- pool_foreach (vui, vum->vhost_user_interfaces) {
- next_timeout = timeout;
- FOR_ALL_VHOST_RX_TXQ (qid, vui)
- {
- vhost_user_vring_t *vq = &vui->vrings[qid];
-
- if (vq->started == 0)
- continue;
- if (vq->n_since_last_int)
- {
- if (now >= vq->int_deadline)
- vhost_user_send_call (vm, vui, vq);
- else
- next_timeout = vq->int_deadline - now;
- }
-
- if ((next_timeout < timeout) && (next_timeout > 0.0))
- timeout = next_timeout;
- }
- }
- /* *INDENT-ON* */
- break;
-
- default:
- clib_warning ("BUG: unhandled event type %d", event_type);
- break;
- }
- /* No less than 1 millisecond */
- if (timeout < 1e-3)
- timeout = 1e-3;
- if (stop_timer)
- timeout = 3153600000.0;
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (vhost_user_send_interrupt_node) = {
- .function = vhost_user_send_interrupt_process,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "vhost-user-send-interrupt-process",
-};
-/* *INDENT-ON* */
-
-static uword
-vhost_user_process (vlib_main_t * vm,
- vlib_node_runtime_t * rt, vlib_frame_t * f)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui;
- struct sockaddr_un sun;
- int sockfd;
- clib_file_t template = { 0 };
- f64 timeout = 3153600000.0 /* 100 years */ ;
- uword *event_data = 0;
-
- sockfd = -1;
- sun.sun_family = AF_UNIX;
- template.read_function = vhost_user_socket_read;
- template.error_function = vhost_user_socket_error;
-
- while (1)
- {
- vlib_process_wait_for_event_or_clock (vm, timeout);
- vlib_process_get_events (vm, &event_data);
- vec_reset_length (event_data);
-
- timeout = 3.0;
-
- /* *INDENT-OFF* */
- pool_foreach (vui, vum->vhost_user_interfaces) {
-
- if (vui->unix_server_index == ~0) { //Nothing to do for server sockets
- if (vui->clib_file_index == ~0)
- {
- if ((sockfd < 0) &&
- ((sockfd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0))
- {
- /*
- * 1st time error or new error for this interface,
- * spit out the message and record the error
- */
- if (!vui->sock_errno || (vui->sock_errno != errno))
- {
- clib_unix_warning
- ("Error: Could not open unix socket for %s",
- vui->sock_filename);
- vui->sock_errno = errno;
- }
- continue;
- }
-
- /* try to connect */
- strncpy (sun.sun_path, (char *) vui->sock_filename,
- sizeof (sun.sun_path) - 1);
- sun.sun_path[sizeof (sun.sun_path) - 1] = 0;
-
- /* Avoid hanging VPP if the other end does not accept */
- if (fcntl(sockfd, F_SETFL, O_NONBLOCK) < 0)
- clib_unix_warning ("fcntl");
-
- if (connect (sockfd, (struct sockaddr *) &sun,
- sizeof (struct sockaddr_un)) == 0)
- {
- /* Set the socket to blocking as it was before */
- if (fcntl(sockfd, F_SETFL, 0) < 0)
- clib_unix_warning ("fcntl2");
-
- vui->sock_errno = 0;
- template.file_descriptor = sockfd;
- template.private_data =
- vui - vhost_user_main.vhost_user_interfaces;
- template.description = format (0, "vhost user process");
- vui->clib_file_index = clib_file_add (&file_main, &template);
- vui->num_qid = 2;
-
- /* This sockfd is considered consumed */
- sockfd = -1;
- }
- else
- {
- vui->sock_errno = errno;
- }
- }
- else
- {
- /* check if socket is alive */
- int error = 0;
- socklen_t len = sizeof (error);
- int fd = UNIX_GET_FD(vui->clib_file_index);
- int retval =
- getsockopt (fd, SOL_SOCKET, SO_ERROR, &error, &len);
-
- if (retval)
- {
- vu_log_debug (vui, "getsockopt returned %d", retval);
- vhost_user_if_disconnect (vui);
- }
- }
- }
- }
- /* *INDENT-ON* */
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (vhost_user_process_node,static) = {
- .function = vhost_user_process,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "vhost-user-process",
-};
-/* *INDENT-ON* */
-
-/**
- * Disables and reset interface structure.
- * It can then be either init again, or removed from used interfaces.
- */
-static void
-vhost_user_term_if (vhost_user_intf_t * vui)
-{
- int q;
- vhost_user_main_t *vum = &vhost_user_main;
-
- // disconnect interface sockets
- vhost_user_if_disconnect (vui);
- vhost_user_update_gso_interface_count (vui, 0 /* delete */ );
- vhost_user_update_iface_state (vui);
-
- for (q = 0; q < vec_len (vui->vrings); q++)
- clib_spinlock_free (&vui->vrings[q].vring_lock);
-
- if (vui->unix_server_index != ~0)
- {
- //Close server socket
- clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
- vui->unix_server_index);
- clib_file_del (&file_main, uf);
- vui->unix_server_index = ~0;
- unlink (vui->sock_filename);
- }
-
- mhash_unset (&vum->if_index_by_sock_name, vui->sock_filename,
- &vui->if_index);
-}
-
-int
-vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui;
- int rv = 0;
- vnet_hw_interface_t *hwif;
- u16 qid;
-
- if (!
- (hwif =
- vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index))
- || hwif->dev_class_index != vhost_user_device_class.index)
- return VNET_API_ERROR_INVALID_SW_IF_INDEX;
-
- vui = pool_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);
-
- vu_log_debug (vui, "Deleting vhost-user interface %s (instance %d)",
- hwif->name, hwif->dev_instance);
-
- FOR_ALL_VHOST_TXQ (qid, vui)
- {
- vhost_user_vring_t *txvq = &vui->vrings[qid];
-
- if ((txvq->mode == VNET_HW_IF_RX_MODE_POLLING) &&
- (txvq->thread_index != ~0))
- {
- vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, txvq->thread_index);
- ASSERT (cpu->polling_q_count != 0);
- cpu->polling_q_count--;
- }
-
- if ((vum->ifq_count > 0) &&
- ((txvq->mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
- (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)))
- {
- vum->ifq_count--;
- // Stop the timer if there is no more interrupt interface/queue
- if (vum->ifq_count == 0)
- {
- vlib_process_signal_event (vm,
- vhost_user_send_interrupt_node.index,
- VHOST_USER_EVENT_STOP_TIMER, 0);
- break;
- }
- }
- }
-
- // Disable and reset interface
- vhost_user_term_if (vui);
-
- // Reset renumbered iface
- if (hwif->dev_instance <
- vec_len (vum->show_dev_instance_by_real_dev_instance))
- vum->show_dev_instance_by_real_dev_instance[hwif->dev_instance] = ~0;
-
- // Delete ethernet interface
- ethernet_delete_interface (vnm, vui->hw_if_index);
-
- // free vrings
- vec_free (vui->vrings);
-
- // Back to pool
- pool_put (vum->vhost_user_interfaces, vui);
-
- return rv;
-}
-
-static clib_error_t *
-vhost_user_exit (vlib_main_t * vm)
-{
- vnet_main_t *vnm = vnet_get_main ();
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui;
-
- vlib_worker_thread_barrier_sync (vlib_get_main ());
- /* *INDENT-OFF* */
- pool_foreach (vui, vum->vhost_user_interfaces) {
- vhost_user_delete_if (vnm, vm, vui->sw_if_index);
- }
- /* *INDENT-ON* */
- vlib_worker_thread_barrier_release (vlib_get_main ());
- return 0;
-}
-
-VLIB_MAIN_LOOP_EXIT_FUNCTION (vhost_user_exit);
-
-/**
- * Open server unix socket on specified sock_filename.
- */
-static int
-vhost_user_init_server_sock (const char *sock_filename, int *sock_fd)
-{
- int rv = 0;
- struct sockaddr_un un = { };
- int fd;
- /* create listening socket */
- if ((fd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)
- return VNET_API_ERROR_SYSCALL_ERROR_1;
-
- un.sun_family = AF_UNIX;
- strncpy ((char *) un.sun_path, (char *) sock_filename,
- sizeof (un.sun_path) - 1);
-
- /* remove if exists */
- unlink ((char *) sock_filename);
-
- if (bind (fd, (struct sockaddr *) &un, sizeof (un)) == -1)
- {
- rv = VNET_API_ERROR_SYSCALL_ERROR_2;
- goto error;
- }
-
- if (listen (fd, 1) == -1)
- {
- rv = VNET_API_ERROR_SYSCALL_ERROR_3;
- goto error;
- }
-
- *sock_fd = fd;
- return 0;
-
-error:
- close (fd);
- return rv;
-}
-
-/**
- * Create ethernet interface for vhost user interface.
- */
-static void
-vhost_user_create_ethernet (vnet_main_t *vnm, vlib_main_t *vm,
- vhost_user_intf_t *vui,
- vhost_user_create_if_args_t *args)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- vnet_eth_interface_registration_t eir = {};
- u8 hwaddr[6];
-
- /* create hw and sw interface */
- if (args->use_custom_mac)
- {
- clib_memcpy (hwaddr, args->hwaddr, 6);
- }
- else
- {
- random_u32 (&vum->random);
- clib_memcpy (hwaddr + 2, &vum->random, sizeof (vum->random));
- hwaddr[0] = 2;
- hwaddr[1] = 0xfe;
- }
-
- eir.dev_class_index = vhost_user_device_class.index;
- eir.dev_instance = vui - vum->vhost_user_interfaces /* device instance */,
- eir.address = hwaddr;
- vui->hw_if_index = vnet_eth_register_interface (vnm, &eir);
-}
-
-/*
- * Initialize vui with specified attributes
- */
-static void
-vhost_user_vui_init (vnet_main_t * vnm, vhost_user_intf_t * vui,
- int server_sock_fd, vhost_user_create_if_args_t * args,
- u32 * sw_if_index)
-{
- vnet_sw_interface_t *sw;
- int q;
- vhost_user_main_t *vum = &vhost_user_main;
-
- sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
- if (server_sock_fd != -1)
- {
- clib_file_t template = { 0 };
- template.read_function = vhost_user_socksvr_accept_ready;
- template.file_descriptor = server_sock_fd;
- template.private_data = vui - vum->vhost_user_interfaces; //hw index
- template.description = format (0, "vhost user %d", sw);
- vui->unix_server_index = clib_file_add (&file_main, &template);
- }
- else
- {
- vui->unix_server_index = ~0;
- }
-
- vui->sw_if_index = sw->sw_if_index;
- strncpy (vui->sock_filename, args->sock_filename,
- ARRAY_LEN (vui->sock_filename) - 1);
- vui->sock_errno = 0;
- vui->is_ready = 0;
- vui->feature_mask = args->feature_mask;
- vui->clib_file_index = ~0;
- vui->log_base_addr = 0;
- vui->if_index = vui - vum->vhost_user_interfaces;
- vui->enable_gso = args->enable_gso;
- vui->enable_event_idx = args->enable_event_idx;
- vui->enable_packed = args->enable_packed;
- /*
- * enable_gso takes precedence over configurable feature mask if there
- * is a clash.
- * if feature mask disables gso, but enable_gso is configured,
- * then gso is enable
- * if feature mask enables gso, but enable_gso is not configured,
- * then gso is enable
- *
- * if gso is enable via feature mask, it must enable both host and guest
- * gso feature mask, we don't support one sided GSO or partial GSO.
- */
- if ((vui->enable_gso == 0) &&
- ((args->feature_mask & FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS)
- == (FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS)))
- vui->enable_gso = 1;
- vhost_user_update_gso_interface_count (vui, 1 /* add */ );
- mhash_set_mem (&vum->if_index_by_sock_name, vui->sock_filename,
- &vui->if_index, 0);
-
- vec_validate_aligned (vui->vrings, (VHOST_VRING_INIT_MQ_PAIR_SZ << 1) - 1,
- CLIB_CACHE_LINE_BYTES);
- vui->num_qid = 2;
- for (q = 0; q < vec_len (vui->vrings); q++)
- vhost_user_vring_init (vui, q);
-
- vnet_hw_if_set_caps (vnm, vui->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
- vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
-
- if (sw_if_index)
- *sw_if_index = vui->sw_if_index;
-}
-
-int
-vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
- vhost_user_create_if_args_t * args)
-{
- vhost_user_intf_t *vui = NULL;
- u32 sw_if_idx = ~0;
- int rv = 0;
- int server_sock_fd = -1;
- vhost_user_main_t *vum = &vhost_user_main;
- uword *if_index;
-
- if (args->sock_filename == NULL || !(strlen (args->sock_filename) > 0))
- {
- return VNET_API_ERROR_INVALID_ARGUMENT;
- }
-
- if_index = mhash_get (&vum->if_index_by_sock_name,
- (void *) args->sock_filename);
- if (if_index)
- {
- vui = &vum->vhost_user_interfaces[*if_index];
- args->sw_if_index = vui->sw_if_index;
- return VNET_API_ERROR_IF_ALREADY_EXISTS;
- }
-
- if (args->is_server)
- {
- if ((rv =
- vhost_user_init_server_sock (args->sock_filename,
- &server_sock_fd)) != 0)
- {
- return rv;
- }
- }
-
- /* Protect the uninitialized vui from being dispatched by rx/tx */
- vlib_worker_thread_barrier_sync (vm);
- pool_get (vhost_user_main.vhost_user_interfaces, vui);
- vhost_user_create_ethernet (vnm, vm, vui, args);
- vlib_worker_thread_barrier_release (vm);
-
- vhost_user_vui_init (vnm, vui, server_sock_fd, args, &sw_if_idx);
- vnet_sw_interface_set_mtu (vnm, vui->sw_if_index, 9000);
- vhost_user_rx_thread_placement (vui, 1);
-
- if (args->renumber)
- vnet_interface_name_renumber (sw_if_idx, args->custom_dev_instance);
-
- args->sw_if_index = sw_if_idx;
-
- // Process node must connect
- vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
-
- return rv;
-}
-
-int
-vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
- vhost_user_create_if_args_t * args)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui = NULL;
- u32 sw_if_idx = ~0;
- int server_sock_fd = -1;
- int rv = 0;
- vnet_hw_interface_t *hwif;
- uword *if_index;
-
- if (!(hwif = vnet_get_sup_hw_interface_api_visible_or_null (vnm,
- args->sw_if_index))
- || hwif->dev_class_index != vhost_user_device_class.index)
- return VNET_API_ERROR_INVALID_SW_IF_INDEX;
-
- if (args->sock_filename == NULL || !(strlen (args->sock_filename) > 0))
- return VNET_API_ERROR_INVALID_ARGUMENT;
-
- vui = vec_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);
-
- /*
- * Disallow changing the interface to have the same path name
- * as other interface
- */
- if_index = mhash_get (&vum->if_index_by_sock_name,
- (void *) args->sock_filename);
- if (if_index && (*if_index != vui->if_index))
- return VNET_API_ERROR_IF_ALREADY_EXISTS;
-
- // First try to open server socket
- if (args->is_server)
- if ((rv = vhost_user_init_server_sock (args->sock_filename,
- &server_sock_fd)) != 0)
- return rv;
-
- vhost_user_term_if (vui);
- vhost_user_vui_init (vnm, vui, server_sock_fd, args, &sw_if_idx);
-
- if (args->renumber)
- vnet_interface_name_renumber (sw_if_idx, args->custom_dev_instance);
-
- // Process node must connect
- vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
-
- return rv;
-}
-
-clib_error_t *
-vhost_user_connect_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- vnet_main_t *vnm = vnet_get_main ();
- unformat_input_t _line_input, *line_input = &_line_input;
- clib_error_t *error = NULL;
- vhost_user_create_if_args_t args = { 0 };
- int rv;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- args.feature_mask = (u64) ~ (0ULL);
- args.custom_dev_instance = ~0;
- /* GSO feature is disable by default */
- args.feature_mask &= ~FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS;
- /* packed-ring feature is disable by default */
- args.feature_mask &= ~VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
- /* event_idx feature is disable by default */
- args.feature_mask &= ~VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "socket %s", &args.sock_filename))
- ;
- else if (unformat (line_input, "server"))
- args.is_server = 1;
- else if (unformat (line_input, "gso"))
- args.enable_gso = 1;
- else if (unformat (line_input, "packed"))
- args.enable_packed = 1;
- else if (unformat (line_input, "event-idx"))
- args.enable_event_idx = 1;
- else if (unformat (line_input, "feature-mask 0x%llx",
- &args.feature_mask))
- ;
- else if (unformat (line_input, "hwaddr %U", unformat_ethernet_address,
- args.hwaddr))
- args.use_custom_mac = 1;
- else if (unformat (line_input, "renumber %d",
- &args.custom_dev_instance))
- args.renumber = 1;
- else
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- goto done;
- }
- }
-
- if ((rv = vhost_user_create_if (vnm, vm, &args)))
- {
- error = clib_error_return (0, "vhost_user_create_if returned %d", rv);
- goto done;
- }
-
- vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnm,
- args.sw_if_index);
-
-done:
- vec_free (args.sock_filename);
- unformat_free (line_input);
-
- return error;
-}
-
-clib_error_t *
-vhost_user_delete_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- u32 sw_if_index = ~0;
- vnet_main_t *vnm = vnet_get_main ();
- clib_error_t *error = NULL;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "sw_if_index %d", &sw_if_index))
- ;
- else if (unformat
- (line_input, "%U", unformat_vnet_sw_interface, vnm,
- &sw_if_index))
- {
- vnet_hw_interface_t *hwif =
- vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index);
- if (hwif == NULL ||
- vhost_user_device_class.index != hwif->dev_class_index)
- {
- error = clib_error_return (0, "Not a vhost interface");
- goto done;
- }
- }
- else
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- goto done;
- }
- }
-
- vhost_user_delete_if (vnm, vm, sw_if_index);
-
-done:
- unformat_free (line_input);
-
- return error;
-}
-
-int
-vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
- vhost_user_intf_details_t ** out_vuids)
-{
- int rv = 0;
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui;
- vhost_user_intf_details_t *r_vuids = NULL;
- vhost_user_intf_details_t *vuid = NULL;
- u32 *hw_if_indices = 0;
- vnet_hw_interface_t *hi;
- int i;
-
- if (!out_vuids)
- return -1;
-
- pool_foreach (vui, vum->vhost_user_interfaces)
- vec_add1 (hw_if_indices, vui->hw_if_index);
-
- for (i = 0; i < vec_len (hw_if_indices); i++)
- {
- hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
- vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance);
-
- vec_add2 (r_vuids, vuid, 1);
- vuid->sw_if_index = vui->sw_if_index;
- vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz;
- vuid->features = vui->features;
- vuid->num_regions = vui->nregions;
- vuid->is_server = vui->unix_server_index != ~0;
- vuid->sock_errno = vui->sock_errno;
- snprintf ((char *) vuid->sock_filename, sizeof (vuid->sock_filename),
- "%s", vui->sock_filename);
- memcpy_s (vuid->if_name, sizeof (vuid->if_name), hi->name,
- clib_min (vec_len (hi->name), sizeof (vuid->if_name) - 1));
- vuid->if_name[sizeof (vuid->if_name) - 1] = 0;
- }
-
- vec_free (hw_if_indices);
-
- *out_vuids = r_vuids;
-
- return rv;
-}
-
-static u8 *
-format_vhost_user_desc (u8 * s, va_list * args)
-{
- char *fmt = va_arg (*args, char *);
- vhost_user_intf_t *vui = va_arg (*args, vhost_user_intf_t *);
- vnet_virtio_vring_desc_t *desc_table =
- va_arg (*args, vnet_virtio_vring_desc_t *);
- int idx = va_arg (*args, int);
- u32 *mem_hint = va_arg (*args, u32 *);
-
- s = format (s, fmt, idx, desc_table[idx].addr, desc_table[idx].len,
- desc_table[idx].flags, desc_table[idx].next,
- pointer_to_uword (map_guest_mem (vui, desc_table[idx].addr,
- mem_hint)));
- return s;
-}
-
-static void
-vhost_user_show_fds (vlib_main_t * vm, vhost_user_vring_t * vq)
-{
- int kickfd = UNIX_GET_FD (vq->kickfd_idx);
- int callfd = UNIX_GET_FD (vq->callfd_idx);
-
- vlib_cli_output (vm, " kickfd %d callfd %d errfd %d\n", kickfd, callfd,
- vq->errfd);
-}
-
-static void
-vhost_user_show_desc (vlib_main_t * vm, vhost_user_intf_t * vui, int q,
- int show_descr, int show_verbose)
-{
- int j;
- u32 mem_hint = 0;
- u32 idx;
- u32 n_entries;
- vnet_virtio_vring_desc_t *desc_table;
- vhost_user_vring_t *vq = &vui->vrings[q];
-
- if (vq->avail && vq->used)
- vlib_cli_output (vm,
- " avail.flags %x avail event idx %u avail.idx %d "
- "used.flags %x used event idx %u used.idx %d\n",
- vq->avail->flags, vhost_user_avail_event_idx (vq),
- vq->avail->idx, vq->used->flags,
- vhost_user_used_event_idx (vq), vq->used->idx);
-
- vhost_user_show_fds (vm, vq);
-
- if (show_descr)
- {
- vlib_cli_output (vm, "\n descriptor table:\n");
- vlib_cli_output (vm,
- " slot addr len flags next "
- "user_addr\n");
- vlib_cli_output (vm,
- " ===== ================== ===== ====== ===== "
- "==================\n");
- for (j = 0; j < vq->qsz_mask + 1; j++)
- {
- desc_table = vq->desc;
- vlib_cli_output (vm, "%U", format_vhost_user_desc,
- " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", vui,
- desc_table, j, &mem_hint);
- if (show_verbose && (desc_table[j].flags & VRING_DESC_F_INDIRECT))
- {
- n_entries =
- desc_table[j].len / sizeof (vnet_virtio_vring_desc_t);
- desc_table = map_guest_mem (vui, desc_table[j].addr, &mem_hint);
- if (desc_table)
- {
- for (idx = 0; idx < clib_min (20, n_entries); idx++)
- {
- vlib_cli_output
- (vm, "%U", format_vhost_user_desc,
- "> %-4u 0x%016lx %-5u 0x%04x %-5u 0x%016lx\n", vui,
- desc_table, idx, &mem_hint);
- }
- if (n_entries >= 20)
- vlib_cli_output (vm, "Skip displaying entries 20...%u\n",
- n_entries);
- }
- }
- }
- }
-}
-
-static u8 *
-format_vhost_user_packed_desc (u8 * s, va_list * args)
-{
- char *fmt = va_arg (*args, char *);
- vhost_user_intf_t *vui = va_arg (*args, vhost_user_intf_t *);
- vnet_virtio_vring_packed_desc_t *desc_table =
- va_arg (*args, vnet_virtio_vring_packed_desc_t *);
- int idx = va_arg (*args, int);
- u32 *mem_hint = va_arg (*args, u32 *);
-
- s = format (s, fmt, idx, desc_table[idx].addr, desc_table[idx].len,
- desc_table[idx].flags, desc_table[idx].id,
- pointer_to_uword (map_guest_mem (vui, desc_table[idx].addr,
- mem_hint)));
- return s;
-}
-
-static u8 *
-format_vhost_user_event_idx_flags (u8 * s, va_list * args)
-{
- u32 flags = va_arg (*args, u32);
- typedef struct
- {
- u8 value;
- char *str;
- } event_idx_flags;
- static event_idx_flags event_idx_array[] = {
-#define _(s,v) { .str = #s, .value = v, },
- foreach_virtio_event_idx_flags
-#undef _
- };
- u32 num_entries = sizeof (event_idx_array) / sizeof (event_idx_flags);
-
- if (flags < num_entries)
- s = format (s, "%s", event_idx_array[flags].str);
- else
- s = format (s, "%u", flags);
- return s;
-}
-
-static void
-vhost_user_show_desc_packed (vlib_main_t * vm, vhost_user_intf_t * vui, int q,
- int show_descr, int show_verbose)
-{
- int j;
- u32 mem_hint = 0;
- u32 idx;
- u32 n_entries;
- vnet_virtio_vring_packed_desc_t *desc_table;
- vhost_user_vring_t *vq = &vui->vrings[q];
- u16 off_wrap, event_idx;
-
- off_wrap = vq->avail_event->off_wrap;
- event_idx = off_wrap & 0x7fff;
- vlib_cli_output (vm, " avail_event.flags %U avail_event.off_wrap %u "
- "avail event idx %u\n", format_vhost_user_event_idx_flags,
- (u32) vq->avail_event->flags, off_wrap, event_idx);
-
- off_wrap = vq->used_event->off_wrap;
- event_idx = off_wrap & 0x7fff;
- vlib_cli_output (vm, " used_event.flags %U used_event.off_wrap %u "
- "used event idx %u\n", format_vhost_user_event_idx_flags,
- (u32) vq->used_event->flags, off_wrap, event_idx);
-
- vlib_cli_output (vm, " avail wrap counter %u, used wrap counter %u\n",
- vq->avail_wrap_counter, vq->used_wrap_counter);
-
- vhost_user_show_fds (vm, vq);
-
- if (show_descr)
- {
- vlib_cli_output (vm, "\n descriptor table:\n");
- vlib_cli_output (vm,
- " slot addr len flags id "
- "user_addr\n");
- vlib_cli_output (vm,
- " ===== ================== ===== ====== ===== "
- "==================\n");
- for (j = 0; j < vq->qsz_mask + 1; j++)
- {
- desc_table = vq->packed_desc;
- vlib_cli_output (vm, "%U", format_vhost_user_packed_desc,
- " %-5u 0x%016lx %-5u 0x%04x %-5u 0x%016lx\n", vui,
- desc_table, j, &mem_hint);
- if (show_verbose && (desc_table[j].flags & VRING_DESC_F_INDIRECT))
- {
- n_entries = desc_table[j].len >> 4;
- desc_table = map_guest_mem (vui, desc_table[j].addr, &mem_hint);
- if (desc_table)
- {
- for (idx = 0; idx < clib_min (20, n_entries); idx++)
- {
- vlib_cli_output
- (vm, "%U", format_vhost_user_packed_desc,
- "> %-4u 0x%016lx %-5u 0x%04x %-5u 0x%016lx\n", vui,
- desc_table, idx, &mem_hint);
- }
- if (n_entries >= 20)
- vlib_cli_output (vm, "Skip displaying entries 20...%u\n",
- n_entries);
- }
- }
- }
- }
-}
-
-clib_error_t *
-show_vhost_user_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- clib_error_t *error = 0;
- vnet_main_t *vnm = vnet_get_main ();
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui;
- u32 hw_if_index, *hw_if_indices = 0;
- vnet_hw_interface_t *hi;
- u16 qid;
- int i, j, q;
- int show_descr = 0;
- int show_verbose = 0;
- struct feat_struct
- {
- u8 bit;
- char *str;
- };
- struct feat_struct *feat_entry;
-
- static struct feat_struct feat_array[] = {
-#define _(s,b) { .str = #s, .bit = b, },
- foreach_virtio_net_features
-#undef _
- {.str = NULL}
- };
-
-#define foreach_protocol_feature \
- _(VHOST_USER_PROTOCOL_F_MQ) \
- _(VHOST_USER_PROTOCOL_F_LOG_SHMFD)
-
- static struct feat_struct proto_feat_array[] = {
-#define _(s) { .str = #s, .bit = s},
- foreach_protocol_feature
-#undef _
- {.str = NULL}
- };
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat
- (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
- {
- hi = vnet_get_hw_interface (vnm, hw_if_index);
- if (vhost_user_device_class.index != hi->dev_class_index)
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- goto done;
- }
- vec_add1 (hw_if_indices, hw_if_index);
- }
- else if (unformat (input, "descriptors") || unformat (input, "desc"))
- show_descr = 1;
- else if (unformat (input, "verbose"))
- show_verbose = 1;
- else
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- goto done;
- }
- }
- if (vec_len (hw_if_indices) == 0)
- {
- pool_foreach (vui, vum->vhost_user_interfaces)
- vec_add1 (hw_if_indices, vui->hw_if_index);
- }
- vlib_cli_output (vm, "Virtio vhost-user interfaces");
- vlib_cli_output (vm, "Global:\n coalesce frames %d time %e",
- vum->coalesce_frames, vum->coalesce_time);
- vlib_cli_output (vm, " Number of rx virtqueues in interrupt mode: %d",
- vum->ifq_count);
- vlib_cli_output (vm, " Number of GSO interfaces: %d", vum->gso_count);
- for (u32 tid = 0; tid <= vlib_num_workers (); tid++)
- {
- vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, tid);
- vlib_cli_output (vm, " Thread %u: Polling queue count %u", tid,
- cpu->polling_q_count);
- }
-
- for (i = 0; i < vec_len (hw_if_indices); i++)
- {
- hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
- vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance);
- vlib_cli_output (vm, "Interface: %U (ifindex %d)",
- format_vnet_hw_if_index_name, vnm, hw_if_indices[i],
- hw_if_indices[i]);
- vlib_cli_output (vm, " Number of qids %u", vui->num_qid);
- if (vui->enable_gso)
- vlib_cli_output (vm, " GSO enable");
- if (vui->enable_packed)
- vlib_cli_output (vm, " Packed ring enable");
- if (vui->enable_event_idx)
- vlib_cli_output (vm, " Event index enable");
-
- vlib_cli_output (vm, "virtio_net_hdr_sz %d\n"
- " features mask (0x%llx): \n"
- " features (0x%llx): \n",
- vui->virtio_net_hdr_sz, vui->feature_mask,
- vui->features);
-
- feat_entry = (struct feat_struct *) &feat_array;
- while (feat_entry->str)
- {
- if (vui->features & (1ULL << feat_entry->bit))
- vlib_cli_output (vm, " %s (%d)", feat_entry->str,
- feat_entry->bit);
- feat_entry++;
- }
-
- vlib_cli_output (vm, " protocol features (0x%llx)",
- vui->protocol_features);
- feat_entry = (struct feat_struct *) &proto_feat_array;
- while (feat_entry->str)
- {
- if (vui->protocol_features & (1ULL << feat_entry->bit))
- vlib_cli_output (vm, " %s (%d)", feat_entry->str,
- feat_entry->bit);
- feat_entry++;
- }
-
- vlib_cli_output (vm, "\n");
-
- vlib_cli_output (vm, " socket filename %s type %s errno \"%s\"\n\n",
- vui->sock_filename,
- (vui->unix_server_index != ~0) ? "server" : "client",
- strerror (vui->sock_errno));
-
- vlib_cli_output (vm, " rx placement: ");
-
- FOR_ALL_VHOST_TXQ (qid, vui)
- {
- vhost_user_vring_t *txvq = &vui->vrings[qid];
-
- if (txvq->qid == -1)
- continue;
- vlib_cli_output (vm, " thread %d on vring %d, %U\n",
- txvq->thread_index, qid, format_vnet_hw_if_rx_mode,
- txvq->mode);
- }
-
- vlib_cli_output (vm, " tx placement\n");
-
- FOR_ALL_VHOST_RXQ (qid, vui)
- {
- vhost_user_vring_t *rxvq = &vui->vrings[qid];
- vnet_hw_if_tx_queue_t *txq;
-
- if (rxvq->queue_index == ~0)
- continue;
- txq = vnet_hw_if_get_tx_queue (vnm, rxvq->queue_index);
- if (txq->threads)
- vlib_cli_output (vm, " threads %U on vring %u: %s\n",
- format_bitmap_list, txq->threads, qid,
- txq->shared_queue ? "spin-lock" : "lock-free");
- }
-
- vlib_cli_output (vm, "\n");
-
- vlib_cli_output (vm, " Memory regions (total %d)\n", vui->nregions);
-
- if (vui->nregions)
- {
- vlib_cli_output (vm,
- " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n");
- vlib_cli_output (vm,
- " ====== ===== ================== ================== ================== ================== ==================\n");
- }
- for (j = 0; j < vui->nregions; j++)
- {
- vlib_cli_output (vm,
- " %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
- j, vui->region_mmap_fd[j],
- vui->regions[j].guest_phys_addr,
- vui->regions[j].memory_size,
- vui->regions[j].userspace_addr,
- vui->regions[j].mmap_offset,
- pointer_to_uword (vui->region_mmap_addr[j]));
- }
- FOR_ALL_VHOST_RX_TXQ (q, vui)
- {
- if (!vui->vrings[q].started)
- continue;
-
- vlib_cli_output (vm, "\n Virtqueue %d (%s%s)\n", q,
- (q & 1) ? "RX" : "TX",
- vui->vrings[q].enabled ? "" : " disabled");
- vlib_cli_output (vm, " global %s queue index %u\n",
- (q & 1) ? "RX" : "TX", vui->vrings[q].queue_index);
-
- vlib_cli_output (
- vm,
- " qsz %d last_avail_idx %d last_used_idx %d"
- " last_kick %u\n",
- vui->vrings[q].qsz_mask + 1, vui->vrings[q].last_avail_idx,
- vui->vrings[q].last_used_idx, vui->vrings[q].last_kick);
-
- if (vhost_user_is_packed_ring_supported (vui))
- vhost_user_show_desc_packed (vm, vui, q, show_descr, show_verbose);
- else
- vhost_user_show_desc (vm, vui, q, show_descr, show_verbose);
- }
- vlib_cli_output (vm, "\n");
- }
-done:
- vec_free (hw_if_indices);
- return error;
-}
-
-/*
- * CLI functions
- */
-
-/*?
- * Create a vHost User interface. Once created, a new virtual interface
- * will exist with the name '<em>VirtualEthernet0/0/x</em>', where '<em>x</em>'
- * is the next free index.
- *
- * There are several parameters associated with a vHost interface:
- *
- * - <b>socket <socket-filename></b> - Name of the linux socket used by
- * hypervisor and VPP to manage the vHost interface. If in <em>server</em>
- * mode, VPP will create the socket if it does not already exist. If in
- * <em>client</em> mode, hypervisor will create the socket if it does not
- * already exist. The VPP code is indifferent to the file location. However,
- * if SELinux is enabled, then the socket needs to be created in
- * <em>/var/run/vpp/</em>.
- *
- * - <b>server</b> - Optional flag to indicate that VPP should be the server
- * for the linux socket. If not provided, VPP will be the client. In
- * <em>server</em> mode, the VM can be reset without tearing down the vHost
- * Interface. In <em>client</em> mode, VPP can be reset without bringing down
- * the VM and tearing down the vHost Interface.
- *
- * - <b>feature-mask <hex></b> - Optional virtio/vhost feature set negotiated
- * at startup. <b>This is intended for degugging only.</b> It is recommended
- * that this parameter not be used except by experienced users. By default,
- * all supported features will be advertised. Otherwise, provide the set of
- * features desired.
- * - 0x000008000 (15) - VIRTIO_NET_F_MRG_RXBUF
- * - 0x000020000 (17) - VIRTIO_NET_F_CTRL_VQ
- * - 0x000200000 (21) - VIRTIO_NET_F_GUEST_ANNOUNCE
- * - 0x000400000 (22) - VIRTIO_NET_F_MQ
- * - 0x004000000 (26) - VHOST_F_LOG_ALL
- * - 0x008000000 (27) - VIRTIO_F_ANY_LAYOUT
- * - 0x010000000 (28) - VIRTIO_F_INDIRECT_DESC
- * - 0x040000000 (30) - VHOST_USER_F_PROTOCOL_FEATURES
- * - 0x100000000 (32) - VIRTIO_F_VERSION_1
- *
- * - <b>hwaddr <mac-addr></b> - Optional ethernet address, can be in either
- * X:X:X:X:X:X unix or X.X.X cisco format.
- *
- * - <b>renumber <dev_instance></b> - Optional parameter which allows the
- * instance in the name to be specified. If instance already exists, name
- * will be used anyway and multiple instances will have the same name. Use
- * with caution.
- *
- * @cliexpar
- * Example of how to create a vhost interface with VPP as the client and all
- * features enabled:
- * @cliexstart{create vhost-user socket /var/run/vpp/vhost1.sock}
- * VirtualEthernet0/0/0
- * @cliexend
- * Example of how to create a vhost interface with VPP as the server and with
- * just multiple queues enabled:
- * @cliexstart{create vhost-user socket /var/run/vpp/vhost2.sock server
- * feature-mask 0x40400000}
- * VirtualEthernet0/0/1
- * @cliexend
- * Once the vHost interface is created, enable the interface using:
- * @cliexcmd{set interface state VirtualEthernet0/0/0 up}
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (vhost_user_connect_command, static) = {
- .path = "create vhost-user",
- .short_help = "create vhost-user socket <socket-filename> [server] "
- "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>] [gso] "
- "[packed] [event-idx]",
- .function = vhost_user_connect_command_fn,
- .is_mp_safe = 1,
-};
-/* *INDENT-ON* */
-
-/*?
- * Delete a vHost User interface using the interface name or the
- * software interface index. Use the '<em>show interface</em>'
- * command to determine the software interface index. On deletion,
- * the linux socket will not be deleted.
- *
- * @cliexpar
- * Example of how to delete a vhost interface by name:
- * @cliexcmd{delete vhost-user VirtualEthernet0/0/1}
- * Example of how to delete a vhost interface by software interface index:
- * @cliexcmd{delete vhost-user sw_if_index 1}
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (vhost_user_delete_command, static) = {
- .path = "delete vhost-user",
- .short_help = "delete vhost-user {<interface> | sw_if_index <sw_idx>}",
- .function = vhost_user_delete_command_fn,
-};
-
-/*?
- * Display the attributes of a single vHost User interface (provide interface
- * name), multiple vHost User interfaces (provide a list of interface names
- * separated by spaces) or all Vhost User interfaces (omit an interface name
- * to display all vHost interfaces).
- *
- * @cliexpar
- * @parblock
- * Example of how to display a vhost interface:
- * @cliexstart{show vhost-user VirtualEthernet0/0/0}
- * Virtio vhost-user interfaces
- * Global:
- * coalesce frames 32 time 1e-3
- * Interface: VirtualEthernet0/0/0 (ifindex 1)
- * virtio_net_hdr_sz 12
- * features mask (0xffffffffffffffff):
- * features (0x50408000):
- * VIRTIO_NET_F_MRG_RXBUF (15)
- * VIRTIO_NET_F_MQ (22)
- * VIRTIO_F_INDIRECT_DESC (28)
- * VHOST_USER_F_PROTOCOL_FEATURES (30)
- * protocol features (0x3)
- * VHOST_USER_PROTOCOL_F_MQ (0)
- * VHOST_USER_PROTOCOL_F_LOG_SHMFD (1)
- *
- * socket filename /var/run/vpp/vhost1.sock type client errno "Success"
- *
- * rx placement:
- * thread 1 on vring 1
- * thread 1 on vring 5
- * thread 2 on vring 3
- * thread 2 on vring 7
- * tx placement: spin-lock
- * thread 0 on vring 0
- * thread 1 on vring 2
- * thread 2 on vring 0
- *
- * Memory regions (total 2)
- * region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr
- * ====== == =============== =========== ============== =========== ==========
- * 0 60 0x00000000 0x000a0000 0xaac00000 0x00000000 0x2b400000
- * 1 61 0x000c0000 0x3ff40000 0xaacc0000 0x000c0000 0xabcc0000
- *
- * Virtqueue 0 (TX)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
- * kickfd 62 callfd 64 errfd -1
- *
- * Virtqueue 1 (RX)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
- * kickfd 65 callfd 66 errfd -1
- *
- * Virtqueue 2 (TX)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
- * kickfd 63 callfd 70 errfd -1
- *
- * Virtqueue 3 (RX)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
- * kickfd 72 callfd 74 errfd -1
- *
- * Virtqueue 4 (TX disabled)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
- * kickfd 76 callfd 78 errfd -1
- *
- * Virtqueue 5 (RX disabled)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
- * kickfd 80 callfd 82 errfd -1
- *
- * Virtqueue 6 (TX disabled)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
- * kickfd 84 callfd 86 errfd -1
- *
- * Virtqueue 7 (RX disabled)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
- * kickfd 88 callfd 90 errfd -1
- *
- * @cliexend
- *
- * The optional '<em>descriptors</em>' parameter will display the same output
- * as the previous example but will include the descriptor table for each
- * queue.
- * The output is truncated below:
- * @cliexstart{show vhost-user VirtualEthernet0/0/0 descriptors}
- * Virtio vhost-user interfaces
- * Global:
- * coalesce frames 32 time 1e-3
- * Interface: VirtualEthernet0/0/0 (ifindex 1)
- * virtio_net_hdr_sz 12
- * features mask (0xffffffffffffffff):
- * features (0x50408000):
- * VIRTIO_NET_F_MRG_RXBUF (15)
- * VIRTIO_NET_F_MQ (22)
- * :
- * Virtqueue 0 (TX)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
- * kickfd 62 callfd 64 errfd -1
- *
- * descriptor table:
- * id addr len flags next user_addr
- * ===== ================== ===== ====== ===== ==================
- * 0 0x0000000010b6e974 2060 0x0002 1 0x00002aabbc76e974
- * 1 0x0000000010b6e034 2060 0x0002 2 0x00002aabbc76e034
- * 2 0x0000000010b6d6f4 2060 0x0002 3 0x00002aabbc76d6f4
- * 3 0x0000000010b6cdb4 2060 0x0002 4 0x00002aabbc76cdb4
- * 4 0x0000000010b6c474 2060 0x0002 5 0x00002aabbc76c474
- * 5 0x0000000010b6bb34 2060 0x0002 6 0x00002aabbc76bb34
- * 6 0x0000000010b6b1f4 2060 0x0002 7 0x00002aabbc76b1f4
- * 7 0x0000000010b6a8b4 2060 0x0002 8 0x00002aabbc76a8b4
- * 8 0x0000000010b69f74 2060 0x0002 9 0x00002aabbc769f74
- * 9 0x0000000010b69634 2060 0x0002 10 0x00002aabbc769634
- * 10 0x0000000010b68cf4 2060 0x0002 11 0x00002aabbc768cf4
- * :
- * 249 0x0000000000000000 0 0x0000 250 0x00002aab2b400000
- * 250 0x0000000000000000 0 0x0000 251 0x00002aab2b400000
- * 251 0x0000000000000000 0 0x0000 252 0x00002aab2b400000
- * 252 0x0000000000000000 0 0x0000 253 0x00002aab2b400000
- * 253 0x0000000000000000 0 0x0000 254 0x00002aab2b400000
- * 254 0x0000000000000000 0 0x0000 255 0x00002aab2b400000
- * 255 0x0000000000000000 0 0x0000 32768 0x00002aab2b400000
- *
- * Virtqueue 1 (RX)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * :
- * @cliexend
- * @endparblock
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_vhost_user_command, static) = {
- .path = "show vhost-user",
- .short_help = "show vhost-user [<interface> [<interface> [..]]] "
- "[[descriptors] [verbose]]",
- .function = show_vhost_user_command_fn,
-};
-/* *INDENT-ON* */
-
-
-static clib_error_t *
-vhost_user_config (vlib_main_t * vm, unformat_input_t * input)
-{
- vhost_user_main_t *vum = &vhost_user_main;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "coalesce-frames %d", &vum->coalesce_frames))
- ;
- else if (unformat (input, "coalesce-time %f", &vum->coalesce_time))
- ;
- else if (unformat (input, "dont-dump-memory"))
- vum->dont_dump_vhost_user_memory = 1;
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- }
-
- return 0;
-}
-
-/* vhost-user { ... } configuration. */
-VLIB_CONFIG_FUNCTION (vhost_user_config, "vhost-user");
-
-void
-vhost_user_unmap_all (void)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui;
-
- if (vum->dont_dump_vhost_user_memory)
- {
- pool_foreach (vui, vum->vhost_user_interfaces)
- unmap_all_mem_regions (vui);
- }
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/virtio/vhost_user.h b/src/vnet/devices/virtio/vhost_user.h
deleted file mode 100644
index f44951e030a..00000000000
--- a/src/vnet/devices/virtio/vhost_user.h
+++ /dev/null
@@ -1,388 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __VIRTIO_VHOST_USER_H__
-#define __VIRTIO_VHOST_USER_H__
-
-#include <vnet/devices/virtio/virtio_std.h>
-#include <vnet/devices/virtio/vhost_std.h>
-
-/* vhost-user data structures */
-
-#define VHOST_MEMORY_MAX_NREGIONS 8
-#define VHOST_USER_MSG_HDR_SZ 12
-#define VHOST_VRING_INIT_MQ_PAIR_SZ 8 //8TX + 8RX
-
-/*
- * qid is one byte in size in the spec. Please see VHOST_USER_SET_VRING_CALL,
- * VHOST_USER_SET_VRING_KICK, and VHOST_USER_SET_VRING_ERR.
- * The max number for q pair is naturally 128.
- */
-#define VHOST_VRING_MAX_MQ_PAIR_SZ 128
-#define VHOST_VRING_IDX_RX(qid) (2 * (qid))
-#define VHOST_VRING_IDX_TX(qid) (2 * (qid) + 1)
-
-#define VHOST_USER_VRING_NOFD_MASK 0x100
-
-#define VHOST_USER_PROTOCOL_F_MQ 0
-#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
-#define VHOST_VRING_F_LOG 0
-
-#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
- (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD))
-
-#define vu_log_debug(dev, f, ...) \
-{ \
- vlib_log(VLIB_LOG_LEVEL_DEBUG, vhost_user_main.log_default, "%U: " f, \
- format_vnet_hw_if_index_name, vnet_get_main(), \
- dev->hw_if_index, ##__VA_ARGS__); \
-};
-
-#define vu_log_warn(dev, f, ...) \
-{ \
- vlib_log(VLIB_LOG_LEVEL_WARNING, vhost_user_main.log_default, "%U: " f, \
- format_vnet_hw_if_index_name, vnet_get_main(), \
- dev->hw_if_index, ##__VA_ARGS__); \
-};
-#define vu_log_err(dev, f, ...) \
-{ \
- vlib_log(VLIB_LOG_LEVEL_ERR, vhost_user_main.log_default, "%U: " f, \
- format_vnet_hw_if_index_name, vnet_get_main(), \
- dev->hw_if_index, ##__VA_ARGS__); \
-};
-
-#define UNIX_GET_FD(unixfd_idx) ({ \
- typeof(unixfd_idx) __unixfd_idx = (unixfd_idx); \
- (__unixfd_idx != ~0) ? \
- pool_elt_at_index (file_main.file_pool, \
- __unixfd_idx)->file_descriptor : -1; })
-
-#define foreach_virtio_trace_flags \
- _ (SIMPLE_CHAINED, 0, "Simple descriptor chaining") \
- _ (SINGLE_DESC, 1, "Single descriptor packet") \
- _ (INDIRECT, 2, "Indirect descriptor") \
- _ (MAP_ERROR, 4, "Memory mapping error")
-
-typedef enum
-{
-#define _(n,i,s) VIRTIO_TRACE_F_##n,
- foreach_virtio_trace_flags
-#undef _
-} virtio_trace_flag_t;
-
-#define FEATURE_VIRTIO_NET_F_HOST_TSO_FEATURE_BITS \
- (VIRTIO_FEATURE (VIRTIO_NET_F_CSUM) | \
- VIRTIO_FEATURE (VIRTIO_NET_F_HOST_UFO) | \
- VIRTIO_FEATURE (VIRTIO_NET_F_HOST_TSO4) | \
- VIRTIO_FEATURE (VIRTIO_NET_F_HOST_TSO6))
-
-#define FEATURE_VIRTIO_NET_F_GUEST_TSO_FEATURE_BITS \
- (VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM) | \
- VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_UFO) | \
- VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO4) | \
- VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO6))
-
-#define FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS \
- (FEATURE_VIRTIO_NET_F_HOST_TSO_FEATURE_BITS | \
- FEATURE_VIRTIO_NET_F_GUEST_TSO_FEATURE_BITS)
-
-
-typedef struct
-{
- char *sock_filename;
- u64 feature_mask;
- u32 custom_dev_instance;
- u8 hwaddr[6];
- u8 renumber;
- u8 is_server;
- u8 enable_gso;
- u8 enable_packed;
- u8 enable_event_idx;
- u8 use_custom_mac;
-
- /* return */
- u32 sw_if_index;
-} vhost_user_create_if_args_t;
-
-int vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
- vhost_user_create_if_args_t * args);
-int vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
- vhost_user_create_if_args_t * args);
-int vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm,
- u32 sw_if_index);
-
-/* *INDENT-OFF* */
-typedef struct vhost_user_memory_region
-{
- u64 guest_phys_addr;
- u64 memory_size;
- u64 userspace_addr;
- u64 mmap_offset;
-} __attribute ((packed)) vhost_user_memory_region_t;
-
-typedef struct vhost_user_memory
-{
- u32 nregions;
- u32 padding;
- vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS];
-} __attribute ((packed)) vhost_user_memory_t;
-
-typedef enum vhost_user_req
-{
- VHOST_USER_NONE = 0,
- VHOST_USER_GET_FEATURES = 1,
- VHOST_USER_SET_FEATURES = 2,
- VHOST_USER_SET_OWNER = 3,
- VHOST_USER_RESET_OWNER = 4,
- VHOST_USER_SET_MEM_TABLE = 5,
- VHOST_USER_SET_LOG_BASE = 6,
- VHOST_USER_SET_LOG_FD = 7,
- VHOST_USER_SET_VRING_NUM = 8,
- VHOST_USER_SET_VRING_ADDR = 9,
- VHOST_USER_SET_VRING_BASE = 10,
- VHOST_USER_GET_VRING_BASE = 11,
- VHOST_USER_SET_VRING_KICK = 12,
- VHOST_USER_SET_VRING_CALL = 13,
- VHOST_USER_SET_VRING_ERR = 14,
- VHOST_USER_GET_PROTOCOL_FEATURES = 15,
- VHOST_USER_SET_PROTOCOL_FEATURES = 16,
- VHOST_USER_GET_QUEUE_NUM = 17,
- VHOST_USER_SET_VRING_ENABLE = 18,
- VHOST_USER_MAX
-} vhost_user_req_t;
-
-typedef struct vhost_user_msg {
- vhost_user_req_t request;
- u32 flags;
- u32 size;
- union
- {
- u64 u64;
- vhost_vring_state_t state;
- vhost_vring_addr_t addr;
- vhost_user_memory_t memory;
- vhost_user_log_t log;
- };
-} __attribute ((packed)) vhost_user_msg_t;
-/* *INDENT-ON* */
-
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- u16 qsz_mask;
- u16 last_avail_idx;
- u16 last_used_idx;
- u16 n_since_last_int;
- union
- {
- vnet_virtio_vring_desc_t *desc;
- vnet_virtio_vring_packed_desc_t *packed_desc;
- };
- union
- {
- vnet_virtio_vring_avail_t *avail;
- vnet_virtio_vring_desc_event_t *avail_event;
- };
- union
- {
- vnet_virtio_vring_used_t *used;
- vnet_virtio_vring_desc_event_t *used_event;
- };
- uword desc_user_addr;
- uword used_user_addr;
- uword avail_user_addr;
- f64 int_deadline;
- u8 started;
- u8 enabled;
- u8 log_used;
- clib_spinlock_t vring_lock;
-
- //Put non-runtime in a different cache line
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
- int errfd;
- u32 callfd_idx;
- u32 kickfd_idx;
- u64 log_guest_addr;
-
- /* The rx queue policy (interrupt/adaptive/polling) for this queue */
- u32 mode;
-
- /*
- * It contains the device queue number. -1 if it does not. The idea is
- * to not invoke vnet_hw_interface_assign_rx_thread and
- * vnet_hw_interface_unassign_rx_thread more than once for the duration of
- * the interface even if it is disconnected and reconnected.
- */
- i16 qid;
-
- u16 used_wrap_counter;
- u16 avail_wrap_counter;
- u16 last_kick;
- u8 first_kick;
- u32 queue_index;
- u32 thread_index;
-} vhost_user_vring_t;
-
-#define VHOST_USER_EVENT_START_TIMER 1
-#define VHOST_USER_EVENT_STOP_TIMER 2
-
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- u32 is_ready;
- u32 admin_up;
- u32 unix_server_index;
- u32 clib_file_index;
- char sock_filename[256];
- int sock_errno;
- uword if_index;
- u32 hw_if_index, sw_if_index;
-
- //Feature negotiation
- u64 features;
- u64 feature_mask;
- u64 protocol_features;
-
- //Memory region information
- u32 nregions;
- vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS];
- void *region_mmap_addr[VHOST_MEMORY_MAX_NREGIONS];
- u64 region_guest_addr_lo[VHOST_MEMORY_MAX_NREGIONS];
- u64 region_guest_addr_hi[VHOST_MEMORY_MAX_NREGIONS];
- u32 region_mmap_fd[VHOST_MEMORY_MAX_NREGIONS];
-
- //Virtual rings
- vhost_user_vring_t *vrings;
-
- /*
- * vrings is a dynamic array. It may have more elements than it is
- * currently used. num_qid indicates the current total qid's in the
- * vrings. For example, vec_len(vrings) = 64, num_qid = 60, so the
- * current valid/used qid is (0, 59) in the vrings array.
- */
- u32 num_qid;
-
- int virtio_net_hdr_sz;
- int is_any_layout;
-
- void *log_base_addr;
- u64 log_size;
-
- u8 enable_gso;
-
- /* Packed ring configured */
- u8 enable_packed;
-
- u8 enable_event_idx;
-} vhost_user_intf_t;
-
-#define FOR_ALL_VHOST_TXQ(qid, vui) for (qid = 1; qid < vui->num_qid; qid += 2)
-
-#define FOR_ALL_VHOST_RXQ(qid, vui) for (qid = 0; qid < vui->num_qid; qid += 2)
-
-#define FOR_ALL_VHOST_RX_TXQ(qid, vui) for (qid = 0; qid < vui->num_qid; qid++)
-
-typedef struct
-{
- uword dst;
- uword src;
- u32 len;
-} vhost_copy_t;
-
-typedef struct
-{
- u16 qid; /** The interface queue index (Not the virtio vring idx) */
- u16 device_index; /** The device index */
- u32 virtio_ring_flags; /** Runtime queue flags **/
- u16 first_desc_len; /** Length of the first data descriptor **/
- vnet_virtio_net_hdr_mrg_rxbuf_t hdr; /** Virtio header **/
-} vhost_trace_t;
-
-#define VHOST_USER_RX_BUFFERS_N (2 * VLIB_FRAME_SIZE + 2)
-#define VHOST_USER_COPY_ARRAY_N (4 * VLIB_FRAME_SIZE)
-
-typedef struct
-{
- u32 rx_buffers_len;
- u32 rx_buffers[VHOST_USER_RX_BUFFERS_N];
-
- vnet_virtio_net_hdr_mrg_rxbuf_t tx_headers[VLIB_FRAME_SIZE];
- vhost_copy_t copy[VHOST_USER_COPY_ARRAY_N];
-
- /* This is here so it doesn't end-up
- * using stack or registers. */
- vhost_trace_t *current_trace;
-
- u32 *to_next_list;
- vlib_buffer_t **rx_buffers_pdesc;
- u32 polling_q_count;
-} vhost_cpu_t;
-
-typedef struct
-{
- mhash_t if_index_by_sock_name;
- u32 mtu_bytes;
- vhost_user_intf_t *vhost_user_interfaces;
- u32 *show_dev_instance_by_real_dev_instance;
- u32 coalesce_frames;
- f64 coalesce_time;
- int dont_dump_vhost_user_memory;
-
- /** Per-CPU data for vhost-user */
- vhost_cpu_t *cpus;
-
- /** Pseudo random iterator */
- u32 random;
-
- /* The number of rx interface/queue pairs in interrupt mode */
- u32 ifq_count;
-
- /* logging */
- vlib_log_class_t log_default;
-
- /* gso interface count */
- u32 gso_count;
-} vhost_user_main_t;
-
-typedef struct
-{
- u8 if_name[64];
- u32 sw_if_index;
- u32 virtio_net_hdr_sz;
- u64 features;
- u8 is_server;
- u8 sock_filename[256];
- u32 num_regions;
- int sock_errno;
-} vhost_user_intf_details_t;
-
-int vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
- vhost_user_intf_details_t ** out_vuids);
-void vhost_user_set_operation_mode (vhost_user_intf_t *vui,
- vhost_user_vring_t *txvq);
-
-extern vlib_node_registration_t vhost_user_send_interrupt_node;
-extern vnet_device_class_t vhost_user_device_class;
-extern vlib_node_registration_t vhost_user_input_node;
-extern vhost_user_main_t vhost_user_main;
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/virtio/vhost_user_api.c b/src/vnet/devices/virtio/vhost_user_api.c
deleted file mode 100644
index cc1896b108a..00000000000
--- a/src/vnet/devices/virtio/vhost_user_api.c
+++ /dev/null
@@ -1,352 +0,0 @@
-/*
- *------------------------------------------------------------------
- * vhost-user_api.c - vhost-user api
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vnet/vnet.h>
-#include <vlibmemory/api.h>
-
-#include <vnet/interface.h>
-#include <vnet/api_errno.h>
-#include <vnet/devices/virtio/vhost_user.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/ethernet/ethernet_types_api.h>
-#include <vnet/devices/virtio/virtio_types_api.h>
-
-#include <vnet/format_fns.h>
-#include <vnet/devices/virtio/vhost_user.api_enum.h>
-#include <vnet/devices/virtio/vhost_user.api_types.h>
-
-#define REPLY_MSG_ID_BASE msg_id_base
-#include <vlibapi/api_helper_macros.h>
-
-static u16 msg_id_base;
-
-static void
-vl_api_create_vhost_user_if_t_handler (vl_api_create_vhost_user_if_t * mp)
-{
- int rv = 0;
- vl_api_create_vhost_user_if_reply_t *rmp;
- vnet_main_t *vnm = vnet_get_main ();
- vlib_main_t *vm = vlib_get_main ();
- u64 disabled_features = (u64) (0ULL);
- vhost_user_create_if_args_t args = { 0 };
-
- args.sw_if_index = (u32) ~ 0;
- args.feature_mask = (u64) ~ (0ULL);
- if (mp->disable_mrg_rxbuf)
- disabled_features = VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF);
-
- if (mp->disable_indirect_desc)
- disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC);
-
- /*
- * GSO and PACKED are not supported by feature mask via binary API. We
- * disable GSO and PACKED feature in the feature mask. They may be enabled
- * explicitly via enable_gso and enable_packed argument
- */
- disabled_features |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS |
- VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
-
- /* EVENT_IDX is disabled by default */
- disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
- args.feature_mask &= ~disabled_features;
-
- if (mp->use_custom_mac)
- mac_address_decode (mp->mac_address, (mac_address_t *) args.hwaddr);
-
- args.use_custom_mac = mp->use_custom_mac;
- args.is_server = mp->is_server;
- args.sock_filename = (char *) mp->sock_filename;
- args.renumber = mp->renumber;
- args.custom_dev_instance = ntohl (mp->custom_dev_instance);
- args.enable_gso = mp->enable_gso;
- args.enable_packed = mp->enable_packed;
- rv = vhost_user_create_if (vnm, vm, &args);
-
- /* Remember an interface tag for the new interface */
- if (rv == 0)
- {
- /* If a tag was supplied... */
- if (mp->tag[0])
- {
- /* Make sure it's a proper C-string */
- mp->tag[ARRAY_LEN (mp->tag) - 1] = 0;
- u8 *tag = format (0, "%s%c", mp->tag, 0);
- vnet_set_sw_interface_tag (vnm, tag, args.sw_if_index);
- }
- }
-
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_CREATE_VHOST_USER_IF_REPLY,
- ({
- rmp->sw_if_index = ntohl (args.sw_if_index);
- }));
- /* *INDENT-ON* */
-}
-
-static void
-vl_api_modify_vhost_user_if_t_handler (vl_api_modify_vhost_user_if_t * mp)
-{
- int rv = 0;
- vl_api_modify_vhost_user_if_reply_t *rmp;
- u64 disabled_features = (u64) (0ULL);
- vhost_user_create_if_args_t args = { 0 };
- vnet_main_t *vnm = vnet_get_main ();
- vlib_main_t *vm = vlib_get_main ();
-
- args.feature_mask = (u64) ~ (0ULL);
- /*
- * GSO and PACKED are not supported by feature mask via binary API. We
- * disable GSO and PACKED feature in the feature mask. They may be enabled
- * explicitly via enable_gso and enable_packed argument
- */
- disabled_features |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS |
- VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
-
- /* EVENT_IDX is disabled by default */
- disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
- args.feature_mask &= ~disabled_features;
-
- args.sw_if_index = ntohl (mp->sw_if_index);
- args.sock_filename = (char *) mp->sock_filename;
- args.is_server = mp->is_server;
- args.renumber = mp->renumber;
- args.custom_dev_instance = ntohl (mp->custom_dev_instance);
- args.enable_gso = mp->enable_gso;
- args.enable_packed = mp->enable_packed;
- rv = vhost_user_modify_if (vnm, vm, &args);
-
- REPLY_MACRO (VL_API_MODIFY_VHOST_USER_IF_REPLY);
-}
-
-static void
-vl_api_create_vhost_user_if_v2_t_handler (vl_api_create_vhost_user_if_v2_t *
- mp)
-{
- int rv = 0;
- vl_api_create_vhost_user_if_v2_reply_t *rmp;
- vnet_main_t *vnm = vnet_get_main ();
- vlib_main_t *vm = vlib_get_main ();
- u64 disabled_features = (u64) (0ULL);
- vhost_user_create_if_args_t args = { 0 };
-
- args.sw_if_index = (u32) ~ 0;
- args.feature_mask = (u64) ~ (0ULL);
- if (mp->disable_mrg_rxbuf)
- disabled_features = VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF);
-
- if (mp->disable_indirect_desc)
- disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC);
-
- /*
- * GSO and PACKED are not supported by feature mask via binary API. We
- * disable GSO and PACKED feature in the feature mask. They may be enabled
- * explicitly via enable_gso and enable_packed argument
- */
- disabled_features |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS |
- VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
-
- /* EVENT_IDX is disabled by default */
- disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
- args.feature_mask &= ~disabled_features;
-
- if (mp->use_custom_mac)
- mac_address_decode (mp->mac_address, (mac_address_t *) args.hwaddr);
-
- args.use_custom_mac = mp->use_custom_mac;
- args.is_server = mp->is_server;
- args.sock_filename = (char *) mp->sock_filename;
- args.renumber = mp->renumber;
- args.custom_dev_instance = ntohl (mp->custom_dev_instance);
- args.enable_gso = mp->enable_gso;
- args.enable_packed = mp->enable_packed;
- args.enable_event_idx = mp->enable_event_idx;
- rv = vhost_user_create_if (vnm, vm, &args);
-
- /* Remember an interface tag for the new interface */
- if (rv == 0)
- {
- /* If a tag was supplied... */
- if (mp->tag[0])
- {
- /* Make sure it's a proper C-string */
- mp->tag[ARRAY_LEN (mp->tag) - 1] = 0;
- u8 *tag = format (0, "%s%c", mp->tag, 0);
- vnet_set_sw_interface_tag (vnm, tag, args.sw_if_index);
- }
- }
-
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_CREATE_VHOST_USER_IF_V2_REPLY,
- ({
- rmp->sw_if_index = ntohl (args.sw_if_index);
- }));
- /* *INDENT-ON* */
-}
-
-static void
-vl_api_modify_vhost_user_if_v2_t_handler (vl_api_modify_vhost_user_if_v2_t *
- mp)
-{
- int rv = 0;
- vl_api_modify_vhost_user_if_v2_reply_t *rmp;
- u64 disabled_features = (u64) (0ULL);
- vhost_user_create_if_args_t args = { 0 };
- vnet_main_t *vnm = vnet_get_main ();
- vlib_main_t *vm = vlib_get_main ();
-
- args.feature_mask = (u64) ~ (0ULL);
- /*
- * GSO and PACKED are not supported by feature mask via binary API. We
- * disable GSO and PACKED feature in the feature mask. They may be enabled
- * explicitly via enable_gso and enable_packed argument
- */
- disabled_features |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS |
- VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
-
- /* EVENT_IDX is disabled by default */
- disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
- args.feature_mask &= ~disabled_features;
-
- args.sw_if_index = ntohl (mp->sw_if_index);
- args.sock_filename = (char *) mp->sock_filename;
- args.is_server = mp->is_server;
- args.renumber = mp->renumber;
- args.custom_dev_instance = ntohl (mp->custom_dev_instance);
- args.enable_gso = mp->enable_gso;
- args.enable_packed = mp->enable_packed;
- args.enable_event_idx = mp->enable_event_idx;
- rv = vhost_user_modify_if (vnm, vm, &args);
-
- REPLY_MACRO (VL_API_MODIFY_VHOST_USER_IF_V2_REPLY);
-}
-
-static void
-vl_api_delete_vhost_user_if_t_handler (vl_api_delete_vhost_user_if_t * mp)
-{
- int rv = 0;
- vl_api_delete_vhost_user_if_reply_t *rmp;
- u32 sw_if_index = ntohl (mp->sw_if_index);
- vl_api_registration_t *reg;
-
- vnet_main_t *vnm = vnet_get_main ();
- vlib_main_t *vm = vlib_get_main ();
-
- rv = vhost_user_delete_if (vnm, vm, sw_if_index);
-
- REPLY_MACRO (VL_API_DELETE_VHOST_USER_IF_REPLY);
- if (!rv)
- {
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- vnet_clear_sw_interface_tag (vnm, sw_if_index);
- }
-}
-
-static void
-send_sw_interface_vhost_user_details (vpe_api_main_t * am,
- vl_api_registration_t * reg,
- vhost_user_intf_details_t * vui,
- u32 context)
-{
- vl_api_sw_interface_vhost_user_details_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id =
- ntohs (REPLY_MSG_ID_BASE + VL_API_SW_INTERFACE_VHOST_USER_DETAILS);
- mp->sw_if_index = ntohl (vui->sw_if_index);
- mp->virtio_net_hdr_sz = ntohl (vui->virtio_net_hdr_sz);
- virtio_features_encode (vui->features, (u32 *) & mp->features_first_32,
- (u32 *) & mp->features_last_32);
- mp->is_server = vui->is_server;
- mp->num_regions = ntohl (vui->num_regions);
- mp->sock_errno = ntohl (vui->sock_errno);
- mp->context = context;
-
- strncpy ((char *) mp->sock_filename,
- (char *) vui->sock_filename, ARRAY_LEN (mp->sock_filename) - 1);
- strncpy ((char *) mp->interface_name,
- (char *) vui->if_name, ARRAY_LEN (mp->interface_name) - 1);
-
- vl_api_send_msg (reg, (u8 *) mp);
-}
-
-static void
- vl_api_sw_interface_vhost_user_dump_t_handler
- (vl_api_sw_interface_vhost_user_dump_t * mp)
-{
- int rv = 0;
- vpe_api_main_t *am = &vpe_api_main;
- vnet_main_t *vnm = vnet_get_main ();
- vlib_main_t *vm = vlib_get_main ();
- vhost_user_intf_details_t *ifaces = NULL;
- vhost_user_intf_details_t *vuid = NULL;
- vl_api_registration_t *reg;
- u32 filter_sw_if_index;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- filter_sw_if_index = htonl (mp->sw_if_index);
- if (filter_sw_if_index != ~0)
- VALIDATE_SW_IF_INDEX (mp);
-
- rv = vhost_user_dump_ifs (vnm, vm, &ifaces);
- if (rv)
- return;
-
- vec_foreach (vuid, ifaces)
- {
- if ((filter_sw_if_index == ~0) ||
- (vuid->sw_if_index == filter_sw_if_index))
- send_sw_interface_vhost_user_details (am, reg, vuid, mp->context);
- }
- BAD_SW_IF_INDEX_LABEL;
- vec_free (ifaces);
-}
-
-#include <vnet/devices/virtio/vhost_user.api.c>
-static clib_error_t *
-vhost_user_api_hookup (vlib_main_t * vm)
-{
- api_main_t *am = vlibapi_get_main ();
- /* Mark CREATE_VHOST_USER_IF as mp safe */
- vl_api_set_msg_thread_safe (am, VL_API_CREATE_VHOST_USER_IF, 1);
- vl_api_set_msg_thread_safe (am, VL_API_CREATE_VHOST_USER_IF_V2, 1);
-
- /*
- * Set up the (msg_name, crc, message-id) table
- */
- REPLY_MSG_ID_BASE = setup_message_id_table ();
-
- return 0;
-}
-
-VLIB_API_INIT_FUNCTION (vhost_user_api_hookup);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/virtio/vhost_user_inline.h b/src/vnet/devices/virtio/vhost_user_inline.h
deleted file mode 100644
index 8bdff3733a7..00000000000
--- a/src/vnet/devices/virtio/vhost_user_inline.h
+++ /dev/null
@@ -1,496 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __VIRTIO_VHOST_USER_INLINE_H__
-#define __VIRTIO_VHOST_USER_INLINE_H__
-/* vhost-user inline functions */
-#include <vppinfra/elog.h>
-
-static_always_inline void *
-map_guest_mem (vhost_user_intf_t * vui, uword addr, u32 * hint)
-{
- int i = *hint;
- if (PREDICT_TRUE ((vui->regions[i].guest_phys_addr <= addr) &&
- ((vui->regions[i].guest_phys_addr +
- vui->regions[i].memory_size) > addr)))
- {
- return (void *) (vui->region_mmap_addr[i] + addr -
- vui->regions[i].guest_phys_addr);
- }
-#if __SSE4_2__
- __m128i rl, rh, al, ah, r;
- al = _mm_set1_epi64x (addr + 1);
- ah = _mm_set1_epi64x (addr);
-
- rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[0]);
- rl = _mm_cmpgt_epi64 (al, rl);
- rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[0]);
- rh = _mm_cmpgt_epi64 (rh, ah);
- r = _mm_and_si128 (rl, rh);
-
- rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[2]);
- rl = _mm_cmpgt_epi64 (al, rl);
- rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[2]);
- rh = _mm_cmpgt_epi64 (rh, ah);
- r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x22);
-
- rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[4]);
- rl = _mm_cmpgt_epi64 (al, rl);
- rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[4]);
- rh = _mm_cmpgt_epi64 (rh, ah);
- r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x44);
-
- rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[6]);
- rl = _mm_cmpgt_epi64 (al, rl);
- rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[6]);
- rh = _mm_cmpgt_epi64 (rh, ah);
- r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x88);
-
- r = _mm_shuffle_epi8 (r, _mm_set_epi64x (0, 0x0e060c040a020800));
- i = count_trailing_zeros (_mm_movemask_epi8 (r) |
- (1 << VHOST_MEMORY_MAX_NREGIONS));
-
- if (i < vui->nregions)
- {
- *hint = i;
- return (void *) (vui->region_mmap_addr[i] + addr -
- vui->regions[i].guest_phys_addr);
- }
-#elif __aarch64__ && __ARM_NEON
- uint64x2_t al, ah, rl, rh, r;
- uint32_t u32 = 0;
-
- al = vdupq_n_u64 (addr + 1);
- ah = vdupq_n_u64 (addr);
-
- /*First Iteration */
- rl = vld1q_u64 (&vui->region_guest_addr_lo[0]);
- rl = vcgtq_u64 (al, rl);
- rh = vld1q_u64 (&vui->region_guest_addr_hi[0]);
- rh = vcgtq_u64 (rh, ah);
- r = vandq_u64 (rl, rh);
- u32 |= (vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1);
- u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 1);
-
- if (u32)
- {
- i = count_trailing_zeros (u32);
- goto vhost_map_guest_mem_done;
- }
-
- /*Second Iteration */
- rl = vld1q_u64 (&vui->region_guest_addr_lo[2]);
- rl = vcgtq_u64 (al, rl);
- rh = vld1q_u64 (&vui->region_guest_addr_hi[2]);
- rh = vcgtq_u64 (rh, ah);
- r = vandq_u64 (rl, rh);
- u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 2);
- u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 3);
-
- if (u32)
- {
- i = count_trailing_zeros (u32);
- goto vhost_map_guest_mem_done;
- }
-
- /*Third Iteration */
- rl = vld1q_u64 (&vui->region_guest_addr_lo[4]);
- rl = vcgtq_u64 (al, rl);
- rh = vld1q_u64 (&vui->region_guest_addr_hi[4]);
- rh = vcgtq_u64 (rh, ah);
- r = vandq_u64 (rl, rh);
- u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 6);
- u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 7);
-
- i = count_trailing_zeros (u32 | (1 << VHOST_MEMORY_MAX_NREGIONS));
-
-vhost_map_guest_mem_done:
- if (i < vui->nregions)
- {
- *hint = i;
- return (void *) (vui->region_mmap_addr[i] + addr -
- vui->regions[i].guest_phys_addr);
- }
-#else
- for (i = 0; i < vui->nregions; i++)
- {
- if ((vui->regions[i].guest_phys_addr <= addr) &&
- ((vui->regions[i].guest_phys_addr + vui->regions[i].memory_size) >
- addr))
- {
- *hint = i;
- return (void *) (vui->region_mmap_addr[i] + addr -
- vui->regions[i].guest_phys_addr);
- }
- }
-#endif
- /* *INDENT-OFF* */
- ELOG_TYPE_DECLARE (el) =
- {
- .format = "failed to map guest mem addr %lx",
- .format_args = "i8",
- };
- /* *INDENT-ON* */
- struct
- {
- uword addr;
- } *ed;
- ed = ELOG_DATA (&vlib_global_main.elog_main, el);
- ed->addr = addr;
- *hint = 0;
- return 0;
-}
-
-static_always_inline void *
-map_user_mem (vhost_user_intf_t * vui, uword addr)
-{
- int i;
- for (i = 0; i < vui->nregions; i++)
- {
- if ((vui->regions[i].userspace_addr <= addr) &&
- ((vui->regions[i].userspace_addr + vui->regions[i].memory_size) >
- addr))
- {
- return (void *) (vui->region_mmap_addr[i] + addr -
- vui->regions[i].userspace_addr);
- }
- }
- return 0;
-}
-
-#define VHOST_LOG_PAGE 0x1000
-
-static_always_inline void
-vhost_user_log_dirty_pages_2 (vhost_user_intf_t * vui,
- u64 addr, u64 len, u8 is_host_address)
-{
- if (PREDICT_TRUE (vui->log_base_addr == 0
- || !(vui->features & VIRTIO_FEATURE (VHOST_F_LOG_ALL))))
- {
- return;
- }
- if (is_host_address)
- {
- addr = pointer_to_uword (map_user_mem (vui, (uword) addr));
- }
- if (PREDICT_FALSE ((addr + len - 1) / VHOST_LOG_PAGE / 8 >= vui->log_size))
- {
- vu_log_debug (vui, "vhost_user_log_dirty_pages(): out of range\n");
- return;
- }
-
- CLIB_MEMORY_BARRIER ();
- u64 page = addr / VHOST_LOG_PAGE;
- while (page * VHOST_LOG_PAGE < addr + len)
- {
- ((u8 *) vui->log_base_addr)[page / 8] |= 1 << page % 8;
- page++;
- }
-}
-
-#define vhost_user_log_dirty_ring(vui, vq, member) \
- if (PREDICT_FALSE (vq->log_used)) \
- { \
- vhost_user_log_dirty_pages_2 ( \
- vui, \
- vq->log_guest_addr + \
- STRUCT_OFFSET_OF (vnet_virtio_vring_used_t, member), \
- sizeof (vq->used->member), 0); \
- }
-
-static_always_inline u8 *
-format_vhost_trace (u8 * s, va_list * va)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
- CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main ();
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_trace_t *t = va_arg (*va, vhost_trace_t *);
- vhost_user_intf_t *vui = vum->vhost_user_interfaces + t->device_index;
- vnet_sw_interface_t *sw;
- u32 indent;
-
- if (pool_is_free (vum->vhost_user_interfaces, vui))
- {
- s = format (s, "vhost-user interface is deleted");
- return s;
- }
- sw = vnet_get_sw_interface (vnm, vui->sw_if_index);
- indent = format_get_indent (s);
- s = format (s, "%U %U queue %d\n", format_white_space, indent,
- format_vnet_sw_interface_name, vnm, sw, t->qid);
-
- s = format (s, "%U virtio flags:\n", format_white_space, indent);
-#define _(n,i,st) \
- if (t->virtio_ring_flags & (1 << VIRTIO_TRACE_F_##n)) \
- s = format (s, "%U %s %s\n", format_white_space, indent, #n, st);
- foreach_virtio_trace_flags
-#undef _
- s = format (s, "%U virtio_net_hdr first_desc_len %u\n",
- format_white_space, indent, t->first_desc_len);
-
- s = format (s, "%U flags 0x%02x gso_type %u\n",
- format_white_space, indent,
- t->hdr.hdr.flags, t->hdr.hdr.gso_type);
-
- if (vui->virtio_net_hdr_sz == 12)
- s = format (s, "%U num_buff %u",
- format_white_space, indent, t->hdr.num_buffers);
-
- return s;
-}
-
-static_always_inline u64
-vhost_user_is_packed_ring_supported (vhost_user_intf_t * vui)
-{
- return (vui->features & VIRTIO_FEATURE (VIRTIO_F_RING_PACKED));
-}
-
-static_always_inline u64
-vhost_user_is_event_idx_supported (vhost_user_intf_t * vui)
-{
- return (vui->features & VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX));
-}
-
-static_always_inline void
-vhost_user_kick (vlib_main_t * vm, vhost_user_vring_t * vq)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- u64 x = 1;
- int fd = UNIX_GET_FD (vq->callfd_idx);
- int rv;
-
- rv = write (fd, &x, sizeof (x));
- if (PREDICT_FALSE (rv <= 0))
- {
- clib_unix_warning
- ("Error: Could not write to unix socket for callfd %d", fd);
- return;
- }
-
- vq->n_since_last_int = 0;
- vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time;
-}
-
-static_always_inline u16
-vhost_user_avail_event_idx (vhost_user_vring_t * vq)
-{
- volatile u16 *event_idx = (u16 *) & (vq->used->ring[vq->qsz_mask + 1]);
-
- return *event_idx;
-}
-
-static_always_inline u16
-vhost_user_used_event_idx (vhost_user_vring_t * vq)
-{
- volatile u16 *event_idx = (u16 *) & (vq->avail->ring[vq->qsz_mask + 1]);
-
- return *event_idx;
-}
-
-static_always_inline u16
-vhost_user_need_event (u16 event_idx, u16 new_idx, u16 old_idx)
-{
- return ((u16) (new_idx - event_idx - 1) < (u16) (new_idx - old_idx));
-}
-
-static_always_inline void
-vhost_user_send_call_event_idx (vlib_main_t * vm, vhost_user_vring_t * vq)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- u8 first_kick = vq->first_kick;
- u16 event_idx = vhost_user_used_event_idx (vq);
-
- vq->first_kick = 1;
- if (vhost_user_need_event (event_idx, vq->last_used_idx, vq->last_kick) ||
- PREDICT_FALSE (!first_kick))
- {
- vhost_user_kick (vm, vq);
- vq->last_kick = event_idx;
- }
- else
- {
- vq->n_since_last_int = 0;
- vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time;
- }
-}
-
-static_always_inline void
-vhost_user_send_call_event_idx_packed (vlib_main_t * vm,
- vhost_user_vring_t * vq)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- u8 first_kick = vq->first_kick;
- u16 off_wrap;
- u16 event_idx;
- u16 new_idx = vq->last_used_idx;
- u16 old_idx = vq->last_kick;
-
- if (PREDICT_TRUE (vq->avail_event->flags == VRING_EVENT_F_DESC))
- {
- CLIB_COMPILER_BARRIER ();
- off_wrap = vq->avail_event->off_wrap;
- event_idx = off_wrap & 0x7fff;
- if (vq->used_wrap_counter != (off_wrap >> 15))
- event_idx -= (vq->qsz_mask + 1);
-
- if (new_idx <= old_idx)
- old_idx -= (vq->qsz_mask + 1);
-
- vq->first_kick = 1;
- vq->last_kick = event_idx;
- if (vhost_user_need_event (event_idx, new_idx, old_idx) ||
- PREDICT_FALSE (!first_kick))
- vhost_user_kick (vm, vq);
- else
- {
- vq->n_since_last_int = 0;
- vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time;
- }
- }
- else
- vhost_user_kick (vm, vq);
-}
-
-static_always_inline void
-vhost_user_send_call (vlib_main_t * vm, vhost_user_intf_t * vui,
- vhost_user_vring_t * vq)
-{
- if (vhost_user_is_event_idx_supported (vui))
- {
- if (vhost_user_is_packed_ring_supported (vui))
- vhost_user_send_call_event_idx_packed (vm, vq);
- else
- vhost_user_send_call_event_idx (vm, vq);
- }
- else
- vhost_user_kick (vm, vq);
-}
-
-static_always_inline u8
-vui_is_link_up (vhost_user_intf_t * vui)
-{
- return vui->admin_up && vui->is_ready;
-}
-
-static_always_inline void
-vhost_user_update_gso_interface_count (vhost_user_intf_t * vui, u8 add)
-{
- vhost_user_main_t *vum = &vhost_user_main;
-
- if (vui->enable_gso)
- {
- if (add)
- {
- vum->gso_count++;
- }
- else
- {
- ASSERT (vum->gso_count > 0);
- vum->gso_count--;
- }
- }
-}
-
-static_always_inline u8
-vhost_user_packed_desc_available (vhost_user_vring_t * vring, u16 idx)
-{
- return (((vring->packed_desc[idx].flags & VRING_DESC_F_AVAIL) ==
- vring->avail_wrap_counter));
-}
-
-static_always_inline void
-vhost_user_advance_last_avail_idx (vhost_user_vring_t * vring)
-{
- vring->last_avail_idx++;
- if (PREDICT_FALSE ((vring->last_avail_idx & vring->qsz_mask) == 0))
- {
- vring->avail_wrap_counter ^= VRING_DESC_F_AVAIL;
- vring->last_avail_idx = 0;
- }
-}
-
-static_always_inline void
-vhost_user_advance_last_avail_table_idx (vhost_user_intf_t * vui,
- vhost_user_vring_t * vring,
- u8 chained)
-{
- if (chained)
- {
- vnet_virtio_vring_packed_desc_t *desc_table = vring->packed_desc;
-
- /* pick up the slot of the next avail idx */
- while (desc_table[vring->last_avail_idx & vring->qsz_mask].flags &
- VRING_DESC_F_NEXT)
- vhost_user_advance_last_avail_idx (vring);
- }
-
- vhost_user_advance_last_avail_idx (vring);
-}
-
-static_always_inline void
-vhost_user_undo_advanced_last_avail_idx (vhost_user_vring_t * vring)
-{
- if (PREDICT_FALSE ((vring->last_avail_idx & vring->qsz_mask) == 0))
- vring->avail_wrap_counter ^= VRING_DESC_F_AVAIL;
-
- if (PREDICT_FALSE (vring->last_avail_idx == 0))
- vring->last_avail_idx = vring->qsz_mask;
- else
- vring->last_avail_idx--;
-}
-
-static_always_inline void
-vhost_user_dequeue_descs (vhost_user_vring_t *rxvq,
- vnet_virtio_net_hdr_mrg_rxbuf_t *hdr,
- u16 *n_descs_processed)
-{
- u16 i;
-
- *n_descs_processed -= (hdr->num_buffers - 1);
- for (i = 0; i < hdr->num_buffers - 1; i++)
- vhost_user_undo_advanced_last_avail_idx (rxvq);
-}
-
-static_always_inline void
-vhost_user_dequeue_chained_descs (vhost_user_vring_t * rxvq,
- u16 * n_descs_processed)
-{
- while (*n_descs_processed)
- {
- vhost_user_undo_advanced_last_avail_idx (rxvq);
- (*n_descs_processed)--;
- }
-}
-
-static_always_inline void
-vhost_user_advance_last_used_idx (vhost_user_vring_t * vring)
-{
- vring->last_used_idx++;
- if (PREDICT_FALSE ((vring->last_used_idx & vring->qsz_mask) == 0))
- {
- vring->used_wrap_counter ^= 1;
- vring->last_used_idx = 0;
- }
-}
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/virtio/vhost_user_input.c b/src/vnet/devices/virtio/vhost_user_input.c
deleted file mode 100644
index 841a9798212..00000000000
--- a/src/vnet/devices/virtio/vhost_user_input.c
+++ /dev/null
@@ -1,1474 +0,0 @@
-/*
- *------------------------------------------------------------------
- * vhost-user-input
- *
- * Copyright (c) 2014-2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <fcntl.h> /* for open */
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/uio.h> /* for iovec */
-#include <netinet/in.h>
-#include <sys/vfs.h>
-
-#include <linux/if_arp.h>
-#include <linux/if_tun.h>
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/devices/devices.h>
-#include <vnet/feature/feature.h>
-#include <vnet/udp/udp_packet.h>
-#include <vnet/tcp/tcp_packet.h>
-#include <vnet/interface/rx_queue_funcs.h>
-
-#include <vnet/devices/virtio/vhost_user.h>
-#include <vnet/devices/virtio/vhost_user_inline.h>
-
-#include <vnet/ip/ip4_packet.h>
-#include <vnet/ip/ip6_packet.h>
-
-/*
- * When an RX queue is down but active, received packets
- * must be discarded. This value controls up to how many
- * packets will be discarded during each round.
- */
-#define VHOST_USER_DOWN_DISCARD_COUNT 256
-
-/*
- * When the number of available buffers gets under this threshold,
- * RX node will start discarding packets.
- */
-#define VHOST_USER_RX_BUFFER_STARVATION 32
-
-/*
- * On the receive side, the host should free descriptors as soon
- * as possible in order to avoid TX drop in the VM.
- * This value controls the number of copy operations that are stacked
- * before copy is done for all and descriptors are given back to
- * the guest.
- * The value 64 was obtained by testing (48 and 128 were not as good).
- */
-#define VHOST_USER_RX_COPY_THRESHOLD 64
-
-extern vlib_node_registration_t vhost_user_input_node;
-
-#define foreach_vhost_user_input_func_error \
- _(NO_ERROR, "no error") \
- _(NO_BUFFER, "no available buffer") \
- _(MMAP_FAIL, "mmap failure") \
- _(INDIRECT_OVERFLOW, "indirect descriptor overflows table") \
- _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)") \
- _(NOT_READY, "vhost interface not ready or down") \
- _(FULL_RX_QUEUE, "full rx queue (possible driver tx drop)")
-
-typedef enum
-{
-#define _(f,s) VHOST_USER_INPUT_FUNC_ERROR_##f,
- foreach_vhost_user_input_func_error
-#undef _
- VHOST_USER_INPUT_FUNC_N_ERROR,
-} vhost_user_input_func_error_t;
-
-static __clib_unused char *vhost_user_input_func_error_strings[] = {
-#define _(n,s) s,
- foreach_vhost_user_input_func_error
-#undef _
-};
-
-static_always_inline void
-vhost_user_rx_trace (vhost_trace_t * t,
- vhost_user_intf_t * vui, u16 qid,
- vlib_buffer_t * b, vhost_user_vring_t * txvq,
- u16 last_avail_idx)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- u32 desc_current = txvq->avail->ring[last_avail_idx & txvq->qsz_mask];
- vnet_virtio_vring_desc_t *hdr_desc = 0;
- vnet_virtio_net_hdr_mrg_rxbuf_t *hdr;
- u32 hint = 0;
-
- clib_memset (t, 0, sizeof (*t));
- t->device_index = vui - vum->vhost_user_interfaces;
- t->qid = qid;
-
- hdr_desc = &txvq->desc[desc_current];
- if (txvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT)
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
- /* Header is the first here */
- hdr_desc = map_guest_mem (vui, txvq->desc[desc_current].addr, &hint);
- }
- if (txvq->desc[desc_current].flags & VRING_DESC_F_NEXT)
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
- }
- if (!(txvq->desc[desc_current].flags & VRING_DESC_F_NEXT) &&
- !(txvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT))
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
- }
-
- t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
-
- if (!hdr_desc || !(hdr = map_guest_mem (vui, hdr_desc->addr, &hint)))
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_MAP_ERROR;
- }
- else
- {
- u32 len = vui->virtio_net_hdr_sz;
- memcpy (&t->hdr, hdr, len > hdr_desc->len ? hdr_desc->len : len);
- }
-}
-
-static_always_inline u32
-vhost_user_input_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
- u16 copy_len, u32 * map_hint)
-{
- void *src0, *src1, *src2, *src3;
- if (PREDICT_TRUE (copy_len >= 4))
- {
- if (PREDICT_FALSE (!(src2 = map_guest_mem (vui, cpy[0].src, map_hint))))
- return 1;
- if (PREDICT_FALSE (!(src3 = map_guest_mem (vui, cpy[1].src, map_hint))))
- return 1;
-
- while (PREDICT_TRUE (copy_len >= 4))
- {
- src0 = src2;
- src1 = src3;
-
- if (PREDICT_FALSE
- (!(src2 = map_guest_mem (vui, cpy[2].src, map_hint))))
- return 1;
- if (PREDICT_FALSE
- (!(src3 = map_guest_mem (vui, cpy[3].src, map_hint))))
- return 1;
-
- clib_prefetch_load (src2);
- clib_prefetch_load (src3);
-
- clib_memcpy_fast ((void *) cpy[0].dst, src0, cpy[0].len);
- clib_memcpy_fast ((void *) cpy[1].dst, src1, cpy[1].len);
- copy_len -= 2;
- cpy += 2;
- }
- }
- while (copy_len)
- {
- if (PREDICT_FALSE (!(src0 = map_guest_mem (vui, cpy->src, map_hint))))
- return 1;
- clib_memcpy_fast ((void *) cpy->dst, src0, cpy->len);
- copy_len -= 1;
- cpy += 1;
- }
- return 0;
-}
-
-/**
- * Try to discard packets from the tx ring (VPP RX path).
- * Returns the number of discarded packets.
- */
-static_always_inline u32
-vhost_user_rx_discard_packet (vlib_main_t * vm,
- vhost_user_intf_t * vui,
- vhost_user_vring_t * txvq, u32 discard_max)
-{
- /*
- * On the RX side, each packet corresponds to one descriptor
- * (it is the same whether it is a shallow descriptor, chained, or indirect).
- * Therefore, discarding a packet is like discarding a descriptor.
- */
- u32 discarded_packets = 0;
- u32 avail_idx = txvq->avail->idx;
- u16 mask = txvq->qsz_mask;
- u16 last_avail_idx = txvq->last_avail_idx;
- u16 last_used_idx = txvq->last_used_idx;
- while (discarded_packets != discard_max)
- {
- if (avail_idx == last_avail_idx)
- goto out;
-
- u16 desc_chain_head = txvq->avail->ring[last_avail_idx & mask];
- last_avail_idx++;
- txvq->used->ring[last_used_idx & mask].id = desc_chain_head;
- txvq->used->ring[last_used_idx & mask].len = 0;
- vhost_user_log_dirty_ring (vui, txvq, ring[last_used_idx & mask]);
- last_used_idx++;
- discarded_packets++;
- }
-
-out:
- txvq->last_avail_idx = last_avail_idx;
- txvq->last_used_idx = last_used_idx;
- CLIB_MEMORY_STORE_BARRIER ();
- txvq->used->idx = txvq->last_used_idx;
- vhost_user_log_dirty_ring (vui, txvq, idx);
- return discarded_packets;
-}
-
-/*
- * In case of overflow, we need to rewind the array of allocated buffers.
- */
-static_always_inline void
-vhost_user_input_rewind_buffers (vlib_main_t * vm,
- vhost_cpu_t * cpu, vlib_buffer_t * b_head)
-{
- u32 bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
- vlib_buffer_t *b_current = vlib_get_buffer (vm, bi_current);
- b_current->current_length = 0;
- b_current->flags = 0;
- while (b_current != b_head)
- {
- cpu->rx_buffers_len++;
- bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
- b_current = vlib_get_buffer (vm, bi_current);
- b_current->current_length = 0;
- b_current->flags = 0;
- }
- cpu->rx_buffers_len++;
-}
-
-static_always_inline void
-vhost_user_handle_rx_offload (vlib_buffer_t *b0, u8 *b0_data,
- vnet_virtio_net_hdr_t *hdr)
-{
- u8 l4_hdr_sz = 0;
- u8 l4_proto = 0;
- ethernet_header_t *eh = (ethernet_header_t *) b0_data;
- u16 ethertype = clib_net_to_host_u16 (eh->type);
- u16 l2hdr_sz = sizeof (ethernet_header_t);
- vnet_buffer_oflags_t oflags = 0;
-
- if (ethernet_frame_is_tagged (ethertype))
- {
- ethernet_vlan_header_t *vlan = (ethernet_vlan_header_t *) (eh + 1);
-
- ethertype = clib_net_to_host_u16 (vlan->type);
- l2hdr_sz += sizeof (*vlan);
- if (ethertype == ETHERNET_TYPE_VLAN)
- {
- vlan++;
- ethertype = clib_net_to_host_u16 (vlan->type);
- l2hdr_sz += sizeof (*vlan);
- }
- }
- vnet_buffer (b0)->l2_hdr_offset = 0;
- vnet_buffer (b0)->l3_hdr_offset = l2hdr_sz;
- vnet_buffer (b0)->l4_hdr_offset = hdr->csum_start;
- b0->flags |= (VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
-
- if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP4))
- {
- ip4_header_t *ip4 = (ip4_header_t *) (b0_data + l2hdr_sz);
- l4_proto = ip4->protocol;
- b0->flags |= VNET_BUFFER_F_IS_IP4;
- oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
- }
- else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6))
- {
- ip6_header_t *ip6 = (ip6_header_t *) (b0_data + l2hdr_sz);
- l4_proto = ip6->protocol;
- b0->flags |= VNET_BUFFER_F_IS_IP6;
- }
-
- if (l4_proto == IP_PROTOCOL_TCP)
- {
- tcp_header_t *tcp = (tcp_header_t *)
- (b0_data + vnet_buffer (b0)->l4_hdr_offset);
- l4_hdr_sz = tcp_header_bytes (tcp);
- oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
- }
- else if (l4_proto == IP_PROTOCOL_UDP)
- {
- l4_hdr_sz = sizeof (udp_header_t);
- oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
- }
-
- if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP)
- {
- vnet_buffer2 (b0)->gso_size = hdr->gso_size;
- vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz;
- b0->flags |= VNET_BUFFER_F_GSO;
- }
- else if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV4)
- {
- vnet_buffer2 (b0)->gso_size = hdr->gso_size;
- vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz;
- b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP4);
- }
- else if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV6)
- {
- vnet_buffer2 (b0)->gso_size = hdr->gso_size;
- vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz;
- b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP6);
- }
-
- if (oflags)
- vnet_buffer_offload_flags_set (b0, oflags);
-}
-
-static_always_inline void
-vhost_user_input_do_interrupt (vlib_main_t * vm, vhost_user_intf_t * vui,
- vhost_user_vring_t * txvq,
- vhost_user_vring_t * rxvq)
-{
- f64 now = vlib_time_now (vm);
-
- if ((txvq->n_since_last_int) && (txvq->int_deadline < now))
- vhost_user_send_call (vm, vui, txvq);
-
- if ((rxvq->n_since_last_int) && (rxvq->int_deadline < now))
- vhost_user_send_call (vm, vui, rxvq);
-}
-
-static_always_inline void
-vhost_user_input_setup_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
- vhost_user_intf_t * vui,
- u32 * current_config_index, u32 * next_index,
- u32 ** to_next, u32 * n_left_to_next)
-{
- vnet_feature_main_t *fm = &feature_main;
- u8 feature_arc_idx = fm->device_input_feature_arc_index;
-
- if (PREDICT_FALSE (vnet_have_features (feature_arc_idx, vui->sw_if_index)))
- {
- vnet_feature_config_main_t *cm;
- cm = &fm->feature_config_mains[feature_arc_idx];
- *current_config_index = vec_elt (cm->config_index_by_sw_if_index,
- vui->sw_if_index);
- vnet_get_config_data (&cm->config_main, current_config_index,
- next_index, 0);
- }
-
- vlib_get_new_next_frame (vm, node, *next_index, *to_next, *n_left_to_next);
-
- if (*next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT)
- {
- /* give some hints to ethernet-input */
- vlib_next_frame_t *nf;
- vlib_frame_t *f;
- ethernet_input_frame_t *ef;
- nf = vlib_node_runtime_get_next_frame (vm, node, *next_index);
- f = vlib_get_frame (vm, nf->frame);
- f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
-
- ef = vlib_frame_scalar_args (f);
- ef->sw_if_index = vui->sw_if_index;
- ef->hw_if_index = vui->hw_if_index;
- vlib_frame_no_append (f);
- }
-}
-
-static_always_inline u32
-vhost_user_if_input (vlib_main_t *vm, vhost_user_main_t *vum,
- vhost_user_intf_t *vui, u16 qid,
- vlib_node_runtime_t *node, u8 enable_csum)
-{
- vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
- vnet_feature_main_t *fm = &feature_main;
- u16 n_rx_packets = 0;
- u32 n_rx_bytes = 0;
- u16 n_left;
- u32 n_left_to_next, *to_next;
- u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- u32 n_trace = vlib_get_trace_count (vm, node);
- u32 buffer_data_size = vlib_buffer_get_default_data_size (vm);
- u32 map_hint = 0;
- vhost_cpu_t *cpu = &vum->cpus[vm->thread_index];
- u16 copy_len = 0;
- u8 feature_arc_idx = fm->device_input_feature_arc_index;
- u32 current_config_index = ~(u32) 0;
- u16 mask = txvq->qsz_mask;
-
- /* The descriptor table is not ready yet */
- if (PREDICT_FALSE (txvq->avail == 0))
- goto done;
-
- {
- /* do we have pending interrupts ? */
- vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
- vhost_user_input_do_interrupt (vm, vui, txvq, rxvq);
- }
-
- /*
- * For adaptive mode, it is optimized to reduce interrupts.
- * If the scheduler switches the input node to polling due
- * to burst of traffic, we tell the driver no interrupt.
- * When the traffic subsides, the scheduler switches the node back to
- * interrupt mode. We must tell the driver we want interrupt.
- */
- if (PREDICT_FALSE (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
- {
- if ((node->flags &
- VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) ||
- !(node->flags &
- VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
- /* Tell driver we want notification */
- txvq->used->flags = 0;
- else
- /* Tell driver we don't want notification */
- txvq->used->flags = VRING_USED_F_NO_NOTIFY;
- }
-
- if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE))
- goto done;
-
- n_left = (u16) (txvq->avail->idx - txvq->last_avail_idx);
-
- /* nothing to do */
- if (PREDICT_FALSE (n_left == 0))
- goto done;
-
- if (PREDICT_FALSE (!vui->admin_up || !(txvq->enabled)))
- {
- /*
- * Discard input packet if interface is admin down or vring is not
- * enabled.
- * "For example, for a networking device, in the disabled state
- * client must not supply any new RX packets, but must process
- * and discard any TX packets."
- */
- vhost_user_rx_discard_packet (vm, vui, txvq,
- VHOST_USER_DOWN_DISCARD_COUNT);
- goto done;
- }
-
- if (PREDICT_FALSE (n_left == (mask + 1)))
- {
- /*
- * Informational error logging when VPP is not
- * receiving packets fast enough.
- */
- vlib_error_count (vm, node->node_index,
- VHOST_USER_INPUT_FUNC_ERROR_FULL_RX_QUEUE, 1);
- }
-
- if (n_left > VLIB_FRAME_SIZE)
- n_left = VLIB_FRAME_SIZE;
-
- /*
- * For small packets (<2kB), we will not need more than one vlib buffer
- * per packet. In case packets are bigger, we will just yield at some point
- * in the loop and come back later. This is not an issue as for big packet,
- * processing cost really comes from the memory copy.
- * The assumption is that big packets will fit in 40 buffers.
- */
- if (PREDICT_FALSE (cpu->rx_buffers_len < n_left + 1 ||
- cpu->rx_buffers_len < 40))
- {
- u32 curr_len = cpu->rx_buffers_len;
- cpu->rx_buffers_len +=
- vlib_buffer_alloc (vm, cpu->rx_buffers + curr_len,
- VHOST_USER_RX_BUFFERS_N - curr_len);
-
- if (PREDICT_FALSE
- (cpu->rx_buffers_len < VHOST_USER_RX_BUFFER_STARVATION))
- {
- /* In case of buffer starvation, discard some packets from the queue
- * and log the event.
- * We keep doing best effort for the remaining packets. */
- u32 flush = (n_left + 1 > cpu->rx_buffers_len) ?
- n_left + 1 - cpu->rx_buffers_len : 1;
- flush = vhost_user_rx_discard_packet (vm, vui, txvq, flush);
-
- n_left -= flush;
- vlib_increment_simple_counter (vnet_main.
- interface_main.sw_if_counters +
- VNET_INTERFACE_COUNTER_DROP,
- vm->thread_index, vui->sw_if_index,
- flush);
-
- vlib_error_count (vm, vhost_user_input_node.index,
- VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, flush);
- }
- }
-
- vhost_user_input_setup_frame (vm, node, vui, &current_config_index,
- &next_index, &to_next, &n_left_to_next);
-
- u16 last_avail_idx = txvq->last_avail_idx;
- u16 last_used_idx = txvq->last_used_idx;
-
- while (n_left > 0)
- {
- vlib_buffer_t *b_head, *b_current;
- u32 bi_current;
- u16 desc_current;
- u32 desc_data_offset;
- vnet_virtio_vring_desc_t *desc_table = txvq->desc;
-
- if (PREDICT_FALSE (cpu->rx_buffers_len <= 1))
- {
- /* Not enough rx_buffers
- * Note: We yeld on 1 so we don't need to do an additional
- * check for the next buffer prefetch.
- */
- n_left = 0;
- break;
- }
-
- desc_current = txvq->avail->ring[last_avail_idx & mask];
- cpu->rx_buffers_len--;
- bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
- b_head = b_current = vlib_get_buffer (vm, bi_current);
- to_next[0] = bi_current; //We do that now so we can forget about bi_current
- to_next++;
- n_left_to_next--;
-
- vlib_prefetch_buffer_with_index
- (vm, cpu->rx_buffers[cpu->rx_buffers_len - 1], LOAD);
-
- /* Just preset the used descriptor id and length for later */
- txvq->used->ring[last_used_idx & mask].id = desc_current;
- txvq->used->ring[last_used_idx & mask].len = 0;
- vhost_user_log_dirty_ring (vui, txvq, ring[last_used_idx & mask]);
-
- /* The buffer should already be initialized */
- b_head->total_length_not_including_first_buffer = 0;
- b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
-
- if (PREDICT_FALSE
- (n_trace > 0 && vlib_trace_buffer (vm, node, next_index, b_head,
- /* follow_chain */ 0)))
- {
- vhost_trace_t *t0 =
- vlib_add_trace (vm, node, b_head, sizeof (t0[0]));
- vhost_user_rx_trace (t0, vui, qid, b_head, txvq, last_avail_idx);
- n_trace--;
- vlib_set_trace_count (vm, node, n_trace);
- }
-
- /* This depends on the setup but is very consistent
- * So I think the CPU branch predictor will make a pretty good job
- * at optimizing the decision. */
- if (txvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT)
- {
- desc_table = map_guest_mem (vui, txvq->desc[desc_current].addr,
- &map_hint);
- desc_current = 0;
- if (PREDICT_FALSE (desc_table == 0))
- {
- vlib_error_count (vm, node->node_index,
- VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
- goto out;
- }
- }
-
- desc_data_offset = vui->virtio_net_hdr_sz;
-
- if (enable_csum)
- {
- vnet_virtio_net_hdr_mrg_rxbuf_t *hdr;
- u8 *b_data;
- u16 current;
-
- hdr = map_guest_mem (vui, desc_table[desc_current].addr, &map_hint);
- if (PREDICT_FALSE (hdr == 0))
- {
- vlib_error_count (vm, node->node_index,
- VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
- goto out;
- }
- if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
- {
- if ((desc_data_offset == desc_table[desc_current].len) &&
- (desc_table[desc_current].flags & VRING_DESC_F_NEXT))
- {
- current = desc_table[desc_current].next;
- b_data = map_guest_mem (vui, desc_table[current].addr,
- &map_hint);
- if (PREDICT_FALSE (b_data == 0))
- {
- vlib_error_count (vm, node->node_index,
- VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL,
- 1);
- goto out;
- }
- }
- else
- b_data = (u8 *) hdr + desc_data_offset;
-
- vhost_user_handle_rx_offload (b_head, b_data, &hdr->hdr);
- }
- }
-
- while (1)
- {
- /* Get more input if necessary. Or end of packet. */
- if (desc_data_offset == desc_table[desc_current].len)
- {
- if (PREDICT_FALSE (desc_table[desc_current].flags &
- VRING_DESC_F_NEXT))
- {
- desc_current = desc_table[desc_current].next;
- desc_data_offset = 0;
- }
- else
- {
- goto out;
- }
- }
-
- /* Get more output if necessary. Or end of packet. */
- if (PREDICT_FALSE (b_current->current_length == buffer_data_size))
- {
- if (PREDICT_FALSE (cpu->rx_buffers_len == 0))
- {
- /* Cancel speculation */
- to_next--;
- n_left_to_next++;
-
- /*
- * Checking if there are some left buffers.
- * If not, just rewind the used buffers and stop.
- * Note: Scheduled copies are not cancelled. This is
- * not an issue as they would still be valid. Useless,
- * but valid.
- */
- vhost_user_input_rewind_buffers (vm, cpu, b_head);
- n_left = 0;
- goto stop;
- }
-
- /* Get next output */
- cpu->rx_buffers_len--;
- u32 bi_next = cpu->rx_buffers[cpu->rx_buffers_len];
- b_current->next_buffer = bi_next;
- b_current->flags |= VLIB_BUFFER_NEXT_PRESENT;
- bi_current = bi_next;
- b_current = vlib_get_buffer (vm, bi_current);
- }
-
- /* Prepare a copy order executed later for the data */
- ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
- vhost_copy_t *cpy = &cpu->copy[copy_len];
- copy_len++;
- u32 desc_data_l = desc_table[desc_current].len - desc_data_offset;
- cpy->len = buffer_data_size - b_current->current_length;
- cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len;
- cpy->dst = (uword) (vlib_buffer_get_current (b_current) +
- b_current->current_length);
- cpy->src = desc_table[desc_current].addr + desc_data_offset;
-
- desc_data_offset += cpy->len;
-
- b_current->current_length += cpy->len;
- b_head->total_length_not_including_first_buffer += cpy->len;
- }
-
- out:
-
- n_rx_bytes += b_head->total_length_not_including_first_buffer;
- n_rx_packets++;
-
- b_head->total_length_not_including_first_buffer -=
- b_head->current_length;
-
- /* consume the descriptor and return it as used */
- last_avail_idx++;
- last_used_idx++;
-
- vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
- vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- b_head->error = 0;
-
- if (current_config_index != ~(u32) 0)
- {
- b_head->current_config_index = current_config_index;
- vnet_buffer (b_head)->feature_arc_index = feature_arc_idx;
- }
-
- n_left--;
-
- /*
- * Although separating memory copies from virtio ring parsing
- * is beneficial, we can offer to perform the copies from time
- * to time in order to free some space in the ring.
- */
- if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD))
- {
- if (PREDICT_FALSE (vhost_user_input_copy (vui, cpu->copy,
- copy_len, &map_hint)))
- {
- vlib_error_count (vm, node->node_index,
- VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
- }
- copy_len = 0;
-
- /* give buffers back to driver */
- CLIB_MEMORY_STORE_BARRIER ();
- txvq->used->idx = last_used_idx;
- vhost_user_log_dirty_ring (vui, txvq, idx);
- }
- }
-stop:
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-
- txvq->last_used_idx = last_used_idx;
- txvq->last_avail_idx = last_avail_idx;
-
- /* Do the memory copies */
- if (PREDICT_FALSE (vhost_user_input_copy (vui, cpu->copy, copy_len,
- &map_hint)))
- {
- vlib_error_count (vm, node->node_index,
- VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
- }
-
- /* give buffers back to driver */
- CLIB_MEMORY_STORE_BARRIER ();
- txvq->used->idx = txvq->last_used_idx;
- vhost_user_log_dirty_ring (vui, txvq, idx);
-
- /* interrupt (call) handling */
- if ((txvq->callfd_idx != ~0) &&
- !(txvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
- {
- txvq->n_since_last_int += n_rx_packets;
-
- if (txvq->n_since_last_int > vum->coalesce_frames)
- vhost_user_send_call (vm, vui, txvq);
- }
-
- /* increase rx counters */
- vlib_increment_combined_counter
- (vnet_main.interface_main.combined_sw_if_counters
- + VNET_INTERFACE_COUNTER_RX, vm->thread_index, vui->sw_if_index,
- n_rx_packets, n_rx_bytes);
-
- vnet_device_increment_rx_packets (vm->thread_index, n_rx_packets);
-
-done:
- return n_rx_packets;
-}
-
-static_always_inline void
-vhost_user_mark_desc_consumed (vhost_user_intf_t * vui,
- vhost_user_vring_t * txvq, u16 desc_head,
- u16 n_descs_processed)
-{
- vnet_virtio_vring_packed_desc_t *desc_table = txvq->packed_desc;
- u16 desc_idx;
- u16 mask = txvq->qsz_mask;
-
- for (desc_idx = 0; desc_idx < n_descs_processed; desc_idx++)
- {
- if (txvq->used_wrap_counter)
- desc_table[(desc_head + desc_idx) & mask].flags |=
- (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
- else
- desc_table[(desc_head + desc_idx) & mask].flags &=
- ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
- vhost_user_advance_last_used_idx (txvq);
- }
-}
-
-static_always_inline void
-vhost_user_rx_trace_packed (vhost_trace_t * t, vhost_user_intf_t * vui,
- u16 qid, vhost_user_vring_t * txvq,
- u16 desc_current)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- vnet_virtio_vring_packed_desc_t *hdr_desc;
- vnet_virtio_net_hdr_mrg_rxbuf_t *hdr;
- u32 hint = 0;
-
- clib_memset (t, 0, sizeof (*t));
- t->device_index = vui - vum->vhost_user_interfaces;
- t->qid = qid;
-
- hdr_desc = &txvq->packed_desc[desc_current];
- if (txvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT)
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
- /* Header is the first here */
- hdr_desc = map_guest_mem (vui, txvq->packed_desc[desc_current].addr,
- &hint);
- }
- if (txvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT)
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
-
- if (!(txvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT) &&
- !(txvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT))
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
-
- t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
-
- if (!hdr_desc || !(hdr = map_guest_mem (vui, hdr_desc->addr, &hint)))
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_MAP_ERROR;
- else
- {
- u32 len = vui->virtio_net_hdr_sz;
- clib_memcpy_fast (&t->hdr, hdr,
- len > hdr_desc->len ? hdr_desc->len : len);
- }
-}
-
-static_always_inline u32
-vhost_user_rx_discard_packet_packed (vlib_main_t * vm,
- vhost_user_intf_t * vui,
- vhost_user_vring_t * txvq,
- u32 discard_max)
-{
- u32 discarded_packets = 0;
- u16 mask = txvq->qsz_mask;
- u16 desc_current, desc_head;
-
- desc_head = desc_current = txvq->last_used_idx & mask;
-
- /*
- * On the RX side, each packet corresponds to one descriptor
- * (it is the same whether it is a shallow descriptor, chained, or indirect).
- * Therefore, discarding a packet is like discarding a descriptor.
- */
- while ((discarded_packets != discard_max) &&
- vhost_user_packed_desc_available (txvq, desc_current))
- {
- vhost_user_advance_last_avail_idx (txvq);
- discarded_packets++;
- desc_current = (desc_current + 1) & mask;
- }
-
- if (PREDICT_TRUE (discarded_packets))
- vhost_user_mark_desc_consumed (vui, txvq, desc_head, discarded_packets);
- return (discarded_packets);
-}
-
-static_always_inline u32
-vhost_user_input_copy_packed (vhost_user_intf_t * vui, vhost_copy_t * cpy,
- u16 copy_len, u32 * map_hint)
-{
- void *src0, *src1, *src2, *src3, *src4, *src5, *src6, *src7;
- u8 bad;
- u32 rc = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
-
- if (PREDICT_TRUE (copy_len >= 8))
- {
- src4 = map_guest_mem (vui, cpy[0].src, map_hint);
- src5 = map_guest_mem (vui, cpy[1].src, map_hint);
- src6 = map_guest_mem (vui, cpy[2].src, map_hint);
- src7 = map_guest_mem (vui, cpy[3].src, map_hint);
- bad = (src4 == 0) + (src5 == 0) + (src6 == 0) + (src7 == 0);
- if (PREDICT_FALSE (bad))
- goto one_by_one;
- clib_prefetch_load (src4);
- clib_prefetch_load (src5);
- clib_prefetch_load (src6);
- clib_prefetch_load (src7);
-
- while (PREDICT_TRUE (copy_len >= 8))
- {
- src0 = src4;
- src1 = src5;
- src2 = src6;
- src3 = src7;
-
- src4 = map_guest_mem (vui, cpy[4].src, map_hint);
- src5 = map_guest_mem (vui, cpy[5].src, map_hint);
- src6 = map_guest_mem (vui, cpy[6].src, map_hint);
- src7 = map_guest_mem (vui, cpy[7].src, map_hint);
- bad = (src4 == 0) + (src5 == 0) + (src6 == 0) + (src7 == 0);
- if (PREDICT_FALSE (bad))
- break;
-
- clib_prefetch_load (src4);
- clib_prefetch_load (src5);
- clib_prefetch_load (src6);
- clib_prefetch_load (src7);
-
- clib_memcpy_fast ((void *) cpy[0].dst, src0, cpy[0].len);
- clib_memcpy_fast ((void *) cpy[1].dst, src1, cpy[1].len);
- clib_memcpy_fast ((void *) cpy[2].dst, src2, cpy[2].len);
- clib_memcpy_fast ((void *) cpy[3].dst, src3, cpy[3].len);
- copy_len -= 4;
- cpy += 4;
- }
- }
-
-one_by_one:
- while (copy_len)
- {
- if (PREDICT_FALSE (!(src0 = map_guest_mem (vui, cpy->src, map_hint))))
- {
- rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
- break;
- }
- clib_memcpy_fast ((void *) cpy->dst, src0, cpy->len);
- copy_len -= 1;
- cpy += 1;
- }
- return rc;
-}
-
-static_always_inline u32
-vhost_user_do_offload (vhost_user_intf_t *vui,
- vnet_virtio_vring_packed_desc_t *desc_table,
- u16 desc_current, u16 mask, vlib_buffer_t *b_head,
- u32 *map_hint)
-{
- u32 rc = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
- vnet_virtio_net_hdr_mrg_rxbuf_t *hdr;
- u8 *b_data;
- u32 desc_data_offset = vui->virtio_net_hdr_sz;
-
- hdr = map_guest_mem (vui, desc_table[desc_current].addr, map_hint);
- if (PREDICT_FALSE (hdr == 0))
- rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
- else if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
- {
- if (desc_data_offset == desc_table[desc_current].len)
- {
- desc_current = (desc_current + 1) & mask;
- b_data =
- map_guest_mem (vui, desc_table[desc_current].addr, map_hint);
- if (PREDICT_FALSE (b_data == 0))
- rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
- else
- vhost_user_handle_rx_offload (b_head, b_data, &hdr->hdr);
- }
- else
- {
- b_data = (u8 *) hdr + desc_data_offset;
- vhost_user_handle_rx_offload (b_head, b_data, &hdr->hdr);
- }
- }
-
- return rc;
-}
-
-static_always_inline u32
-vhost_user_compute_buffers_required (u32 desc_len, u32 buffer_data_size)
-{
- div_t result;
- u32 buffers_required;
-
- if (PREDICT_TRUE (buffer_data_size == 2048))
- {
- buffers_required = desc_len >> 11;
- if ((desc_len & 2047) != 0)
- buffers_required++;
- return (buffers_required);
- }
-
- if (desc_len < buffer_data_size)
- return 1;
-
- result = div (desc_len, buffer_data_size);
- if (result.rem)
- buffers_required = result.quot + 1;
- else
- buffers_required = result.quot;
-
- return (buffers_required);
-}
-
-static_always_inline u32
-vhost_user_compute_indirect_desc_len (vhost_user_intf_t * vui,
- vhost_user_vring_t * txvq,
- u32 buffer_data_size, u16 desc_current,
- u32 * map_hint)
-{
- vnet_virtio_vring_packed_desc_t *desc_table = txvq->packed_desc;
- u32 desc_len = 0;
- u16 desc_data_offset = vui->virtio_net_hdr_sz;
- u16 desc_idx = desc_current;
- u32 n_descs;
-
- n_descs = desc_table[desc_idx].len >> 4;
- desc_table = map_guest_mem (vui, desc_table[desc_idx].addr, map_hint);
- if (PREDICT_FALSE (desc_table == 0))
- return 0;
-
- for (desc_idx = 0; desc_idx < n_descs; desc_idx++)
- desc_len += desc_table[desc_idx].len;
-
- if (PREDICT_TRUE (desc_len > desc_data_offset))
- desc_len -= desc_data_offset;
-
- return vhost_user_compute_buffers_required (desc_len, buffer_data_size);
-}
-
-static_always_inline u32
-vhost_user_compute_chained_desc_len (vhost_user_intf_t * vui,
- vhost_user_vring_t * txvq,
- u32 buffer_data_size, u16 * current,
- u16 * n_left)
-{
- vnet_virtio_vring_packed_desc_t *desc_table = txvq->packed_desc;
- u32 desc_len = 0;
- u16 mask = txvq->qsz_mask;
-
- while (desc_table[*current].flags & VRING_DESC_F_NEXT)
- {
- desc_len += desc_table[*current].len;
- (*n_left)++;
- *current = (*current + 1) & mask;
- vhost_user_advance_last_avail_idx (txvq);
- }
- desc_len += desc_table[*current].len;
- (*n_left)++;
- *current = (*current + 1) & mask;
- vhost_user_advance_last_avail_idx (txvq);
-
- if (PREDICT_TRUE (desc_len > vui->virtio_net_hdr_sz))
- desc_len -= vui->virtio_net_hdr_sz;
-
- return vhost_user_compute_buffers_required (desc_len, buffer_data_size);
-}
-
-static_always_inline void
-vhost_user_assemble_packet (vnet_virtio_vring_packed_desc_t *desc_table,
- u16 *desc_idx, vlib_buffer_t *b_head,
- vlib_buffer_t **b_current, u32 **next,
- vlib_buffer_t ***b, u32 *bi_current,
- vhost_cpu_t *cpu, u16 *copy_len, u32 *buffers_used,
- u32 buffers_required, u32 *desc_data_offset,
- u32 buffer_data_size, u16 mask)
-{
- u32 desc_data_l;
-
- while (*desc_data_offset < desc_table[*desc_idx].len)
- {
- /* Get more output if necessary. Or end of packet. */
- if (PREDICT_FALSE ((*b_current)->current_length == buffer_data_size))
- {
- /* Get next output */
- u32 bi_next = **next;
- (*next)++;
- (*b_current)->next_buffer = bi_next;
- (*b_current)->flags |= VLIB_BUFFER_NEXT_PRESENT;
- *bi_current = bi_next;
- *b_current = **b;
- (*b)++;
- (*buffers_used)++;
- ASSERT (*buffers_used <= buffers_required);
- }
-
- /* Prepare a copy order executed later for the data */
- ASSERT (*copy_len < VHOST_USER_COPY_ARRAY_N);
- vhost_copy_t *cpy = &cpu->copy[*copy_len];
- (*copy_len)++;
- desc_data_l = desc_table[*desc_idx].len - *desc_data_offset;
- cpy->len = buffer_data_size - (*b_current)->current_length;
- cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len;
- cpy->dst = (uword) (vlib_buffer_get_current (*b_current) +
- (*b_current)->current_length);
- cpy->src = desc_table[*desc_idx].addr + *desc_data_offset;
-
- *desc_data_offset += cpy->len;
-
- (*b_current)->current_length += cpy->len;
- b_head->total_length_not_including_first_buffer += cpy->len;
- }
- *desc_idx = (*desc_idx + 1) & mask;;
- *desc_data_offset = 0;
-}
-
-static_always_inline u32
-vhost_user_if_input_packed (vlib_main_t *vm, vhost_user_main_t *vum,
- vhost_user_intf_t *vui, u16 qid,
- vlib_node_runtime_t *node, u8 enable_csum)
-{
- vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
- vnet_feature_main_t *fm = &feature_main;
- u8 feature_arc_idx = fm->device_input_feature_arc_index;
- u16 n_rx_packets = 0;
- u32 n_rx_bytes = 0;
- u16 n_left = 0;
- u32 buffers_required = 0;
- u32 n_left_to_next, *to_next;
- u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- u32 n_trace = vlib_get_trace_count (vm, node);
- u32 buffer_data_size = vlib_buffer_get_default_data_size (vm);
- u32 map_hint = 0;
- vhost_cpu_t *cpu = &vum->cpus[vm->thread_index];
- u16 copy_len = 0;
- u32 current_config_index = ~0;
- u16 mask = txvq->qsz_mask;
- u16 desc_current, desc_head, last_used_idx;
- vnet_virtio_vring_packed_desc_t *desc_table = 0;
- u32 n_descs_processed = 0;
- u32 rv;
- vlib_buffer_t **b;
- u32 *next;
- u32 buffers_used = 0;
- u16 current, n_descs_to_process;
-
- /* The descriptor table is not ready yet */
- if (PREDICT_FALSE (txvq->packed_desc == 0))
- goto done;
-
- /* do we have pending interrupts ? */
- vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
- vhost_user_input_do_interrupt (vm, vui, txvq, rxvq);
-
- /*
- * For adaptive mode, it is optimized to reduce interrupts.
- * If the scheduler switches the input node to polling due
- * to burst of traffic, we tell the driver no interrupt.
- * When the traffic subsides, the scheduler switches the node back to
- * interrupt mode. We must tell the driver we want interrupt.
- */
- if (PREDICT_FALSE (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
- {
- if ((node->flags &
- VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) ||
- !(node->flags &
- VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
- /* Tell driver we want notification */
- txvq->used_event->flags = 0;
- else
- /* Tell driver we don't want notification */
- txvq->used_event->flags = VRING_EVENT_F_DISABLE;
- }
-
- last_used_idx = txvq->last_used_idx & mask;
- desc_head = desc_current = last_used_idx;
-
- if (vhost_user_packed_desc_available (txvq, desc_current) == 0)
- goto done;
-
- if (PREDICT_FALSE (!vui->admin_up || !vui->is_ready || !(txvq->enabled)))
- {
- /*
- * Discard input packet if interface is admin down or vring is not
- * enabled.
- * "For example, for a networking device, in the disabled state
- * client must not supply any new RX packets, but must process
- * and discard any TX packets."
- */
- rv = vhost_user_rx_discard_packet_packed (vm, vui, txvq,
- VHOST_USER_DOWN_DISCARD_COUNT);
- vlib_error_count (vm, vhost_user_input_node.index,
- VHOST_USER_INPUT_FUNC_ERROR_NOT_READY, rv);
- goto done;
- }
-
- vhost_user_input_setup_frame (vm, node, vui, &current_config_index,
- &next_index, &to_next, &n_left_to_next);
-
- /*
- * Compute n_left and total buffers needed
- */
- desc_table = txvq->packed_desc;
- current = desc_current;
- while (vhost_user_packed_desc_available (txvq, current) &&
- (n_left < VLIB_FRAME_SIZE))
- {
- if (desc_table[current].flags & VRING_DESC_F_INDIRECT)
- {
- buffers_required +=
- vhost_user_compute_indirect_desc_len (vui, txvq, buffer_data_size,
- current, &map_hint);
- n_left++;
- current = (current + 1) & mask;
- vhost_user_advance_last_avail_idx (txvq);
- }
- else
- {
- buffers_required +=
- vhost_user_compute_chained_desc_len (vui, txvq, buffer_data_size,
- &current, &n_left);
- }
- }
-
- /* Something is broken if we need more than 10000 buffers */
- if (PREDICT_FALSE ((buffers_required == 0) || (buffers_required > 10000)))
- {
- rv = vhost_user_rx_discard_packet_packed (vm, vui, txvq, n_left);
- vlib_error_count (vm, vhost_user_input_node.index,
- VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, rv);
- goto done;
- }
-
- vec_validate (cpu->to_next_list, buffers_required);
- rv = vlib_buffer_alloc (vm, cpu->to_next_list, buffers_required);
- if (PREDICT_FALSE (rv != buffers_required))
- {
- vlib_buffer_free (vm, cpu->to_next_list, rv);
- rv = vhost_user_rx_discard_packet_packed (vm, vui, txvq, n_left);
- vlib_error_count (vm, vhost_user_input_node.index,
- VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, rv);
- goto done;
- }
-
- next = cpu->to_next_list;
- vec_validate (cpu->rx_buffers_pdesc, buffers_required);
- vlib_get_buffers (vm, next, cpu->rx_buffers_pdesc, buffers_required);
- b = cpu->rx_buffers_pdesc;
- n_descs_processed = n_left;
-
- while (n_left)
- {
- vlib_buffer_t *b_head, *b_current;
- u32 bi_current;
- u32 desc_data_offset;
- u16 desc_idx = desc_current;
- u32 n_descs;
-
- desc_table = txvq->packed_desc;
- to_next[0] = bi_current = next[0];
- b_head = b_current = b[0];
- b++;
- buffers_used++;
- ASSERT (buffers_used <= buffers_required);
- to_next++;
- next++;
- n_left_to_next--;
-
- /* The buffer should already be initialized */
- b_head->total_length_not_including_first_buffer = 0;
- b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
- desc_data_offset = vui->virtio_net_hdr_sz;
- n_descs_to_process = 1;
-
- if (desc_table[desc_idx].flags & VRING_DESC_F_INDIRECT)
- {
- n_descs = desc_table[desc_idx].len >> 4;
- desc_table = map_guest_mem (vui, desc_table[desc_idx].addr,
- &map_hint);
- desc_idx = 0;
- if (PREDICT_FALSE (desc_table == 0) ||
- (enable_csum &&
- (PREDICT_FALSE
- (vhost_user_do_offload
- (vui, desc_table, desc_idx, mask, b_head,
- &map_hint) != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))))
- {
- vlib_error_count (vm, node->node_index,
- VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
- to_next--;
- next--;
- n_left_to_next++;
- buffers_used--;
- b--;
- goto out;
- }
- while (n_descs)
- {
- vhost_user_assemble_packet (desc_table, &desc_idx, b_head,
- &b_current, &next, &b, &bi_current,
- cpu, &copy_len, &buffers_used,
- buffers_required, &desc_data_offset,
- buffer_data_size, mask);
- n_descs--;
- }
- }
- else
- {
- if (enable_csum)
- {
- rv = vhost_user_do_offload (vui, desc_table, desc_idx, mask,
- b_head, &map_hint);
- if (PREDICT_FALSE (rv != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))
- {
- vlib_error_count (vm, node->node_index, rv, 1);
- to_next--;
- next--;
- n_left_to_next++;
- buffers_used--;
- b--;
- goto out;
- }
- }
- /*
- * For chained descriptor, we process all chains in a single while
- * loop. So count how many descriptors in the chain.
- */
- n_descs_to_process = 1;
- while (desc_table[desc_idx].flags & VRING_DESC_F_NEXT)
- {
- vhost_user_assemble_packet (desc_table, &desc_idx, b_head,
- &b_current, &next, &b, &bi_current,
- cpu, &copy_len, &buffers_used,
- buffers_required, &desc_data_offset,
- buffer_data_size, mask);
- n_descs_to_process++;
- }
- vhost_user_assemble_packet (desc_table, &desc_idx, b_head,
- &b_current, &next, &b, &bi_current,
- cpu, &copy_len, &buffers_used,
- buffers_required, &desc_data_offset,
- buffer_data_size, mask);
- }
-
- n_rx_bytes += b_head->total_length_not_including_first_buffer;
- n_rx_packets++;
-
- b_head->total_length_not_including_first_buffer -=
- b_head->current_length;
-
- vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
- vnet_buffer (b_head)->sw_if_index[VLIB_TX] = ~0;
- b_head->error = 0;
-
- if (current_config_index != ~0)
- {
- b_head->current_config_index = current_config_index;
- vnet_buffer (b_head)->feature_arc_index = feature_arc_idx;
- }
-
- out:
- ASSERT (n_left >= n_descs_to_process);
- n_left -= n_descs_to_process;
-
- /* advance to next descrptor */
- desc_current = (desc_current + n_descs_to_process) & mask;
-
- /*
- * Although separating memory copies from virtio ring parsing
- * is beneficial, we can offer to perform the copies from time
- * to time in order to free some space in the ring.
- */
- if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD))
- {
- rv = vhost_user_input_copy_packed (vui, cpu->copy, copy_len,
- &map_hint);
- if (PREDICT_FALSE (rv != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))
- vlib_error_count (vm, node->node_index, rv, 1);
- copy_len = 0;
- }
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-
- /* Do the memory copies */
- rv = vhost_user_input_copy_packed (vui, cpu->copy, copy_len, &map_hint);
- if (PREDICT_FALSE (rv != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))
- vlib_error_count (vm, node->node_index, rv, 1);
-
- /* Must do the tracing before giving buffers back to driver */
- if (PREDICT_FALSE (n_trace))
- {
- u32 left = n_rx_packets;
-
- b = cpu->rx_buffers_pdesc;
- while (n_trace && left)
- {
- if (PREDICT_TRUE
- (vlib_trace_buffer
- (vm, node, next_index, b[0], /* follow_chain */ 0)))
- {
- vhost_trace_t *t0;
- t0 = vlib_add_trace (vm, node, b[0], sizeof (t0[0]));
- vhost_user_rx_trace_packed (t0, vui, qid, txvq, last_used_idx);
- last_used_idx = (last_used_idx + 1) & mask;
- n_trace--;
- vlib_set_trace_count (vm, node, n_trace);
- }
- left--;
- b++;
- }
- }
-
- /*
- * Give buffers back to driver.
- */
- vhost_user_mark_desc_consumed (vui, txvq, desc_head, n_descs_processed);
-
- /* interrupt (call) handling */
- if ((txvq->callfd_idx != ~0) &&
- (txvq->avail_event->flags != VRING_EVENT_F_DISABLE))
- {
- txvq->n_since_last_int += n_rx_packets;
- if (txvq->n_since_last_int > vum->coalesce_frames)
- vhost_user_send_call (vm, vui, txvq);
- }
-
- /* increase rx counters */
- vlib_increment_combined_counter
- (vnet_main.interface_main.combined_sw_if_counters
- + VNET_INTERFACE_COUNTER_RX, vm->thread_index, vui->sw_if_index,
- n_rx_packets, n_rx_bytes);
-
- vnet_device_increment_rx_packets (vm->thread_index, n_rx_packets);
-
- if (PREDICT_FALSE (buffers_used < buffers_required))
- vlib_buffer_free (vm, next, buffers_required - buffers_used);
-
-done:
- return n_rx_packets;
-}
-
-VLIB_NODE_FN (vhost_user_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- uword n_rx_packets = 0;
- vhost_user_intf_t *vui;
- vnet_hw_if_rxq_poll_vector_t *pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
- vnet_hw_if_rxq_poll_vector_t *pve;
-
- vec_foreach (pve, pv)
- {
- vui = pool_elt_at_index (vum->vhost_user_interfaces, pve->dev_instance);
- if (vhost_user_is_packed_ring_supported (vui))
- {
- if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM))
- n_rx_packets += vhost_user_if_input_packed (
- vm, vum, vui, pve->queue_id, node, 1);
- else
- n_rx_packets += vhost_user_if_input_packed (
- vm, vum, vui, pve->queue_id, node, 0);
- }
- else
- {
- if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM))
- n_rx_packets +=
- vhost_user_if_input (vm, vum, vui, pve->queue_id, node, 1);
- else
- n_rx_packets +=
- vhost_user_if_input (vm, vum, vui, pve->queue_id, node, 0);
- }
- }
-
- return n_rx_packets;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (vhost_user_input_node) = {
- .type = VLIB_NODE_TYPE_INPUT,
- .name = "vhost-user-input",
- .sibling_of = "device-input",
- .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
-
- /* Will be enabled if/when hardware is detected. */
- .state = VLIB_NODE_STATE_DISABLED,
-
- .format_buffer = format_ethernet_header_with_length,
- .format_trace = format_vhost_trace,
-
- .n_errors = VHOST_USER_INPUT_FUNC_N_ERROR,
- .error_strings = vhost_user_input_func_error_strings,
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/virtio/vhost_user_output.c b/src/vnet/devices/virtio/vhost_user_output.c
deleted file mode 100644
index 3b7bf97c3f8..00000000000
--- a/src/vnet/devices/virtio/vhost_user_output.c
+++ /dev/null
@@ -1,1145 +0,0 @@
-/*
- *------------------------------------------------------------------
- * vhost-user-output
- *
- * Copyright (c) 2014-2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <stddef.h>
-#include <fcntl.h> /* for open */
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/uio.h> /* for iovec */
-#include <netinet/in.h>
-#include <sys/vfs.h>
-
-#include <linux/if_arp.h>
-#include <linux/if_tun.h>
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/devices/devices.h>
-#include <vnet/feature/feature.h>
-#include <vnet/ip/ip_psh_cksum.h>
-
-#include <vnet/devices/virtio/vhost_user.h>
-#include <vnet/devices/virtio/vhost_user_inline.h>
-
-#include <vnet/gso/hdr_offset_parser.h>
-/*
- * On the transmit side, we keep processing the buffers from vlib in the while
- * loop and prepare the copy order to be executed later. However, the static
- * array which we keep the copy order is limited to VHOST_USER_COPY_ARRAY_N
- * entries. In order to not corrupt memory, we have to do the copy when the
- * static array reaches the copy threshold. We subtract 40 in case the code
- * goes into the inner loop for a maximum of 64k frames which may require
- * more array entries. We subtract 200 because our default buffer size is
- * 2048 and the default desc len is likely 1536. While it takes less than 40
- * vlib buffers for the jumbo frame, it may take twice as much descriptors
- * for the same jumbo frame. Use 200 for the extra head room.
- */
-#define VHOST_USER_TX_COPY_THRESHOLD (VHOST_USER_COPY_ARRAY_N - 200)
-
-extern vnet_device_class_t vhost_user_device_class;
-
-#define foreach_vhost_user_tx_func_error \
- _(NONE, "no error") \
- _(NOT_READY, "vhost vring not ready") \
- _(DOWN, "vhost interface is down") \
- _(PKT_DROP_NOBUF, "tx packet drops (no available descriptors)") \
- _(PKT_DROP_NOMRG, "tx packet drops (cannot merge descriptors)") \
- _(MMAP_FAIL, "mmap failure") \
- _(INDIRECT_OVERFLOW, "indirect descriptor table overflow")
-
-typedef enum
-{
-#define _(f,s) VHOST_USER_TX_FUNC_ERROR_##f,
- foreach_vhost_user_tx_func_error
-#undef _
- VHOST_USER_TX_FUNC_N_ERROR,
-} vhost_user_tx_func_error_t;
-
-static __clib_unused char *vhost_user_tx_func_error_strings[] = {
-#define _(n,s) s,
- foreach_vhost_user_tx_func_error
-#undef _
-};
-
-static __clib_unused u8 *
-format_vhost_user_interface_name (u8 * s, va_list * args)
-{
- u32 i = va_arg (*args, u32);
- u32 show_dev_instance = ~0;
- vhost_user_main_t *vum = &vhost_user_main;
-
- if (i < vec_len (vum->show_dev_instance_by_real_dev_instance))
- show_dev_instance = vum->show_dev_instance_by_real_dev_instance[i];
-
- if (show_dev_instance != ~0)
- i = show_dev_instance;
-
- s = format (s, "VirtualEthernet0/0/%d", i);
- return s;
-}
-
-static __clib_unused int
-vhost_user_name_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance)
-{
- // FIXME: check if the new dev instance is already used
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui = pool_elt_at_index (vum->vhost_user_interfaces,
- hi->dev_instance);
-
- vec_validate_init_empty (vum->show_dev_instance_by_real_dev_instance,
- hi->dev_instance, ~0);
-
- vum->show_dev_instance_by_real_dev_instance[hi->dev_instance] =
- new_dev_instance;
-
- vu_log_debug (vui, "renumbered vhost-user interface dev_instance %d to %d",
- hi->dev_instance, new_dev_instance);
-
- return 0;
-}
-
-static_always_inline void
-vhost_user_tx_trace (vhost_trace_t * t,
- vhost_user_intf_t * vui, u16 qid,
- vlib_buffer_t * b, vhost_user_vring_t * rxvq)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- u32 last_avail_idx = rxvq->last_avail_idx;
- u32 desc_current = rxvq->avail->ring[last_avail_idx & rxvq->qsz_mask];
- vnet_virtio_vring_desc_t *hdr_desc = 0;
- u32 hint = 0;
-
- clib_memset (t, 0, sizeof (*t));
- t->device_index = vui - vum->vhost_user_interfaces;
- t->qid = qid;
-
- hdr_desc = &rxvq->desc[desc_current];
- if (rxvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT)
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
- /* Header is the first here */
- hdr_desc = map_guest_mem (vui, rxvq->desc[desc_current].addr, &hint);
- }
- if (rxvq->desc[desc_current].flags & VRING_DESC_F_NEXT)
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
- }
- if (!(rxvq->desc[desc_current].flags & VRING_DESC_F_NEXT) &&
- !(rxvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT))
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
- }
-
- t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
-}
-
-static_always_inline u32
-vhost_user_tx_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
- u16 copy_len, u32 * map_hint)
-{
- void *dst0, *dst1, *dst2, *dst3;
- if (PREDICT_TRUE (copy_len >= 4))
- {
- if (PREDICT_FALSE (!(dst2 = map_guest_mem (vui, cpy[0].dst, map_hint))))
- return 1;
- if (PREDICT_FALSE (!(dst3 = map_guest_mem (vui, cpy[1].dst, map_hint))))
- return 1;
- while (PREDICT_TRUE (copy_len >= 4))
- {
- dst0 = dst2;
- dst1 = dst3;
-
- if (PREDICT_FALSE
- (!(dst2 = map_guest_mem (vui, cpy[2].dst, map_hint))))
- return 1;
- if (PREDICT_FALSE
- (!(dst3 = map_guest_mem (vui, cpy[3].dst, map_hint))))
- return 1;
-
- clib_prefetch_load ((void *) cpy[2].src);
- clib_prefetch_load ((void *) cpy[3].src);
-
- clib_memcpy_fast (dst0, (void *) cpy[0].src, cpy[0].len);
- clib_memcpy_fast (dst1, (void *) cpy[1].src, cpy[1].len);
-
- vhost_user_log_dirty_pages_2 (vui, cpy[0].dst, cpy[0].len, 1);
- vhost_user_log_dirty_pages_2 (vui, cpy[1].dst, cpy[1].len, 1);
- copy_len -= 2;
- cpy += 2;
- }
- }
- while (copy_len)
- {
- if (PREDICT_FALSE (!(dst0 = map_guest_mem (vui, cpy->dst, map_hint))))
- return 1;
- clib_memcpy_fast (dst0, (void *) cpy->src, cpy->len);
- vhost_user_log_dirty_pages_2 (vui, cpy->dst, cpy->len, 1);
- copy_len -= 1;
- cpy += 1;
- }
- return 0;
-}
-
-static_always_inline void
-vhost_user_handle_tx_offload (vhost_user_intf_t *vui, vlib_buffer_t *b,
- vnet_virtio_net_hdr_t *hdr)
-{
- generic_header_offset_t gho = { 0 };
- int is_ip4 = b->flags & VNET_BUFFER_F_IS_IP4;
- int is_ip6 = b->flags & VNET_BUFFER_F_IS_IP6;
- vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
- u16 psh_cksum = 0;
- ip4_header_t *ip4 = 0;
- ip6_header_t *ip6 = 0;
-
- ASSERT (!(is_ip4 && is_ip6));
- vnet_generic_header_offset_parser (b, &gho, 1 /* l2 */ , is_ip4, is_ip6);
- if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
- {
- ip4 =
- (ip4_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
- ip4->checksum = ip4_header_checksum (ip4);
- psh_cksum = ip4_pseudo_header_cksum (ip4);
- }
- else
- {
- ip6 = (ip6_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
- psh_cksum = ip6_pseudo_header_cksum (ip6);
- }
-
- /* checksum offload */
- if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
- {
- udp_header_t *udp =
- (udp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
- udp->checksum = psh_cksum;
- hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = gho.l4_hdr_offset;
- hdr->csum_offset = offsetof (udp_header_t, checksum);
- }
- else if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
- {
- tcp_header_t *tcp =
- (tcp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
- tcp->checksum = psh_cksum;
- hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = gho.l4_hdr_offset;
- hdr->csum_offset = offsetof (tcp_header_t, checksum);
- }
-
- /* GSO offload */
- if (b->flags & VNET_BUFFER_F_GSO)
- {
- if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
- {
- if (is_ip4 &&
- (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO4)))
- {
- hdr->gso_size = vnet_buffer2 (b)->gso_size;
- hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
- }
- else if (is_ip6 &&
- (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO6)))
- {
- hdr->gso_size = vnet_buffer2 (b)->gso_size;
- hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
- }
- }
- else if ((vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_UFO)) &&
- (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM))
- {
- hdr->gso_size = vnet_buffer2 (b)->gso_size;
- hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
- }
- }
-}
-
-static_always_inline void
-vhost_user_mark_desc_available (vlib_main_t * vm, vhost_user_intf_t * vui,
- vhost_user_vring_t * rxvq,
- u16 * n_descs_processed, u8 chained,
- vlib_frame_t * frame, u32 n_left)
-{
- u16 desc_idx, flags;
- vnet_virtio_vring_packed_desc_t *desc_table = rxvq->packed_desc;
- u16 last_used_idx = rxvq->last_used_idx;
-
- if (PREDICT_FALSE (*n_descs_processed == 0))
- return;
-
- if (rxvq->used_wrap_counter)
- flags = desc_table[last_used_idx & rxvq->qsz_mask].flags |
- (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
- else
- flags = desc_table[last_used_idx & rxvq->qsz_mask].flags &
- ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
-
- vhost_user_advance_last_used_idx (rxvq);
-
- for (desc_idx = 1; desc_idx < *n_descs_processed; desc_idx++)
- {
- if (rxvq->used_wrap_counter)
- desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags |=
- (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
- else
- desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags &=
- ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
- vhost_user_advance_last_used_idx (rxvq);
- }
-
- desc_table[last_used_idx & rxvq->qsz_mask].flags = flags;
-
- *n_descs_processed = 0;
-
- if (chained)
- {
- vnet_virtio_vring_packed_desc_t *desc_table = rxvq->packed_desc;
-
- while (desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags &
- VRING_DESC_F_NEXT)
- vhost_user_advance_last_used_idx (rxvq);
-
- /* Advance past the current chained table entries */
- vhost_user_advance_last_used_idx (rxvq);
- }
-
- /* interrupt (call) handling */
- if ((rxvq->callfd_idx != ~0) &&
- (rxvq->avail_event->flags != VRING_EVENT_F_DISABLE))
- {
- vhost_user_main_t *vum = &vhost_user_main;
-
- rxvq->n_since_last_int += frame->n_vectors - n_left;
- if (rxvq->n_since_last_int > vum->coalesce_frames)
- vhost_user_send_call (vm, vui, rxvq);
- }
-}
-
-static_always_inline void
-vhost_user_tx_trace_packed (vhost_trace_t * t, vhost_user_intf_t * vui,
- u16 qid, vlib_buffer_t * b,
- vhost_user_vring_t * rxvq)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- u32 last_avail_idx = rxvq->last_avail_idx;
- u32 desc_current = last_avail_idx & rxvq->qsz_mask;
- vnet_virtio_vring_packed_desc_t *hdr_desc = 0;
- u32 hint = 0;
-
- clib_memset (t, 0, sizeof (*t));
- t->device_index = vui - vum->vhost_user_interfaces;
- t->qid = qid;
-
- hdr_desc = &rxvq->packed_desc[desc_current];
- if (rxvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT)
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
- /* Header is the first here */
- hdr_desc = map_guest_mem (vui, rxvq->packed_desc[desc_current].addr,
- &hint);
- }
- if (rxvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT)
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
- }
- if (!(rxvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT) &&
- !(rxvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT))
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
- }
-
- t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
-}
-
-static_always_inline uword
-vhost_user_device_class_packed (vlib_main_t *vm, vlib_node_runtime_t *node,
- vlib_frame_t *frame, vhost_user_intf_t *vui,
- vhost_user_vring_t *rxvq)
-{
- u32 *buffers = vlib_frame_vector_args (frame);
- u32 n_left = frame->n_vectors;
- vhost_user_main_t *vum = &vhost_user_main;
- u32 qid = rxvq->qid;
- u8 error;
- u32 thread_index = vm->thread_index;
- vhost_cpu_t *cpu = &vum->cpus[thread_index];
- u32 map_hint = 0;
- u8 retry = 8;
- u16 copy_len;
- u16 tx_headers_len;
- vnet_virtio_vring_packed_desc_t *desc_table;
- u32 or_flags;
- u16 desc_head, desc_index, desc_len;
- u16 n_descs_processed;
- u8 indirect, chained;
-
-retry:
- error = VHOST_USER_TX_FUNC_ERROR_NONE;
- tx_headers_len = 0;
- copy_len = 0;
- n_descs_processed = 0;
-
- while (n_left > 0)
- {
- vlib_buffer_t *b0, *current_b0;
- uword buffer_map_addr;
- u32 buffer_len;
- u16 bytes_left;
- u32 total_desc_len = 0;
- u16 n_entries = 0;
-
- indirect = 0;
- chained = 0;
- if (PREDICT_TRUE (n_left > 1))
- vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);
-
- b0 = vlib_get_buffer (vm, buffers[0]);
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- cpu->current_trace = vlib_add_trace (vm, node, b0,
- sizeof (*cpu->current_trace));
- vhost_user_tx_trace_packed (cpu->current_trace, vui, qid / 2, b0,
- rxvq);
- }
-
- desc_table = rxvq->packed_desc;
- desc_head = desc_index = rxvq->last_avail_idx & rxvq->qsz_mask;
- if (PREDICT_FALSE (!vhost_user_packed_desc_available (rxvq, desc_head)))
- {
- error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
- goto done;
- }
- /*
- * Go deeper in case of indirect descriptor.
- * To test it, turn off mrg_rxbuf.
- */
- if (desc_table[desc_head].flags & VRING_DESC_F_INDIRECT)
- {
- indirect = 1;
- if (PREDICT_FALSE (desc_table[desc_head].len <
- sizeof (vnet_virtio_vring_packed_desc_t)))
- {
- error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
- goto done;
- }
- n_entries = desc_table[desc_head].len >> 4;
- desc_table = map_guest_mem (vui, desc_table[desc_index].addr,
- &map_hint);
- if (PREDICT_FALSE (desc_table == 0))
- {
- error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
- goto done;
- }
- desc_index = 0;
- }
- else if (rxvq->packed_desc[desc_head].flags & VRING_DESC_F_NEXT)
- chained = 1;
-
- desc_len = vui->virtio_net_hdr_sz;
- buffer_map_addr = desc_table[desc_index].addr;
- buffer_len = desc_table[desc_index].len;
-
- /* Get a header from the header array */
- vnet_virtio_net_hdr_mrg_rxbuf_t *hdr = &cpu->tx_headers[tx_headers_len];
- tx_headers_len++;
- hdr->hdr.flags = 0;
- hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
- hdr->num_buffers = 1;
-
- or_flags = (b0->flags & VNET_BUFFER_F_OFFLOAD);
-
- /* Guest supports csum offload and buffer requires checksum offload? */
- if (or_flags &&
- (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM)))
- vhost_user_handle_tx_offload (vui, b0, &hdr->hdr);
-
- /* Prepare a copy order executed later for the header */
- ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
- vhost_copy_t *cpy = &cpu->copy[copy_len];
- copy_len++;
- cpy->len = vui->virtio_net_hdr_sz;
- cpy->dst = buffer_map_addr;
- cpy->src = (uword) hdr;
-
- buffer_map_addr += vui->virtio_net_hdr_sz;
- buffer_len -= vui->virtio_net_hdr_sz;
- bytes_left = b0->current_length;
- current_b0 = b0;
- while (1)
- {
- if (buffer_len == 0)
- {
- /* Get new output */
- if (chained)
- {
- /*
- * Next one is chained
- * Test it with both indirect and mrg_rxbuf off
- */
- if (PREDICT_FALSE (!(desc_table[desc_index].flags &
- VRING_DESC_F_NEXT)))
- {
- /*
- * Last descriptor in chain.
- * Dequeue queued descriptors for this packet
- */
- vhost_user_dequeue_chained_descs (rxvq,
- &n_descs_processed);
- error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
- goto done;
- }
- vhost_user_advance_last_avail_idx (rxvq);
- desc_index = rxvq->last_avail_idx & rxvq->qsz_mask;
- n_descs_processed++;
- buffer_map_addr = desc_table[desc_index].addr;
- buffer_len = desc_table[desc_index].len;
- total_desc_len += desc_len;
- desc_len = 0;
- }
- else if (indirect)
- {
- /*
- * Indirect table
- * Test it with mrg_rxnuf off
- */
- if (PREDICT_TRUE (n_entries > 0))
- n_entries--;
- else
- {
- /* Dequeue queued descriptors for this packet */
- vhost_user_dequeue_chained_descs (rxvq,
- &n_descs_processed);
- error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
- goto done;
- }
- total_desc_len += desc_len;
- desc_index = (desc_index + 1) & rxvq->qsz_mask;
- buffer_map_addr = desc_table[desc_index].addr;
- buffer_len = desc_table[desc_index].len;
- desc_len = 0;
- }
- else if (vui->virtio_net_hdr_sz == 12)
- {
- /*
- * MRG is available
- * This is the default setting for the guest VM
- */
- vnet_virtio_net_hdr_mrg_rxbuf_t *hdr =
- &cpu->tx_headers[tx_headers_len - 1];
-
- desc_table[desc_index].len = desc_len;
- vhost_user_advance_last_avail_idx (rxvq);
- desc_head = desc_index =
- rxvq->last_avail_idx & rxvq->qsz_mask;
- hdr->num_buffers++;
- n_descs_processed++;
- desc_len = 0;
-
- if (PREDICT_FALSE (!vhost_user_packed_desc_available
- (rxvq, desc_index)))
- {
- /* Dequeue queued descriptors for this packet */
- vhost_user_dequeue_descs (rxvq, hdr,
- &n_descs_processed);
- error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
- goto done;
- }
-
- buffer_map_addr = desc_table[desc_index].addr;
- buffer_len = desc_table[desc_index].len;
- }
- else
- {
- error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
- goto done;
- }
- }
-
- ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
- vhost_copy_t *cpy = &cpu->copy[copy_len];
- copy_len++;
- cpy->len = bytes_left;
- cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
- cpy->dst = buffer_map_addr;
- cpy->src = (uword) vlib_buffer_get_current (current_b0) +
- current_b0->current_length - bytes_left;
-
- bytes_left -= cpy->len;
- buffer_len -= cpy->len;
- buffer_map_addr += cpy->len;
- desc_len += cpy->len;
-
- clib_prefetch_load (&rxvq->packed_desc);
-
- /* Check if vlib buffer has more data. If not, get more or break */
- if (PREDICT_TRUE (!bytes_left))
- {
- if (PREDICT_FALSE
- (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT))
- {
- current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
- bytes_left = current_b0->current_length;
- }
- else
- {
- /* End of packet */
- break;
- }
- }
- }
-
- /* Move from available to used ring */
- total_desc_len += desc_len;
- rxvq->packed_desc[desc_head].len = total_desc_len;
-
- vhost_user_advance_last_avail_table_idx (vui, rxvq, chained);
- n_descs_processed++;
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- cpu->current_trace->hdr = cpu->tx_headers[tx_headers_len - 1];
-
- n_left--;
-
- /*
- * Do the copy periodically to prevent
- * cpu->copy array overflow and corrupt memory
- */
- if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD) || chained)
- {
- if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
- &map_hint)))
- vlib_error_count (vm, node->node_index,
- VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
- copy_len = 0;
-
- /* give buffers back to driver */
- vhost_user_mark_desc_available (vm, vui, rxvq, &n_descs_processed,
- chained, frame, n_left);
- }
-
- buffers++;
- }
-
-done:
- if (PREDICT_TRUE (copy_len))
- {
- if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
- &map_hint)))
- vlib_error_count (vm, node->node_index,
- VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
-
- vhost_user_mark_desc_available (vm, vui, rxvq, &n_descs_processed,
- chained, frame, n_left);
- }
-
- /*
- * When n_left is set, error is always set to something too.
- * In case error is due to lack of remaining buffers, we go back up and
- * retry.
- * The idea is that it is better to waste some time on packets
- * that have been processed already than dropping them and get
- * more fresh packets with a good likelyhood that they will be dropped too.
- * This technique also gives more time to VM driver to pick-up packets.
- * In case the traffic flows from physical to virtual interfaces, this
- * technique will end-up leveraging the physical NIC buffer in order to
- * absorb the VM's CPU jitter.
- */
- if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
- {
- retry--;
- goto retry;
- }
-
- clib_spinlock_unlock (&rxvq->vring_lock);
-
- if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
- {
- vlib_error_count (vm, node->node_index, error, n_left);
- vlib_increment_simple_counter
- (vnet_main.interface_main.sw_if_counters +
- VNET_INTERFACE_COUNTER_DROP, thread_index, vui->sw_if_index, n_left);
- }
-
- vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
- return frame->n_vectors;
-}
-
-VNET_DEVICE_CLASS_TX_FN (vhost_user_device_class) (vlib_main_t * vm,
- vlib_node_runtime_t *
- node, vlib_frame_t * frame)
-{
- u32 *buffers = vlib_frame_vector_args (frame);
- u32 n_left = frame->n_vectors;
- vhost_user_main_t *vum = &vhost_user_main;
- vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
- vhost_user_intf_t *vui =
- pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance);
- u32 qid;
- vhost_user_vring_t *rxvq;
- u8 error;
- u32 thread_index = vm->thread_index;
- vhost_cpu_t *cpu = &vum->cpus[thread_index];
- u32 map_hint = 0;
- u8 retry = 8;
- u16 copy_len;
- u16 tx_headers_len;
- u32 or_flags;
- vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
-
- if (PREDICT_FALSE (!vui->admin_up))
- {
- error = VHOST_USER_TX_FUNC_ERROR_DOWN;
- goto done3;
- }
-
- if (PREDICT_FALSE (!vui->is_ready))
- {
- error = VHOST_USER_TX_FUNC_ERROR_NOT_READY;
- goto done3;
- }
-
- qid = VHOST_VRING_IDX_RX (tf->queue_id);
- rxvq = &vui->vrings[qid];
- ASSERT (tf->queue_id == rxvq->qid);
-
- if (PREDICT_FALSE (rxvq->avail == 0))
- {
- error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
- goto done3;
- }
- if (tf->shared_queue)
- clib_spinlock_lock (&rxvq->vring_lock);
-
- if (vhost_user_is_packed_ring_supported (vui))
- return (vhost_user_device_class_packed (vm, node, frame, vui, rxvq));
-
-retry:
- error = VHOST_USER_TX_FUNC_ERROR_NONE;
- tx_headers_len = 0;
- copy_len = 0;
- while (n_left > 0)
- {
- vlib_buffer_t *b0, *current_b0;
- u16 desc_head, desc_index, desc_len;
- vnet_virtio_vring_desc_t *desc_table;
- uword buffer_map_addr;
- u32 buffer_len;
- u16 bytes_left;
-
- if (PREDICT_TRUE (n_left > 1))
- vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);
-
- b0 = vlib_get_buffer (vm, buffers[0]);
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- cpu->current_trace = vlib_add_trace (vm, node, b0,
- sizeof (*cpu->current_trace));
- vhost_user_tx_trace (cpu->current_trace, vui, qid / 2, b0, rxvq);
- }
-
- if (PREDICT_FALSE (rxvq->last_avail_idx == rxvq->avail->idx))
- {
- error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
- goto done;
- }
-
- desc_table = rxvq->desc;
- desc_head = desc_index =
- rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];
-
- /* Go deeper in case of indirect descriptor
- * I don't know of any driver providing indirect for RX. */
- if (PREDICT_FALSE (rxvq->desc[desc_head].flags & VRING_DESC_F_INDIRECT))
- {
- if (PREDICT_FALSE (rxvq->desc[desc_head].len <
- sizeof (vnet_virtio_vring_desc_t)))
- {
- error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
- goto done;
- }
- if (PREDICT_FALSE
- (!(desc_table =
- map_guest_mem (vui, rxvq->desc[desc_index].addr,
- &map_hint))))
- {
- error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
- goto done;
- }
- desc_index = 0;
- }
-
- desc_len = vui->virtio_net_hdr_sz;
- buffer_map_addr = desc_table[desc_index].addr;
- buffer_len = desc_table[desc_index].len;
-
- {
- // Get a header from the header array
- vnet_virtio_net_hdr_mrg_rxbuf_t *hdr =
- &cpu->tx_headers[tx_headers_len];
- tx_headers_len++;
- hdr->hdr.flags = 0;
- hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
- hdr->num_buffers = 1; //This is local, no need to check
-
- or_flags = (b0->flags & VNET_BUFFER_F_OFFLOAD);
-
- /* Guest supports csum offload and buffer requires checksum offload? */
- if (or_flags
- && (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM)))
- vhost_user_handle_tx_offload (vui, b0, &hdr->hdr);
-
- // Prepare a copy order executed later for the header
- ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
- vhost_copy_t *cpy = &cpu->copy[copy_len];
- copy_len++;
- cpy->len = vui->virtio_net_hdr_sz;
- cpy->dst = buffer_map_addr;
- cpy->src = (uword) hdr;
- }
-
- buffer_map_addr += vui->virtio_net_hdr_sz;
- buffer_len -= vui->virtio_net_hdr_sz;
- bytes_left = b0->current_length;
- current_b0 = b0;
- while (1)
- {
- if (buffer_len == 0)
- { //Get new output
- if (desc_table[desc_index].flags & VRING_DESC_F_NEXT)
- {
- //Next one is chained
- desc_index = desc_table[desc_index].next;
- buffer_map_addr = desc_table[desc_index].addr;
- buffer_len = desc_table[desc_index].len;
- }
- else if (vui->virtio_net_hdr_sz == 12) //MRG is available
- {
- vnet_virtio_net_hdr_mrg_rxbuf_t *hdr =
- &cpu->tx_headers[tx_headers_len - 1];
-
- //Move from available to used buffer
- rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id =
- desc_head;
- rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len =
- desc_len;
- vhost_user_log_dirty_ring (vui, rxvq,
- ring[rxvq->last_used_idx &
- rxvq->qsz_mask]);
-
- rxvq->last_avail_idx++;
- rxvq->last_used_idx++;
- hdr->num_buffers++;
- desc_len = 0;
-
- if (PREDICT_FALSE
- (rxvq->last_avail_idx == rxvq->avail->idx))
- {
- //Dequeue queued descriptors for this packet
- rxvq->last_used_idx -= hdr->num_buffers - 1;
- rxvq->last_avail_idx -= hdr->num_buffers - 1;
- error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
- goto done;
- }
-
- desc_table = rxvq->desc;
- desc_head = desc_index =
- rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];
- if (PREDICT_FALSE
- (rxvq->desc[desc_head].flags & VRING_DESC_F_INDIRECT))
- {
- //It is seriously unlikely that a driver will put indirect descriptor
- //after non-indirect descriptor.
- if (PREDICT_FALSE (rxvq->desc[desc_head].len <
- sizeof (vnet_virtio_vring_desc_t)))
- {
- error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
- goto done;
- }
- if (PREDICT_FALSE
- (!(desc_table =
- map_guest_mem (vui,
- rxvq->desc[desc_index].addr,
- &map_hint))))
- {
- error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
- goto done;
- }
- desc_index = 0;
- }
- buffer_map_addr = desc_table[desc_index].addr;
- buffer_len = desc_table[desc_index].len;
- }
- else
- {
- error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
- goto done;
- }
- }
-
- {
- ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
- vhost_copy_t *cpy = &cpu->copy[copy_len];
- copy_len++;
- cpy->len = bytes_left;
- cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
- cpy->dst = buffer_map_addr;
- cpy->src = (uword) vlib_buffer_get_current (current_b0) +
- current_b0->current_length - bytes_left;
-
- bytes_left -= cpy->len;
- buffer_len -= cpy->len;
- buffer_map_addr += cpy->len;
- desc_len += cpy->len;
-
- clib_prefetch_load (&rxvq->desc);
- }
-
- // Check if vlib buffer has more data. If not, get more or break.
- if (PREDICT_TRUE (!bytes_left))
- {
- if (PREDICT_FALSE
- (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT))
- {
- current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
- bytes_left = current_b0->current_length;
- }
- else
- {
- //End of packet
- break;
- }
- }
- }
-
- //Move from available to used ring
- rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id = desc_head;
- rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len = desc_len;
- vhost_user_log_dirty_ring (vui, rxvq,
- ring[rxvq->last_used_idx & rxvq->qsz_mask]);
- rxvq->last_avail_idx++;
- rxvq->last_used_idx++;
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- cpu->current_trace->hdr = cpu->tx_headers[tx_headers_len - 1];
- }
-
- n_left--; //At the end for error counting when 'goto done' is invoked
-
- /*
- * Do the copy periodically to prevent
- * cpu->copy array overflow and corrupt memory
- */
- if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD))
- {
- if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
- &map_hint)))
- {
- vlib_error_count (vm, node->node_index,
- VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
- }
- copy_len = 0;
-
- /* give buffers back to driver */
- CLIB_MEMORY_BARRIER ();
- rxvq->used->idx = rxvq->last_used_idx;
- vhost_user_log_dirty_ring (vui, rxvq, idx);
- }
- buffers++;
- }
-
-done:
- //Do the memory copies
- if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
- &map_hint)))
- {
- vlib_error_count (vm, node->node_index,
- VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
- }
-
- CLIB_MEMORY_BARRIER ();
- rxvq->used->idx = rxvq->last_used_idx;
- vhost_user_log_dirty_ring (vui, rxvq, idx);
-
- /*
- * When n_left is set, error is always set to something too.
- * In case error is due to lack of remaining buffers, we go back up and
- * retry.
- * The idea is that it is better to waste some time on packets
- * that have been processed already than dropping them and get
- * more fresh packets with a good likelihood that they will be dropped too.
- * This technique also gives more time to VM driver to pick-up packets.
- * In case the traffic flows from physical to virtual interfaces, this
- * technique will end-up leveraging the physical NIC buffer in order to
- * absorb the VM's CPU jitter.
- */
- if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
- {
- retry--;
- goto retry;
- }
-
- /* interrupt (call) handling */
- if ((rxvq->callfd_idx != ~0) &&
- !(rxvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
- {
- rxvq->n_since_last_int += frame->n_vectors - n_left;
-
- if (rxvq->n_since_last_int > vum->coalesce_frames)
- vhost_user_send_call (vm, vui, rxvq);
- }
-
- clib_spinlock_unlock (&rxvq->vring_lock);
-
-done3:
- if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
- {
- vlib_error_count (vm, node->node_index, error, n_left);
- vlib_increment_simple_counter
- (vnet_main.interface_main.sw_if_counters
- + VNET_INTERFACE_COUNTER_DROP,
- thread_index, vui->sw_if_index, n_left);
- }
-
- vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
- return frame->n_vectors;
-}
-
-static __clib_unused clib_error_t *
-vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index,
- u32 qid, vnet_hw_if_rx_mode mode)
-{
- vlib_main_t *vm = vnm->vlib_main;
- vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui =
- pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
- vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
- vhost_cpu_t *cpu;
-
- if (mode == txvq->mode)
- return 0;
-
- if ((mode != VNET_HW_IF_RX_MODE_POLLING) &&
- (mode != VNET_HW_IF_RX_MODE_ADAPTIVE) &&
- (mode != VNET_HW_IF_RX_MODE_INTERRUPT))
- {
- vu_log_err (vui, "unhandled mode %d changed for if %d queue %d", mode,
- hw_if_index, qid);
- return clib_error_return (0, "unsupported");
- }
-
- if (txvq->thread_index == ~0)
- return clib_error_return (0, "Queue initialization is not finished yet");
-
- cpu = vec_elt_at_index (vum->cpus, txvq->thread_index);
- if ((mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
- (mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
- {
- if (txvq->kickfd_idx == ~0)
- {
- // We cannot support interrupt mode if the driver opts out
- return clib_error_return (0, "Driver does not support interrupt");
- }
- if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
- {
- ASSERT (cpu->polling_q_count != 0);
- if (cpu->polling_q_count)
- cpu->polling_q_count--;
- vum->ifq_count++;
- // Start the timer if this is the first encounter on interrupt
- // interface/queue
- if ((vum->ifq_count == 1) &&
- ((vum->coalesce_time > 0.0) || (vum->coalesce_frames > 0)))
- vlib_process_signal_event (vm,
- vhost_user_send_interrupt_node.index,
- VHOST_USER_EVENT_START_TIMER, 0);
- }
- }
- else if (mode == VNET_HW_IF_RX_MODE_POLLING)
- {
- if (((txvq->mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
- (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)) && vum->ifq_count)
- {
- cpu->polling_q_count++;
- vum->ifq_count--;
- // Stop the timer if there is no more interrupt interface/queue
- if (vum->ifq_count == 0)
- vlib_process_signal_event (vm,
- vhost_user_send_interrupt_node.index,
- VHOST_USER_EVENT_STOP_TIMER, 0);
- }
- }
-
- txvq->mode = mode;
- vhost_user_set_operation_mode (vui, txvq);
-
- return 0;
-}
-
-static __clib_unused clib_error_t *
-vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
- u32 flags)
-{
- vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui =
- pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
- u8 link_old, link_new;
-
- link_old = vui_is_link_up (vui);
-
- vui->admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
-
- link_new = vui_is_link_up (vui);
-
- if (link_old != link_new)
- vnet_hw_interface_set_flags (vnm, vui->hw_if_index, link_new ?
- VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
-
- return /* no error */ 0;
-}
-
-/* *INDENT-OFF* */
-VNET_DEVICE_CLASS (vhost_user_device_class) = {
- .name = "vhost-user",
- .tx_function_n_errors = VHOST_USER_TX_FUNC_N_ERROR,
- .tx_function_error_strings = vhost_user_tx_func_error_strings,
- .format_device_name = format_vhost_user_interface_name,
- .name_renumber = vhost_user_name_renumber,
- .admin_up_down_function = vhost_user_interface_admin_up_down,
- .rx_mode_change_function = vhost_user_interface_rx_mode_change,
- .format_tx_trace = format_vhost_trace,
-};
-
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/virtio/virtio.api b/src/vnet/devices/virtio/virtio.api
index bbe2341a001..a11492ec258 100644
--- a/src/vnet/devices/virtio/virtio.api
+++ b/src/vnet/devices/virtio/virtio.api
@@ -56,7 +56,7 @@ define virtio_pci_create_reply
vl_api_interface_index_t sw_if_index;
};
-enum virtio_flags {
+enumflag virtio_flags {
VIRTIO_API_FLAG_GSO = 1, /* enable gso on the interface */
VIRTIO_API_FLAG_CSUM_OFFLOAD = 2, /* enable checksum offload without gso on the interface */
VIRTIO_API_FLAG_GRO_COALESCE = 4, /* enable packet coalescing on tx side, provided gso enabled */
diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c
index 33af8b8c455..d2302fa1dc4 100644
--- a/src/vnet/devices/virtio/virtio.c
+++ b/src/vnet/devices/virtio/virtio.c
@@ -19,7 +19,11 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <net/if.h>
+#ifdef __linux__
#include <linux/if_tun.h>
+#elif __FreeBSD__
+#include <net/if_tun.h>
+#endif /* __linux__ */
#include <sys/ioctl.h>
#include <sys/eventfd.h>
@@ -207,7 +211,6 @@ virtio_set_packet_buffering (virtio_if_t * vif, u16 buffering_size)
vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
vnet_virtio_vring_t *vring;
clib_error_t *error = 0;
- vif->packet_buffering = 1;
vec_foreach (vring, vif->txq_vrings)
{
diff --git a/src/vnet/devices/virtio/virtio_pci_modern.c b/src/vnet/devices/virtio/virtio_pci_modern.c
index f7313d84bbd..50a7b392367 100644
--- a/src/vnet/devices/virtio/virtio_pci_modern.c
+++ b/src/vnet/devices/virtio/virtio_pci_modern.c
@@ -164,9 +164,7 @@ virtio_pci_modern_set_queue_size (vlib_main_t * vm, virtio_if_t * vif,
return;
}
- if (virtio_pci_modern_get_queue_size (vm, vif, queue_id) > queue_size)
- virtio_pci_reg_write_u16 (vif, VIRTIO_QUEUE_SIZE_OFFSET (vif),
- queue_size);
+ virtio_pci_reg_write_u16 (vif, VIRTIO_QUEUE_SIZE_OFFSET (vif), queue_size);
}
static u16
diff --git a/src/vnet/devices/virtio/virtio_pre_input.c b/src/vnet/devices/virtio/virtio_pre_input.c
index eb208fd3a39..80cc8d6edb0 100644
--- a/src/vnet/devices/virtio/virtio_pre_input.c
+++ b/src/vnet/devices/virtio/virtio_pre_input.c
@@ -31,7 +31,7 @@ virtio_pre_input_inline (vlib_main_t *vm, vnet_virtio_vring_t *txq_vring,
if (clib_spinlock_trylock (&txq_vring->lockp))
{
if (virtio_txq_is_scheduled (txq_vring))
- return 0;
+ goto unlock;
if (packet_coalesce)
vnet_gro_flow_table_schedule_node_on_dispatcher (
vm, txq, txq_vring->flow_table);
@@ -39,6 +39,7 @@ virtio_pre_input_inline (vlib_main_t *vm, vnet_virtio_vring_t *txq_vring,
virtio_vring_buffering_schedule_node_on_dispatcher (
vm, txq, txq_vring->buffering);
virtio_txq_set_scheduled (txq_vring);
+ unlock:
clib_spinlock_unlock (&txq_vring->lockp);
}
}
diff --git a/src/vnet/devices/virtio/virtio_process.c b/src/vnet/devices/virtio/virtio_process.c
index 18b34e0aa62..13ba590659c 100644
--- a/src/vnet/devices/virtio/virtio_process.c
+++ b/src/vnet/devices/virtio/virtio_process.c
@@ -70,13 +70,11 @@ virtio_send_interrupt_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (virtio_send_interrupt_node) = {
.function = virtio_send_interrupt_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "virtio-send-interrupt-process",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/devices/virtio/virtio_std.h b/src/vnet/devices/virtio/virtio_std.h
index 86984339bc2..ec988c08dbb 100644
--- a/src/vnet/devices/virtio/virtio_std.h
+++ b/src/vnet/devices/virtio/virtio_std.h
@@ -122,7 +122,6 @@ typedef struct
/* u16 avail_event; */
} vnet_virtio_vring_used_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u64 addr; // packet data buffer address
u32 len; // packet data buffer size
@@ -170,7 +169,6 @@ typedef CLIB_PACKED (struct {
u16 num_buffers;
}) vnet_virtio_net_hdr_mrg_rxbuf_t;
-/* *INDENT-ON* */
#endif
/*
diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c
index d8342ff17ae..fc789ae0a7f 100644
--- a/src/vnet/dpo/dpo.c
+++ b/src/vnet/dpo/dpo.c
@@ -613,12 +613,10 @@ dpo_module_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION(dpo_module_init) =
{
.runs_before = VLIB_INITS ("ip_main_init"),
};
-/* *INDENT-ON* */
static clib_error_t *
dpo_memory_show (vlib_main_t * vm,
@@ -640,7 +638,6 @@ dpo_memory_show (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
/*?
* The '<em>sh dpo memory </em>' command displays the memory usage for each
* data-plane object type.
@@ -662,6 +659,5 @@ VLIB_CLI_COMMAND (show_fib_memory, static) = {
.function = dpo_memory_show,
.short_help = "show dpo memory",
};
-/* *INDENT-ON* */
// clang-format on
diff --git a/src/vnet/dpo/dvr_dpo.c b/src/vnet/dpo/dvr_dpo.c
index 5db9c803145..2b66467837c 100644
--- a/src/vnet/dpo/dvr_dpo.c
+++ b/src/vnet/dpo/dvr_dpo.c
@@ -206,12 +206,9 @@ format_dvr_dpo (u8* s, va_list *ap)
vnet_main_t * vnm = vnet_get_main();
dvr_dpo_t *dd = dvr_dpo_get(index);
- return (format(s, "%U-dvr-%U-dpo %U",
- format_dpo_proto, dd->dd_proto,
- format_vnet_sw_interface_name,
- vnm,
- vnet_get_sw_interface(vnm, dd->dd_sw_if_index),
- format_dvr_reinject, dd->dd_reinject));
+ return format (s, "%U-dvr-%U-dpo %U", format_dpo_proto, dd->dd_proto,
+ format_vnet_sw_if_index_name, vnm, dd->dd_sw_if_index,
+ format_dvr_reinject, dd->dd_reinject);
}
static void
diff --git a/src/vnet/dpo/interface_rx_dpo.c b/src/vnet/dpo/interface_rx_dpo.c
index d3615d0ce76..5a519d344c1 100644
--- a/src/vnet/dpo/interface_rx_dpo.c
+++ b/src/vnet/dpo/interface_rx_dpo.c
@@ -160,11 +160,8 @@ format_interface_rx_dpo (u8* s, va_list *ap)
vnet_main_t * vnm = vnet_get_main();
interface_rx_dpo_t *ido = interface_rx_dpo_get(index);
- return (format(s, "%U-rx-dpo: %U",
- format_vnet_sw_interface_name,
- vnm,
- vnet_get_sw_interface(vnm, ido->ido_sw_if_index),
- format_dpo_proto, ido->ido_proto));
+ return format (s, "%U-rx-dpo: %U", format_vnet_sw_if_index_name, vnm,
+ ido->ido_sw_if_index, format_dpo_proto, ido->ido_proto);
}
static void
diff --git a/src/vnet/dpo/interface_tx_dpo.c b/src/vnet/dpo/interface_tx_dpo.c
index 870579884a0..73f4e906268 100644
--- a/src/vnet/dpo/interface_tx_dpo.c
+++ b/src/vnet/dpo/interface_tx_dpo.c
@@ -50,10 +50,7 @@ format_interface_tx_dpo (u8* s, va_list *ap)
CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
vnet_main_t * vnm = vnet_get_main();
- return (format(s, "%U-tx-dpo:",
- format_vnet_sw_interface_name,
- vnm,
- vnet_get_sw_interface(vnm, index)));
+ return format (s, "%U-tx-dpo:", format_vnet_sw_if_index_name, vnm, index);
}
static void
diff --git a/src/vnet/dpo/ip6_ll_dpo.c b/src/vnet/dpo/ip6_ll_dpo.c
index deb67d88137..86908efbc04 100644
--- a/src/vnet/dpo/ip6_ll_dpo.c
+++ b/src/vnet/dpo/ip6_ll_dpo.c
@@ -191,7 +191,6 @@ static char *ip6_ll_dpo_error_strings[] = {
/**
* @brief
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_ll_dpo_node) =
{
.function = ip6_ll_dpo_switch,
@@ -206,7 +205,6 @@ VLIB_REGISTER_NODE (ip6_ll_dpo_node) =
[IP6_LL_NEXT_LOOKUP] = "ip6-lookup",
},
};
-/* *INDENT-ON* */
void
ip6_ll_dpo_module_init (void)
diff --git a/src/vnet/dpo/l3_proxy_dpo.c b/src/vnet/dpo/l3_proxy_dpo.c
index 41156301a0e..f89554d775f 100644
--- a/src/vnet/dpo/l3_proxy_dpo.c
+++ b/src/vnet/dpo/l3_proxy_dpo.c
@@ -116,9 +116,8 @@ format_l3_proxy_dpo (u8 *s, va_list *ap)
if (~0 != l3p->l3p_sw_if_index)
{
- return (format(s, "dpo-l3_proxy: %U",
- format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface(vnm, l3p->l3p_sw_if_index)));
+ return (format (s, "dpo-l3_proxy: %U", format_vnet_sw_if_index_name, vnm,
+ l3p->l3p_sw_if_index));
}
else
{
diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c
index ff46d56e3e2..8f2a0de6ea8 100644
--- a/src/vnet/dpo/load_balance.c
+++ b/src/vnet/dpo/load_balance.c
@@ -149,7 +149,13 @@ load_balance_format (index_t lbi,
dpo_id_t *buckets;
u32 i;
- lb = load_balance_get(lbi);
+ lb = load_balance_get_or_null(lbi);
+ if (lb == NULL)
+ {
+ s = format(s, "DELETED lb:%u", lbi);
+ return (s);
+ }
+
vlib_get_combined_counter(&(load_balance_main.lbm_to_counters), lbi, &to);
vlib_get_combined_counter(&(load_balance_main.lbm_via_counters), lbi, &via);
buckets = load_balance_get_buckets(lb);
@@ -244,6 +250,8 @@ load_balance_create_i (u32 num_buckets,
{
load_balance_t *lb;
+ ASSERT (num_buckets <= LB_MAX_BUCKETS);
+
lb = load_balance_alloc_i();
lb->lb_hash_config = fhc;
lb->lb_n_buckets = num_buckets;
@@ -455,8 +463,9 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
/* Try larger and larger power of 2 sized adjacency blocks until we
find one where traffic flows to within 1% of specified weights. */
- for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
+ for (n_adj = clib_min(max_pow2 (n_nhs), LB_MAX_BUCKETS); ; n_adj *= 2)
{
+ ASSERT (n_adj <= LB_MAX_BUCKETS);
error = 0;
norm = n_adj / ((f64) sum_weight);
@@ -487,12 +496,22 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
nhs[0].path_weight += n_adj_left;
- /* Less than 5% average error per adjacency with this size adjacency block? */
- if (error <= multipath_next_hop_error_tolerance*n_adj)
+ /* Less than 1% average error per adjacency with this size adjacency block,
+ * or did we reached the maximum number of buckets we support? */
+ if (error <= multipath_next_hop_error_tolerance*n_adj ||
+ n_adj >= LB_MAX_BUCKETS)
{
- /* Truncate any next hops with zero weight. */
- vec_set_len (nhs, i);
- break;
+ if (i < n_nhs)
+ {
+ /* Truncate any next hops in excess */
+ vlib_log_err(load_balance_logger,
+ "Too many paths for load-balance, truncating %d -> %d",
+ n_nhs, i);
+ for (int j = i; j < n_nhs; j++)
+ dpo_reset (&vec_elt(nhs, j).path_dpo);
+ }
+ vec_set_len (nhs, i);
+ break;
}
}
@@ -622,6 +641,7 @@ static inline void
load_balance_set_n_buckets (load_balance_t *lb,
u32 n_buckets)
{
+ ASSERT (n_buckets <= LB_MAX_BUCKETS);
lb->lb_n_buckets = n_buckets;
lb->lb_n_buckets_minus_1 = n_buckets-1;
}
@@ -651,8 +671,6 @@ load_balance_multipath_update (const dpo_id_t *dpo,
&sum_of_weights,
multipath_next_hop_error_tolerance);
- ASSERT (n_buckets >= vec_len (raw_nhs));
-
/*
* Save the old load-balance map used, and get a new one if required.
*/
diff --git a/src/vnet/dpo/load_balance.h b/src/vnet/dpo/load_balance.h
index 5428e20e981..eee073f5892 100644
--- a/src/vnet/dpo/load_balance.h
+++ b/src/vnet/dpo/load_balance.h
@@ -50,6 +50,12 @@ typedef struct load_balance_main_t_
extern load_balance_main_t load_balance_main;
/**
+ * The maximum number of buckets that a load-balance object can have
+ * This must not overflow the lb_n_buckets field
+ */
+#define LB_MAX_BUCKETS 8192
+
+/**
* The number of buckets that a load-balance object can have and still
* fit in one cache-line
*/
@@ -176,6 +182,10 @@ typedef struct load_balance_t_ {
STATIC_ASSERT(sizeof(load_balance_t) <= CLIB_CACHE_LINE_BYTES,
"A load_balance object size exceeds one cacheline");
+STATIC_ASSERT (LB_MAX_BUCKETS <= CLIB_U16_MAX,
+ "Too many buckets for load_balance object");
+STATIC_ASSERT (LB_MAX_BUCKETS && !(LB_MAX_BUCKETS & (LB_MAX_BUCKETS - 1)),
+ "LB_MAX_BUCKETS must be a power of 2");
/**
* Flags controlling load-balance formatting/display
@@ -222,6 +232,14 @@ load_balance_get (index_t lbi)
return (pool_elt_at_index(load_balance_pool, lbi));
}
+static inline load_balance_t *
+load_balance_get_or_null (index_t lbi)
+{
+ if (pool_is_free_index (load_balance_pool, lbi))
+ return 0;
+ return (pool_elt_at_index (load_balance_pool, lbi));
+}
+
#define LB_HAS_INLINE_BUCKETS(_lb) \
((_lb)->lb_n_buckets <= LB_NUM_INLINE_BUCKETS)
diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c
index 7856f050cb0..872577dfbe1 100644
--- a/src/vnet/dpo/mpls_label_dpo.c
+++ b/src/vnet/dpo/mpls_label_dpo.c
@@ -84,6 +84,7 @@ mpls_label_dpo_create (fib_mpls_label_t *label_stack,
mld = mpls_label_dpo_alloc();
mld->mld_flags = flags;
+ mld->mld_payload_proto = payload_proto;
dtype = mpls_label_dpo_types[flags];
if (MPLS_LABEL_DPO_MAX_N_LABELS < vec_len(label_stack))
@@ -92,13 +93,12 @@ mpls_label_dpo_create (fib_mpls_label_t *label_stack,
dpo_stack(dtype,
mld->mld_payload_proto,
&mld->mld_dpo,
- drop_dpo_get(DPO_PROTO_MPLS));
+ drop_dpo_get(mld->mld_payload_proto));
}
else
{
mld->mld_n_labels = vec_len(label_stack);
mld->mld_n_hdr_bytes = mld->mld_n_labels * sizeof(mld->mld_hdr[0]);
- mld->mld_payload_proto = payload_proto;
/*
* construct label rewrite headers for each value passed.
@@ -398,22 +398,22 @@ mpls_label_imposition_inline (vlib_main_t * vm,
/* Prefetch next iteration. */
{
- vlib_buffer_t * p2, * p3, *p4, *p5;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
- p4 = vlib_get_buffer (vm, from[4]);
- p5 = vlib_get_buffer (vm, from[5]);
-
- vlib_prefetch_buffer_header (p2, STORE);
- vlib_prefetch_buffer_header (p3, STORE);
- vlib_prefetch_buffer_header (p4, STORE);
- vlib_prefetch_buffer_header (p5, STORE);
-
- CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE);
- CLIB_PREFETCH (p3->data, sizeof (hdr0[0]), STORE);
- CLIB_PREFETCH (p4->data, sizeof (hdr0[0]), STORE);
- CLIB_PREFETCH (p5->data, sizeof (hdr0[0]), STORE);
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ vlib_prefetch_buffer_header (p4, STORE);
+ vlib_prefetch_buffer_header (p5, STORE);
+ vlib_prefetch_buffer_header (p6, STORE);
+ vlib_prefetch_buffer_header (p7, STORE);
+
+ CLIB_PREFETCH (p4->data, sizeof (hdr0[0]), STORE);
+ CLIB_PREFETCH (p5->data, sizeof (hdr0[0]), STORE);
+ CLIB_PREFETCH (p6->data, sizeof (hdr0[0]), STORE);
+ CLIB_PREFETCH (p7->data, sizeof (hdr0[0]), STORE);
}
from += 4;
diff --git a/src/vnet/dpo/receive_dpo.c b/src/vnet/dpo/receive_dpo.c
index 0a97e1d373b..413c3ae5b47 100644
--- a/src/vnet/dpo/receive_dpo.c
+++ b/src/vnet/dpo/receive_dpo.c
@@ -122,10 +122,9 @@ format_receive_dpo (u8 *s, va_list *ap)
if (~0 != rd->rd_sw_if_index)
{
- return (format(s, "dpo-receive: %U on %U",
- format_ip46_address, &rd->rd_addr, IP46_TYPE_ANY,
- format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface(vnm, rd->rd_sw_if_index)));
+ return (format (s, "dpo-receive: %U on %U", format_ip46_address,
+ &rd->rd_addr, IP46_TYPE_ANY,
+ format_vnet_sw_if_index_name, vnm, rd->rd_sw_if_index));
}
else
{
diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c
index 5f88f12b910..0474fd82984 100644
--- a/src/vnet/dpo/replicate_dpo.c
+++ b/src/vnet/dpo/replicate_dpo.c
@@ -172,6 +172,8 @@ replicate_create_i (u32 num_buckets,
{
replicate_t *rep;
+ ASSERT (num_buckets <= REP_MAX_BUCKETS);
+
rep = replicate_alloc_i();
rep->rep_n_buckets = num_buckets;
rep->rep_proto = rep_proto;
@@ -311,7 +313,8 @@ static inline void
replicate_set_n_buckets (replicate_t *rep,
u32 n_buckets)
{
- rep->rep_n_buckets = n_buckets;
+ ASSERT (n_buckets <= REP_MAX_BUCKETS);
+ rep->rep_n_buckets = n_buckets;
}
void
@@ -331,6 +334,17 @@ replicate_multipath_update (const dpo_id_t *dpo,
rep->rep_proto);
n_buckets = vec_len(nhs);
+ if (n_buckets > REP_MAX_BUCKETS)
+ {
+ vlib_log_err (replicate_logger,
+ "Too many paths for replicate, truncating %d -> %d",
+ n_buckets, REP_MAX_BUCKETS);
+ for (int i = REP_MAX_BUCKETS; i < n_buckets; i++)
+ dpo_reset (&vec_elt (nhs, i).path_dpo);
+ vec_set_len (nhs, REP_MAX_BUCKETS);
+ n_buckets = REP_MAX_BUCKETS;
+ }
+
if (0 == rep->rep_n_buckets)
{
/*
diff --git a/src/vnet/dpo/replicate_dpo.h b/src/vnet/dpo/replicate_dpo.h
index 908c20c1d56..d21f52a4833 100644
--- a/src/vnet/dpo/replicate_dpo.h
+++ b/src/vnet/dpo/replicate_dpo.h
@@ -41,6 +41,12 @@ typedef struct replicate_main_t_
extern replicate_main_t replicate_main;
/**
+ * The number of buckets that a replicate object can have
+ * This must not overflow the rep_n_buckets field
+ */
+#define REP_MAX_BUCKETS 1024
+
+/**
* The number of buckets that a load-balance object can have and still
* fit in one cache-line
*/
@@ -108,6 +114,8 @@ typedef struct replicate_t_ {
STATIC_ASSERT(sizeof(replicate_t) <= CLIB_CACHE_LINE_BYTES,
"A replicate object size exceeds one cacheline");
+STATIC_ASSERT (REP_MAX_BUCKETS <= CLIB_U16_MAX,
+ "Too many buckets for replicate object");
/**
* Flags controlling load-balance formatting/display
diff --git a/src/vnet/error.h b/src/vnet/error.h
index 39a609bdb49..fa1337538c4 100644
--- a/src/vnet/error.h
+++ b/src/vnet/error.h
@@ -156,7 +156,10 @@
_ (EAGAIN, -165, "Retry stream call with cursor") \
_ (INVALID_VALUE_4, -166, "Invalid value #4") \
_ (BUSY, -167, "Busy") \
- _ (BUG, -168, "Bug")
+ _ (BUG, -168, "Bug") \
+ _ (FEATURE_ALREADY_DISABLED, -169, "Feature already disabled") \
+ _ (FEATURE_ALREADY_ENABLED, -170, "Feature already enabled") \
+ _ (INVALID_PREFIX_LENGTH, -171, "Invalid prefix length")
typedef enum
{
diff --git a/src/vnet/ethernet/arp_packet.h b/src/vnet/ethernet/arp_packet.h
index c406dade6e2..9a9df680853 100644
--- a/src/vnet/ethernet/arp_packet.h
+++ b/src/vnet/ethernet/arp_packet.h
@@ -110,12 +110,10 @@ typedef enum
IP4_ARP_N_NEXT,
} ip4_arp_next_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
mac_address_t mac;
ip4_address_t ip4;
}) ethernet_arp_ip4_over_ethernet_address_t;
-/* *INDENT-ON* */
STATIC_ASSERT (sizeof (ethernet_arp_ip4_over_ethernet_address_t) == 10,
"Packet ethernet address and IP4 address too big");
diff --git a/src/vnet/ethernet/init.c b/src/vnet/ethernet/init.c
index f78b65c7cc0..3921e1ec0e6 100644
--- a/src/vnet/ethernet/init.c
+++ b/src/vnet/ethernet/init.c
@@ -62,7 +62,6 @@ add_type (ethernet_main_t * em, ethernet_type_t type, char *type_name)
}
/* Built-in ip4 tx feature path definition */
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ethernet_output, static) =
{
.arc_name = "ethernet-output",
@@ -77,7 +76,6 @@ VNET_FEATURE_INIT (ethernet_tx_drop, static) =
.node_name = "error-drop",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
static clib_error_t *
ethernet_init (vlib_main_t * vm)
@@ -107,7 +105,6 @@ ethernet_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ethernet_init) =
{
/*
@@ -119,7 +116,6 @@ VLIB_INIT_FUNCTION (ethernet_init) =
"llc_init",
"vnet_feature_init"),
};
-/* *INDENT-ON* */
ethernet_main_t *
ethernet_get_main (vlib_main_t * vm)
diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c
index 46d4203cda1..f1bb6b81070 100644
--- a/src/vnet/ethernet/interface.c
+++ b/src/vnet/ethernet/interface.c
@@ -303,8 +303,17 @@ ethernet_mac_change (vnet_hw_interface_t * hi,
{
ethernet_address_change_ctx_t *cb;
+ u32 id, sw_if_index;
vec_foreach (cb, em->address_change_callbacks)
- cb->function (em, hi->sw_if_index, cb->function_opaque);
+ {
+ cb->function (em, hi->sw_if_index, cb->function_opaque);
+ /* clang-format off */
+ hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
+ ({
+ cb->function (em, sw_if_index, cb->function_opaque);
+ }));
+ /* clang-format on */
+ }
}
return (NULL);
@@ -325,7 +334,6 @@ ethernet_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hi,
"underlying driver doesn't support changing Max Frame Size");
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (ethernet_hw_interface_class) = {
.name = "Ethernet",
.tx_hash_fn_type = VNET_HASH_FN_TYPE_ETHERNET,
@@ -338,7 +346,6 @@ VNET_HW_INTERFACE_CLASS (ethernet_hw_interface_class) = {
.mac_addr_change_function = ethernet_mac_change,
.set_max_frame_size = ethernet_set_max_frame_size,
};
-/* *INDENT-ON* */
uword
unformat_ethernet_interface (unformat_input_t * input, va_list * args)
@@ -527,7 +534,7 @@ simulated_ethernet_interface_tx (vlib_main_t * vm,
while (n_left_from >= 4)
{
u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3;
- u32 not_all_match_config;
+ u32x4 xor_ifx4;
/* Prefetch next iteration. */
if (PREDICT_TRUE (n_left_from >= 8))
@@ -544,12 +551,11 @@ simulated_ethernet_interface_tx (vlib_main_t * vm,
sw_if_index2 = vnet_buffer (b[2])->sw_if_index[VLIB_TX];
sw_if_index3 = vnet_buffer (b[3])->sw_if_index[VLIB_TX];
- not_all_match_config = (sw_if_index0 ^ sw_if_index1)
- ^ (sw_if_index2 ^ sw_if_index3);
- not_all_match_config += sw_if_index0 ^ new_rx_sw_if_index;
+ xor_ifx4 = u32x4_gather (&sw_if_index0, &sw_if_index1, &sw_if_index2,
+ &sw_if_index3);
/* Speed path / expected case: all pkts on the same intfc */
- if (PREDICT_TRUE (not_all_match_config == 0))
+ if (PREDICT_TRUE (u32x4_is_all_equal (xor_ifx4, new_rx_sw_if_index)))
{
next[0] = next_index;
next[1] = next_index;
@@ -752,7 +758,6 @@ simulated_ethernet_mac_change (vnet_hw_interface_t * hi,
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (ethernet_simulated_device_class) = {
.name = "Loopback",
.format_device_name = format_simulated_ethernet_name,
@@ -760,7 +765,6 @@ VNET_DEVICE_CLASS (ethernet_simulated_device_class) = {
.admin_up_down_function = simulated_ethernet_admin_up_down,
.mac_addr_change_function = simulated_ethernet_mac_change,
};
-/* *INDENT-ON* */
/*
* Maintain a bitmap of allocated loopback instance numbers.
@@ -949,13 +953,11 @@ create_simulated_ethernet_interfaces (vlib_main_t * vm,
* Example of how to create a loopback interface:
* @cliexcmd{loopback create-interface}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_simulated_ethernet_interface_command, static) = {
.path = "loopback create-interface",
.short_help = "loopback create-interface [mac <mac-addr>] [instance <instance>]",
.function = create_simulated_ethernet_interfaces,
};
-/* *INDENT-ON* */
/*?
* Create a loopback interface. Optionally, a MAC Address can be
@@ -968,13 +970,11 @@ VLIB_CLI_COMMAND (create_simulated_ethernet_interface_command, static) = {
* Example of how to create a loopback interface:
* @cliexcmd{create loopback interface}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_loopback_interface_command, static) = {
.path = "create loopback interface",
.short_help = "create loopback interface [mac <mac-addr>] [instance <instance>]",
.function = create_simulated_ethernet_interfaces,
};
-/* *INDENT-ON* */
ethernet_interface_t *
ethernet_get_interface (ethernet_main_t * em, u32 hw_if_index)
@@ -1185,13 +1185,11 @@ delete_sub_interface (vlib_main_t * vm,
* Example of how to delete a loopback interface:
* @cliexcmd{loopback delete-interface intfc loop0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (delete_simulated_ethernet_interface_command, static) = {
.path = "loopback delete-interface",
.short_help = "loopback delete-interface intfc <interface>",
.function = delete_simulated_ethernet_interfaces,
};
-/* *INDENT-ON* */
/*?
* Delete a loopback interface.
@@ -1203,13 +1201,11 @@ VLIB_CLI_COMMAND (delete_simulated_ethernet_interface_command, static) = {
* Example of how to delete a loopback interface:
* @cliexcmd{delete loopback interface intfc loop0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (delete_loopback_interface_command, static) = {
.path = "delete loopback interface",
.short_help = "delete loopback interface intfc <interface>",
.function = delete_simulated_ethernet_interfaces,
};
-/* *INDENT-ON* */
/*?
* Delete a sub-interface.
@@ -1218,13 +1214,11 @@ VLIB_CLI_COMMAND (delete_loopback_interface_command, static) = {
* Example of how to delete a sub-interface:
* @cliexcmd{delete sub-interface GigabitEthernet0/8/0.200}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (delete_sub_interface_command, static) = {
.path = "delete sub-interface",
.short_help = "delete sub-interface <interface>",
.function = delete_sub_interface,
};
-/* *INDENT-ON* */
/* ethernet { ... } configuration. */
/*?
diff --git a/src/vnet/ethernet/mac_address.c b/src/vnet/ethernet/mac_address.c
index 2237c3772b8..098b3ce19c1 100644
--- a/src/vnet/ethernet/mac_address.c
+++ b/src/vnet/ethernet/mac_address.c
@@ -15,13 +15,11 @@
#include <vnet/ethernet/mac_address.h>
-/* *INDENT-OFF* */
const mac_address_t ZERO_MAC_ADDRESS = {
.bytes = {
0, 0, 0, 0, 0, 0,
},
};
-/* *INDENT-ON* */
u8 *
format_mac_address_t (u8 * s, va_list * args)
diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c
index 4ef575a85fc..03cbdde1c2b 100644
--- a/src/vnet/ethernet/node.c
+++ b/src/vnet/ethernet/node.c
@@ -982,8 +982,31 @@ eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
else
{
for (int j = 0; j < 16; j++)
- if (next[j] == 0)
- slowpath_indices[n_slowpath++] = i + j;
+ {
+ if (next[j] == 0)
+ slowpath_indices[n_slowpath++] = i + j;
+ else if (dmac_check && main_is_l3 && dmacs_bad[i + j])
+ {
+ next[j] = 0;
+ slowpath_indices[n_slowpath++] = i + j;
+ }
+ }
+ }
+ }
+ else
+ {
+ if (dmac_check && main_is_l3)
+ {
+ u8x16 dmac_bad = u8x16_load_unaligned (&dmacs_bad[i]);
+ if (!u8x16_is_all_zero (dmac_bad))
+ {
+ for (int j = 0; j < 16; j++)
+ if (dmacs_bad[i + j])
+ {
+ next[j] = 0;
+ slowpath_indices[n_slowpath++] = i + j;
+ }
+ }
}
}
@@ -994,7 +1017,12 @@ eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
continue;
}
#endif
- if (main_is_l3 && etype[0] == et_ip4)
+ if (dmac_check && main_is_l3 && dmacs_bad[i])
+ {
+ next[0] = 0;
+ slowpath_indices[n_slowpath++] = i;
+ }
+ else if (main_is_l3 && etype[0] == et_ip4)
next[0] = next_ip4;
else if (main_is_l3 && etype[0] == et_ip6)
next[0] = next_ip6;
@@ -1052,7 +1080,7 @@ eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
}
else
{
- /* untagged packet with not well known etyertype */
+ /* untagged packet with not well known ethertype */
if (last_unknown_etype != etype)
{
last_unknown_etype = etype;
@@ -2098,7 +2126,6 @@ static char *ethernet_error_strings[] = {
#undef ethernet_error
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ethernet_input_node) = {
.name = "ethernet-input",
/* Takes a vector of packets. */
@@ -2140,7 +2167,6 @@ VLIB_REGISTER_NODE (ethernet_input_not_l2_node) = {
#undef _
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
void
diff --git a/src/vnet/ethernet/p2p_ethernet.c b/src/vnet/ethernet/p2p_ethernet.c
index ddf23901419..0ece84fd9cc 100644
--- a/src/vnet/ethernet/p2p_ethernet.c
+++ b/src/vnet/ethernet/p2p_ethernet.c
@@ -146,6 +146,8 @@ p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index,
vnet_feature_enable_disable ("device-input",
"p2p-ethernet-input",
parent_if_index, 1, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth", "p2p-ethernet-input",
+ parent_if_index, 1, 0, 0);
/* Set promiscuous mode on the l2 interface */
ethernet_set_flags (vnm, parent_if_index,
ETHERNET_INTERFACE_FLAG_ACCEPT_ALL);
@@ -153,7 +155,7 @@ p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index,
}
p2pm->p2p_ethernet_by_sw_if_index[parent_if_index]++;
/* set the interface mode */
- set_int_l2_mode (vm, vnm, MODE_L3, p2pe_subif_id, 0,
+ set_int_l2_mode (vm, vnm, MODE_L3, p2pe_sw_if_index, 0,
L2_BD_PORT_TYPE_NORMAL, 0, 0);
return 0;
}
@@ -176,6 +178,9 @@ p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index,
vnet_feature_enable_disable ("device-input",
"p2p-ethernet-input",
parent_if_index, 0, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth",
+ "p2p-ethernet-input",
+ parent_if_index, 0, 0, 0);
/* Disable promiscuous mode on the l2 interface */
ethernet_set_flags (vnm, parent_if_index, 0);
}
@@ -248,10 +253,11 @@ vnet_p2p_ethernet_add_del (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-VLIB_CLI_COMMAND (p2p_ethernet_add_del_command, static) =
-{
-.path = "p2p_ethernet ",.function = vnet_p2p_ethernet_add_del,.short_help =
- "p2p_ethernet <intfc> <mac-address> [sub-id <id> | del]",};
+VLIB_CLI_COMMAND (p2p_ethernet_add_del_command, static) = {
+ .path = "p2p_ethernet",
+ .function = vnet_p2p_ethernet_add_del,
+ .short_help = "p2p_ethernet <intfc> <mac-address> [sub-id <id>|del]",
+};
static clib_error_t *
p2p_ethernet_init (vlib_main_t * vm)
diff --git a/src/vnet/ethernet/p2p_ethernet_api.c b/src/vnet/ethernet/p2p_ethernet_api.c
index a9a8cc0a444..903678ce445 100644
--- a/src/vnet/ethernet/p2p_ethernet_api.c
+++ b/src/vnet/ethernet/p2p_ethernet_api.c
@@ -58,14 +58,12 @@ vl_api_p2p_ethernet_add_t_handler (vl_api_p2p_ethernet_add_t * mp)
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_P2P_ETHERNET_ADD_REPLY,
({
rmp->sw_if_index = htonl(p2pe_if_index);
}));
- /* *INDENT-ON* */
}
void
diff --git a/src/vnet/ethernet/p2p_ethernet_input.c b/src/vnet/ethernet/p2p_ethernet_input.c
index 3e9589e0e19..3d81e99cff2 100644
--- a/src/vnet/ethernet/p2p_ethernet_input.c
+++ b/src/vnet/ethernet/p2p_ethernet_input.c
@@ -235,7 +235,6 @@ VLIB_NODE_FN (p2p_ethernet_input_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (p2p_ethernet_input_node) = {
.name = "p2p-ethernet-input",
.vector_size = sizeof (u32),
@@ -253,7 +252,6 @@ VLIB_REGISTER_NODE (p2p_ethernet_input_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ethernet/packet.h b/src/vnet/ethernet/packet.h
index e1e42badd06..007f93596f3 100644
--- a/src/vnet/ethernet/packet.h
+++ b/src/vnet/ethernet/packet.h
@@ -184,7 +184,6 @@ typedef struct
#define ETHERNET_N_PBB (1 << 24)
} ethernet_pbb_header_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
/* Backbone source/destination address. */
@@ -201,7 +200,6 @@ typedef CLIB_PACKED (struct
/* 3 bit priority, 1 bit DEI, 1 bit UCA, 3 bit RES and 24 bit I_SID (service identifier) */
u32 priority_dei_uca_res_sid;
}) ethernet_pbb_header_packed_t;
-/* *INDENT-ON* */
#endif /* included_ethernet_packet_h */
diff --git a/src/vnet/feature/feature.c b/src/vnet/feature/feature.c
index 1750612783b..a7246fbb16a 100644
--- a/src/vnet/feature/feature.c
+++ b/src/vnet/feature/feature.c
@@ -533,13 +533,11 @@ show_features_command_fn (vlib_main_t * vm,
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_features_command, static) = {
.path = "show features",
.short_help = "show features [verbose]",
.function = show_features_command_fn,
};
-/* *INDENT-ON* */
/** Display the set of driver features configured on a specific interface
* Called by "show interface" handler
@@ -700,14 +698,12 @@ done:
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_feature_command, static) = {
.path = "set interface feature",
.short_help = "set interface feature <intfc> <feature_name> arc <arc_name> "
"[disable]",
.function = set_interface_features_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
vnet_feature_add_del_sw_interface (vnet_main_t * vnm, u32 sw_if_index,
diff --git a/src/vnet/feature/feature.h b/src/vnet/feature/feature.h
index a8235d3d9ee..b1917e8df13 100644
--- a/src/vnet/feature/feature.h
+++ b/src/vnet/feature/feature.h
@@ -344,8 +344,8 @@ vnet_device_input_have_features (u32 sw_if_index)
}
static_always_inline void
-vnet_feature_start_device_input_x1 (u32 sw_if_index, u32 * next0,
- vlib_buffer_t * b0)
+vnet_feature_start_device_input (u32 sw_if_index, u32 *next0,
+ vlib_buffer_t *b0)
{
vnet_feature_main_t *fm = &feature_main;
vnet_feature_config_main_t *cm;
@@ -356,118 +356,11 @@ vnet_feature_start_device_input_x1 (u32 sw_if_index, u32 * next0,
(clib_bitmap_get
(fm->sw_if_index_has_features[feature_arc_index], sw_if_index)))
{
- /*
- * Save next0 so that the last feature in the chain
- * can skip ethernet-input if indicated...
- */
- u16 adv;
-
- adv = device_input_next_node_advance[*next0];
- vlib_buffer_advance (b0, -adv);
-
- vnet_buffer (b0)->feature_arc_index = feature_arc_index;
- b0->current_config_index =
- vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
- vnet_get_config_data (&cm->config_main, &b0->current_config_index,
- next0, /* # bytes of config data */ 0);
- }
-}
-
-static_always_inline void
-vnet_feature_start_device_input_x2 (u32 sw_if_index,
- u32 * next0,
- u32 * next1,
- vlib_buffer_t * b0, vlib_buffer_t * b1)
-{
- vnet_feature_main_t *fm = &feature_main;
- vnet_feature_config_main_t *cm;
- u8 feature_arc_index = fm->device_input_feature_arc_index;
- cm = &fm->feature_config_mains[feature_arc_index];
-
- if (PREDICT_FALSE
- (clib_bitmap_get
- (fm->sw_if_index_has_features[feature_arc_index], sw_if_index)))
- {
- /*
- * Save next0 so that the last feature in the chain
- * can skip ethernet-input if indicated...
- */
- u16 adv;
-
- adv = device_input_next_node_advance[*next0];
- vlib_buffer_advance (b0, -adv);
-
- adv = device_input_next_node_advance[*next1];
- vlib_buffer_advance (b1, -adv);
-
- vnet_buffer (b0)->feature_arc_index = feature_arc_index;
- vnet_buffer (b1)->feature_arc_index = feature_arc_index;
- b0->current_config_index =
- vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
- b1->current_config_index = b0->current_config_index;
- vnet_get_config_data (&cm->config_main, &b0->current_config_index,
- next0, /* # bytes of config data */ 0);
- vnet_get_config_data (&cm->config_main, &b1->current_config_index,
- next1, /* # bytes of config data */ 0);
- }
-}
-
-static_always_inline void
-vnet_feature_start_device_input_x4 (u32 sw_if_index,
- u32 * next0,
- u32 * next1,
- u32 * next2,
- u32 * next3,
- vlib_buffer_t * b0,
- vlib_buffer_t * b1,
- vlib_buffer_t * b2, vlib_buffer_t * b3)
-{
- vnet_feature_main_t *fm = &feature_main;
- vnet_feature_config_main_t *cm;
- u8 feature_arc_index = fm->device_input_feature_arc_index;
- cm = &fm->feature_config_mains[feature_arc_index];
-
- if (PREDICT_FALSE
- (clib_bitmap_get
- (fm->sw_if_index_has_features[feature_arc_index], sw_if_index)))
- {
- /*
- * Save next0 so that the last feature in the chain
- * can skip ethernet-input if indicated...
- */
- u16 adv;
-
- adv = device_input_next_node_advance[*next0];
- vlib_buffer_advance (b0, -adv);
-
- adv = device_input_next_node_advance[*next1];
- vlib_buffer_advance (b1, -adv);
-
- adv = device_input_next_node_advance[*next2];
- vlib_buffer_advance (b2, -adv);
-
- adv = device_input_next_node_advance[*next3];
- vlib_buffer_advance (b3, -adv);
-
vnet_buffer (b0)->feature_arc_index = feature_arc_index;
- vnet_buffer (b1)->feature_arc_index = feature_arc_index;
- vnet_buffer (b2)->feature_arc_index = feature_arc_index;
- vnet_buffer (b3)->feature_arc_index = feature_arc_index;
-
b0->current_config_index =
vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
- b1->current_config_index = b0->current_config_index;
- b2->current_config_index = b0->current_config_index;
- b3->current_config_index = b0->current_config_index;
-
vnet_get_config_data (&cm->config_main, &b0->current_config_index,
next0, /* # bytes of config data */ 0);
- vnet_get_config_data (&cm->config_main, &b1->current_config_index,
- next1, /* # bytes of config data */ 0);
- vnet_get_config_data (&cm->config_main, &b2->current_config_index,
- next2, /* # bytes of config data */ 0);
- vnet_get_config_data (&cm->config_main, &b3->current_config_index,
- next3, /* # bytes of config data */ 0);
}
}
diff --git a/src/vnet/feature/registration.c b/src/vnet/feature/registration.c
index 537a4ada6e4..bc20412b9cf 100644
--- a/src/vnet/feature/registration.c
+++ b/src/vnet/feature/registration.c
@@ -351,12 +351,10 @@ again:
*in_feature_nodes = feature_nodes;
/* Finally, clean up all the shit we allocated */
- /* *INDENT-OFF* */
hash_foreach_pair (hp, index_by_name,
({
vec_add1 (keys_to_delete, (u8 *)hp->key);
}));
- /* *INDENT-ON* */
hash_free (index_by_name);
for (i = 0; i < vec_len (keys_to_delete); i++)
vec_free (keys_to_delete[i]);
diff --git a/src/vnet/fib/fib.c b/src/vnet/fib/fib.c
index ddfa830bb0f..cce03b4b49c 100644
--- a/src/vnet/fib/fib.c
+++ b/src/vnet/fib/fib.c
@@ -32,9 +32,7 @@ fib_module_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (fib_module_init) =
{
.runs_after = VLIB_INITS("dpo_module_init", "adj_module_init"),
};
-/* *INDENT-ON* */
diff --git a/src/vnet/fib/fib_api.c b/src/vnet/fib/fib_api.c
index 75a17cfca02..07d6699d87a 100644
--- a/src/vnet/fib/fib_api.c
+++ b/src/vnet/fib/fib_api.c
@@ -69,7 +69,7 @@ fib_api_next_hop_decode (const vl_api_fib_path_t *in,
*out = to_ip46 (FIB_API_PATH_NH_PROTO_IP6 == in->proto, (void *)&in->nh.address);
}
-static vl_api_fib_path_nh_proto_t
+vl_api_fib_path_nh_proto_t
fib_api_path_dpo_proto_to_nh (dpo_proto_t dproto)
{
switch (dproto)
@@ -108,7 +108,7 @@ fib_api_next_hop_encode (const fib_route_path_t *rpath,
sizeof (rpath->frp_addr.ip6));
}
-static int
+int
fib_api_path_nh_proto_to_dpo (vl_api_fib_path_nh_proto_t pp,
dpo_proto_t *dproto)
{
@@ -448,6 +448,9 @@ fib_api_route_add_del (u8 is_add,
fib_entry_flag_t entry_flags,
fib_route_path_t *rpaths)
{
+ if (!fib_prefix_validate(prefix)) {
+ return (VNET_API_ERROR_INVALID_PREFIX_LENGTH);
+ }
if (is_multipath)
{
if (vec_len(rpaths) == 0)
diff --git a/src/vnet/fib/fib_api.h b/src/vnet/fib/fib_api.h
index 7fd7d16cb33..0c59531b438 100644
--- a/src/vnet/fib/fib_api.h
+++ b/src/vnet/fib/fib_api.h
@@ -29,6 +29,8 @@ struct _vl_api_fib_prefix;
/**
* Encode and decode functions from the API types to internal types
*/
+extern vl_api_fib_path_nh_proto_t fib_api_path_dpo_proto_to_nh (dpo_proto_t dproto);
+extern int fib_api_path_nh_proto_to_dpo (vl_api_fib_path_nh_proto_t pp, dpo_proto_t *dproto);
extern void fib_api_path_encode(const fib_route_path_t * api_rpath,
vl_api_fib_path_t *out);
extern int fib_api_path_decode(vl_api_fib_path_t *in,
diff --git a/src/vnet/fib/fib_attached_export.c b/src/vnet/fib/fib_attached_export.c
index 206d10e7140..c6ba0575a04 100644
--- a/src/vnet/fib/fib_attached_export.c
+++ b/src/vnet/fib/fib_attached_export.c
@@ -378,6 +378,7 @@ fib_attached_export_purge (fib_entry_t *fib_entry)
*/
if (0 == --export->faee_locks)
{
+ vec_free (export->faee_importers);
pool_put(fib_ae_export_pool, export);
fib_entry_delegate_remove(export_entry,
FIB_ENTRY_DELEGATE_ATTACHED_EXPORT);
diff --git a/src/vnet/fib/fib_bfd.c b/src/vnet/fib/fib_bfd.c
index b02fbc67a63..6bfd29ae2cc 100644
--- a/src/vnet/fib/fib_bfd.c
+++ b/src/vnet/fib/fib_bfd.c
@@ -188,9 +188,7 @@ fib_bfd_main_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (fib_bfd_main_init) =
{
.runs_after = VLIB_INITS("bfd_main_init"),
};
-/* *INDENT-ON* */
diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h
index 4053ff65181..7331f803ec4 100644
--- a/src/vnet/fib/fib_entry.h
+++ b/src/vnet/fib/fib_entry.h
@@ -154,9 +154,13 @@ typedef enum fib_entry_src_attribute_t_ {
*/
FIB_ENTRY_SRC_ATTRIBUTE_INHERITED,
/**
+ * the source is currently used as glean src address
+ */
+ FIB_ENTRY_SRC_ATTRIBUTE_PROVIDES_GLEAN,
+ /**
* Marker. add new entries before this one.
*/
- FIB_ENTRY_SRC_ATTRIBUTE_LAST = FIB_ENTRY_SRC_ATTRIBUTE_INHERITED,
+ FIB_ENTRY_SRC_ATTRIBUTE_LAST = FIB_ENTRY_SRC_ATTRIBUTE_PROVIDES_GLEAN,
} fib_entry_src_attribute_t;
@@ -166,6 +170,7 @@ typedef enum fib_entry_src_attribute_t_ {
[FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE] = "active", \
[FIB_ENTRY_SRC_ATTRIBUTE_STALE] = "stale", \
[FIB_ENTRY_SRC_ATTRIBUTE_INHERITED] = "inherited", \
+ [FIB_ENTRY_SRC_ATTRIBUTE_PROVIDES_GLEAN] = "provides-glean", \
}
#define FOR_EACH_FIB_SRC_ATTRIBUTE(_item) \
@@ -180,6 +185,7 @@ typedef enum fib_entry_src_flag_t_ {
FIB_ENTRY_SRC_FLAG_ACTIVE = (1 << FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE),
FIB_ENTRY_SRC_FLAG_STALE = (1 << FIB_ENTRY_SRC_ATTRIBUTE_STALE),
FIB_ENTRY_SRC_FLAG_INHERITED = (1 << FIB_ENTRY_SRC_ATTRIBUTE_INHERITED),
+ FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN = (1 << FIB_ENTRY_SRC_ATTRIBUTE_PROVIDES_GLEAN),
} __attribute__ ((packed)) fib_entry_src_flag_t;
extern u8 * format_fib_entry_src_flags(u8 *s, va_list *args);
@@ -421,6 +427,9 @@ extern const int fib_entry_get_dpo_for_source (
fib_node_index_t fib_entry_index,
fib_source_t source,
dpo_id_t *dpo);
+extern fib_node_index_t fib_entry_get_path_list_for_source (
+ fib_node_index_t fib_entry_index,
+ fib_source_t source);
extern adj_index_t fib_entry_get_adj(fib_node_index_t fib_entry_index);
diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c
index 39e719e6a7a..c79b745b5b5 100644
--- a/src/vnet/fib/fib_entry_src.c
+++ b/src/vnet/fib/fib_entry_src.c
@@ -757,6 +757,7 @@ fib_entry_src_action_uninstall (fib_entry_t *fib_entry)
&fib_entry->fe_prefix,
&fib_entry->fe_lb);
+ vlib_worker_wait_one_loop();
dpo_reset(&fib_entry->fe_lb);
}
}
@@ -1797,6 +1798,25 @@ fib_entry_get_dpo_for_source (fib_node_index_t fib_entry_index,
return (0);
}
+fib_node_index_t
+fib_entry_get_path_list_for_source (fib_node_index_t fib_entry_index,
+ fib_source_t source)
+{
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *esrc;
+
+ if (FIB_NODE_INDEX_INVALID == fib_entry_index)
+ return FIB_NODE_INDEX_INVALID;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+ esrc = fib_entry_src_find(fib_entry, source);
+
+ if (esrc)
+ return esrc->fes_pl;
+
+ return FIB_NODE_INDEX_INVALID;
+}
+
u32
fib_entry_get_resolving_interface_for_source (fib_node_index_t entry_index,
fib_source_t source)
diff --git a/src/vnet/fib/fib_entry_src_interface.c b/src/vnet/fib/fib_entry_src_interface.c
index 402369d1dfc..c5028dc8798 100644
--- a/src/vnet/fib/fib_entry_src_interface.c
+++ b/src/vnet/fib/fib_entry_src_interface.c
@@ -87,8 +87,16 @@ fib_entry_src_interface_update_glean (fib_entry_t *cover,
if (fib_prefix_is_cover(&adj->sub_type.glean.rx_pfx,
&local->fe_prefix))
{
- adj->sub_type.glean.rx_pfx.fp_addr = local->fe_prefix.fp_addr;
- return (1);
+ fib_entry_src_t *local_src;
+
+ local_src = fib_entry_src_find (local, FIB_SOURCE_INTERFACE);
+ if (local_src != NULL)
+ {
+ adj->sub_type.glean.rx_pfx.fp_addr =
+ local->fe_prefix.fp_addr;
+ local_src->fes_flags |= FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN;
+ return (1);
+ }
}
}
}
@@ -116,6 +124,52 @@ fib_entry_src_interface_path_swap (fib_entry_src_t *src,
src->fes_pl = fib_path_list_create(pl_flags, paths);
}
+typedef struct fesi_find_glean_ctx_t_ {
+ fib_node_index_t glean_node_index;
+} fesi_find_glean_ctx_t;
+
+static walk_rc_t
+fib_entry_src_interface_find_glean_walk (fib_entry_t *cover,
+ fib_node_index_t covered,
+ void *ctx)
+{
+ fesi_find_glean_ctx_t *find_glean_ctx = ctx;
+ fib_entry_t *covered_entry;
+ fib_entry_src_t *covered_src;
+
+ covered_entry = fib_entry_get (covered);
+ covered_src = fib_entry_src_find (covered_entry, FIB_SOURCE_INTERFACE);
+ if ((covered_src != NULL) &&
+ (covered_src->fes_flags & FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN))
+ {
+ find_glean_ctx->glean_node_index = covered;
+ return WALK_STOP;
+ }
+
+ return WALK_CONTINUE;
+}
+
+static fib_entry_t *
+fib_entry_src_interface_find_glean (fib_entry_t *cover)
+{
+ fib_entry_src_t *src;
+
+ src = fib_entry_src_find (cover, FIB_SOURCE_INTERFACE);
+ if (src == NULL)
+ /* the cover is not an interface source */
+ return NULL;
+
+ fesi_find_glean_ctx_t ctx = {
+ .glean_node_index = ~0,
+ };
+
+ fib_entry_cover_walk (cover, fib_entry_src_interface_find_glean_walk,
+ &ctx);
+
+ return (ctx.glean_node_index == ~0) ? NULL :
+ fib_entry_get (ctx.glean_node_index);
+}
+
/*
* Source activate.
 * Called when the source is the new longest best source on the entry
@@ -128,6 +182,8 @@ fib_entry_src_interface_activate (fib_entry_src_t *src,
if (FIB_ENTRY_FLAG_LOCAL & src->fes_entry_flags)
{
+ u8 update_glean;
+
/*
* Track the covering attached/connected cover. This is so that
* during an attached export of the cover, this local prefix is
@@ -141,10 +197,17 @@ fib_entry_src_interface_activate (fib_entry_src_t *src,
cover = fib_entry_get(src->u.interface.fesi_cover);
+ /*
+ * Before adding as a child of the cover, check whether an existing
+ * child has already been used to populate the glean adjacency. If so,
+ * we don't need to update the adjacency.
+ */
+ update_glean = (fib_entry_src_interface_find_glean (cover) == NULL);
src->u.interface.fesi_sibling =
fib_entry_cover_track(cover, fib_entry_get_index(fib_entry));
- fib_entry_src_interface_update_glean(cover, fib_entry);
+ if (update_glean)
+ fib_entry_src_interface_update_glean(cover, fib_entry);
}
return (!0);
@@ -167,15 +230,19 @@ fib_entry_src_interface_deactivate (fib_entry_src_t *src,
if (FIB_NODE_INDEX_INVALID != src->u.interface.fesi_cover)
{
cover = fib_entry_get(src->u.interface.fesi_cover);
-
fib_entry_cover_untrack(cover, src->u.interface.fesi_sibling);
src->u.interface.fesi_cover = FIB_NODE_INDEX_INVALID;
src->u.interface.fesi_sibling = ~0;
- fib_entry_cover_walk(cover,
- fib_entry_src_interface_update_glean_walk,
- NULL);
+ /* If this was the glean address, find a new one */
+ if (src->fes_flags & FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN)
+ {
+ fib_entry_cover_walk(cover,
+ fib_entry_src_interface_update_glean_walk,
+ NULL);
+ src->fes_flags &= ~FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN;
+ }
}
}
diff --git a/src/vnet/fib/fib_node.c b/src/vnet/fib/fib_node.c
index ff72bcfde40..e668c4fc51f 100644
--- a/src/vnet/fib/fib_node.c
+++ b/src/vnet/fib/fib_node.c
@@ -268,7 +268,6 @@ fib_memory_show (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
/*?
* The '<em>sh fib memory </em>' command displays the memory usage for each
* FIB object type.
@@ -301,4 +300,3 @@ VLIB_CLI_COMMAND (show_fib_memory, static) = {
.function = fib_memory_show,
.short_help = "show fib memory",
};
-/* *INDENT-ON* */
diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c
index db78587fb27..95e7cb6ba7d 100644
--- a/src/vnet/fib/fib_path.c
+++ b/src/vnet/fib/fib_path.c
@@ -501,11 +501,9 @@ format_fib_path (u8 * s, va_list * args)
else
{
s = format (s, " %U",
- format_vnet_sw_interface_name,
+ format_vnet_sw_if_index_name,
vnm,
- vnet_get_sw_interface(
- vnm,
- path->attached_next_hop.fp_interface));
+ path->attached_next_hop.fp_interface);
if (vnet_sw_interface_is_p2p(vnet_get_main(),
path->attached_next_hop.fp_interface))
{
@@ -532,11 +530,8 @@ format_fib_path (u8 * s, va_list * args)
else
{
s = format (s, " %U",
- format_vnet_sw_interface_name,
- vnm,
- vnet_get_sw_interface(
- vnm,
- path->attached.fp_interface));
+ format_vnet_sw_if_index_name,
+ vnm, path->attached.fp_interface);
}
break;
case FIB_PATH_TYPE_RECURSIVE:
@@ -587,11 +582,8 @@ format_fib_path (u8 * s, va_list * args)
break;
case FIB_PATH_TYPE_DVR:
s = format (s, " %U",
- format_vnet_sw_interface_name,
- vnm,
- vnet_get_sw_interface(
- vnm,
- path->dvr.fp_interface));
+ format_vnet_sw_if_index_name,
+ vnm, path->dvr.fp_interface);
break;
case FIB_PATH_TYPE_DEAG:
s = format (s, " %sfib-index:%d",
@@ -1365,7 +1357,8 @@ fib_path_create (fib_node_index_t pl_index,
dpo_copy(&path->exclusive.fp_ex_dpo, &rpath->dpo);
}
else if ((path->fp_cfg_flags & FIB_PATH_CFG_FLAG_ICMP_PROHIBIT) ||
- (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_ICMP_UNREACH))
+ (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_ICMP_UNREACH) ||
+ (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_DROP))
{
path->fp_type = FIB_PATH_TYPE_SPECIAL;
}
@@ -1998,7 +1991,11 @@ fib_path_resolve (fib_node_index_t path_index)
}
else
{
- fib_prefix_from_ip46_addr(&path->recursive.fp_nh.fp_ip, &pfx);
+ ASSERT(!ip46_address_is_zero(&path->recursive.fp_nh.fp_ip));
+
+ fib_protocol_t fp = (ip46_address_is_ip4(&path->recursive.fp_nh.fp_ip) ?
+ FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6);
+ fib_prefix_from_ip46_addr(fp, &path->recursive.fp_nh.fp_ip, &pfx);
}
fib_table_lock(path->recursive.fp_tbl_id,
diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c
index 3a46d226ebd..b2a32d0da56 100644
--- a/src/vnet/fib/fib_table.c
+++ b/src/vnet/fib/fib_table.c
@@ -25,6 +25,13 @@
const static char * fib_table_flags_strings[] = FIB_TABLE_ATTRIBUTES;
+/*
+ * Default names for IP4, IP6, and MPLS FIB table index 0.
+ * Nominally like "ipv6-VRF:0", but this will override that name if set
+ * in a config section of the startup.conf file.
+ */
+char *fib_table_default_names[FIB_PROTOCOL_MAX];
+
fib_table_t *
fib_table_get (fib_node_index_t index,
fib_protocol_t proto)
@@ -534,7 +541,11 @@ fib_table_route_path_fixup (const fib_prefix_t *prefix,
else if (fib_route_path_is_attached(path))
{
path->frp_flags |= FIB_ROUTE_PATH_GLEAN;
- fib_prefix_normalize(prefix, &path->frp_connected);
+ /*
+ * attached prefixes are not suitable as the source of ARP requests
+ * so don't save the prefix in the glean adj
+ */
+ clib_memset(&path->frp_connected, 0, sizeof(path->frp_connected));
}
if (*eflags & FIB_ENTRY_FLAG_DROP)
{
@@ -1149,21 +1160,29 @@ fib_table_find_or_create_and_lock_i (fib_protocol_t proto,
fib_table = fib_table_get(fi, proto);
- if (NULL == fib_table->ft_desc)
+ if (fib_table->ft_desc)
+ return fi;
+
+ if (name && name[0])
{
- if (name && name[0])
- {
- fib_table->ft_desc = format(NULL, "%s", name);
- }
- else
- {
- fib_table->ft_desc = format(NULL, "%U-VRF:%d",
- format_fib_protocol, proto,
- table_id);
- }
+ fib_table->ft_desc = format(NULL, "%s", name);
+ return fi;
}
- return (fi);
+ if (table_id == 0)
+ {
+ char *default_name = fib_table_default_names[proto];
+ if (default_name && default_name[0])
+ {
+ fib_table->ft_desc = format(NULL, "%s", default_name);
+ return fi;
+ }
+ }
+
+ fib_table->ft_desc = format(NULL, "%U-VRF:%d",
+ format_fib_protocol, proto,
+ table_id);
+ return fi;
}
u32
diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h
index 11137e173cf..0eaaa67eea2 100644
--- a/src/vnet/fib/fib_table.h
+++ b/src/vnet/fib/fib_table.h
@@ -122,6 +122,15 @@ typedef struct fib_table_t_
u8* ft_desc;
} fib_table_t;
+
+/**
+ * @brief
+ * Default names for IP4, IP6, and MPLS FIB table index 0.
+ * Nominally like "ipv4-VRF:0", but this will override that name if set
+ * in a config section of the startup.conf file.
+ */
+extern char *fib_table_default_names[FIB_PROTOCOL_MAX];
+
/**
* @brief
* Format the description/name of the table
diff --git a/src/vnet/fib/fib_types.c b/src/vnet/fib/fib_types.c
index 7eeb79fffa5..c4472c7122d 100644
--- a/src/vnet/fib/fib_types.c
+++ b/src/vnet/fib/fib_types.c
@@ -78,16 +78,15 @@ format_fib_mpls_label (u8 *s, va_list *ap)
}
void
-fib_prefix_from_ip46_addr (const ip46_address_t *addr,
+fib_prefix_from_ip46_addr (fib_protocol_t fproto,
+ const ip46_address_t *addr,
fib_prefix_t *pfx)
{
- ASSERT(!ip46_address_is_zero(addr));
+ ASSERT(FIB_PROTOCOL_MPLS != fproto);
- pfx->fp_proto = ((ip46_address_is_ip4(addr) ?
- FIB_PROTOCOL_IP4 :
- FIB_PROTOCOL_IP6));
- pfx->fp_len = ((ip46_address_is_ip4(addr) ?
- 32 : 128));
+ pfx->fp_proto = fproto;
+ pfx->fp_len = ((FIB_PROTOCOL_IP4 == fproto) ?
+ 32 : 128);
pfx->fp_addr = *addr;
pfx->___fp___pad = 0;
}
@@ -709,6 +708,13 @@ unformat_fib_route_path (unformat_input_t * input, va_list * args)
rpath->frp_proto = DPO_PROTO_IP4;
rpath->frp_flags = FIB_ROUTE_PATH_INTF_RX;
}
+ else if (unformat (input, "rx-ip6 %U",
+ unformat_vnet_sw_interface, vnm,
+ &rpath->frp_sw_if_index))
+ {
+ rpath->frp_proto = DPO_PROTO_IP6;
+ rpath->frp_flags = FIB_ROUTE_PATH_INTF_RX;
+ }
else if (unformat (input, "local"))
{
clib_memset (&rpath->frp_addr, 0, sizeof (rpath->frp_addr));
@@ -776,6 +782,7 @@ fib_route_path_is_attached (const fib_route_path_t *rpath)
* L3 game with these
*/
if (rpath->frp_flags & (FIB_ROUTE_PATH_DVR |
+ FIB_ROUTE_PATH_INTF_RX |
FIB_ROUTE_PATH_UDP_ENCAP))
{
return (0);
diff --git a/src/vnet/fib/fib_types.h b/src/vnet/fib/fib_types.h
index dbd4e97e867..b9346c75108 100644
--- a/src/vnet/fib/fib_types.h
+++ b/src/vnet/fib/fib_types.h
@@ -276,8 +276,9 @@ extern void fib_prefix_normalize(const fib_prefix_t *p,
/**
* \brief Host prefix from ip
*/
-extern void fib_prefix_from_ip46_addr (const ip46_address_t *addr,
- fib_prefix_t *pfx);
+extern void fib_prefix_from_ip46_addr (fib_protocol_t fproto,
+ const ip46_address_t *addr,
+ fib_prefix_t *pfx);
extern u8 * format_fib_prefix(u8 * s, va_list * args);
extern u8 * format_fib_forw_chain_type(u8 * s, va_list * args);
@@ -632,7 +633,7 @@ extern int fib_route_path_is_attached (const fib_route_path_t *rpath);
/**
* A help string to list the FIB path options
*/
-#define FIB_ROUTE_PATH_HELP "[next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4 <interface>] [out-labels <value value value>]"
+#define FIB_ROUTE_PATH_HELP "[next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 <interface>] [out-labels <value value value>]"
/**
 * return code to control path-list walk
diff --git a/src/vnet/fib/fib_urpf_list.c b/src/vnet/fib/fib_urpf_list.c
index b1bbe7399d1..67be6699a0e 100644
--- a/src/vnet/fib/fib_urpf_list.c
+++ b/src/vnet/fib/fib_urpf_list.c
@@ -228,7 +228,6 @@ show_fib_urpf_list_command (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
/*?
* The '<em>sh fib uRPF [index] </em>' command displays the uRPF lists
*
@@ -246,4 +245,3 @@ VLIB_CLI_COMMAND (show_fib_urpf_list, static) = {
.function = show_fib_urpf_list_command,
.short_help = "show fib uRPF",
};
-/* *INDENT-OFF* */
diff --git a/src/vnet/fib/fib_walk.c b/src/vnet/fib/fib_walk.c
index b3b2b1e7944..236607cb891 100644
--- a/src/vnet/fib/fib_walk.c
+++ b/src/vnet/fib/fib_walk.c
@@ -611,13 +611,11 @@ fib_walk_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (fib_walk_process_node,static) = {
.function = fib_walk_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "fib-walk",
};
-/* *INDENT-ON* */
/**
* @brief Allocate a new walk object
diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c
index 8e580a54716..0eff8d0d485 100644
--- a/src/vnet/fib/ip4_fib.c
+++ b/src/vnet/fib/ip4_fib.c
@@ -621,10 +621,29 @@ ip4_show_fib (vlib_main_t * vm,
* 32 4
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip4_show_fib_command, static) = {
.path = "show ip fib",
.short_help = "show ip fib [summary] [table <table-id>] [index <fib-id>] [<ip4-addr>[/<mask>]] [mtrie] [detail]",
.function = ip4_show_fib,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+ip_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ char *default_name = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "default-table-name %s", &default_name))
+ ;
+ else
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+
+ fib_table_default_names[FIB_PROTOCOL_IP4] = default_name;
+
+ return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (ip_config, "ip");
diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c
index 6c73d19d8e3..d37b77e08a4 100644
--- a/src/vnet/fib/ip6_fib.c
+++ b/src/vnet/fib/ip6_fib.c
@@ -862,19 +862,18 @@ ip6_show_fib (vlib_main_t * vm,
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_show_fib_command, static) = {
.path = "show ip6 fib",
.short_help = "show ip6 fib [summary] [table <table-id>] [index <fib-id>] [<ip6-addr>[/<width>]] [detail]",
.function = ip6_show_fib,
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_config (vlib_main_t * vm, unformat_input_t * input)
{
uword heapsize = 0;
u32 nbuckets = 0;
+ char *default_name = 0;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
@@ -883,6 +882,8 @@ ip6_config (vlib_main_t * vm, unformat_input_t * input)
else if (unformat (input, "heap-size %U",
unformat_memory_size, &heapsize))
;
+ else if (unformat (input, "default-table-name %s", &default_name))
+ ;
else
return clib_error_return (0, "unknown input '%U'",
format_unformat_error, input);
@@ -890,6 +891,7 @@ ip6_config (vlib_main_t * vm, unformat_input_t * input)
ip6_fib_table_nbuckets = nbuckets;
ip6_fib_table_size = heapsize;
+ fib_table_default_names[FIB_PROTOCOL_IP6] = default_name;
return 0;
}
diff --git a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c
index 5dcd70b4c53..767fc84c8a8 100644
--- a/src/vnet/fib/mpls_fib.c
+++ b/src/vnet/fib/mpls_fib.c
@@ -481,3 +481,24 @@ VLIB_CLI_COMMAND (mpls_fib_show_command, static) = {
.short_help = "show mpls fib [summary] [table <n>]",
.function = mpls_fib_show,
};
+
+static clib_error_t *
+mpls_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ char *default_name = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "default-table-name %s", &default_name))
+ ;
+ else
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+
+ fib_table_default_names[FIB_PROTOCOL_MPLS] = default_name;
+
+ return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (mpls_config, "mpls");
diff --git a/src/vnet/flow/flow.api b/src/vnet/flow/flow.api
index dff3eec370d..1e807b539d5 100644
--- a/src/vnet/flow/flow.api
+++ b/src/vnet/flow/flow.api
@@ -13,7 +13,7 @@
* limitations under the License.
*/
-option version = "0.0.3";
+option version = "1.0.3";
import "vnet/interface_types.api";
import "vnet/ip/ip_types.api";
@@ -26,6 +26,8 @@ import "vnet/flow/flow_types.api";
*/
define flow_add
{
+ option deprecated;
+
u32 client_index;
u32 context;
vl_api_flow_rule_t flow;
@@ -52,6 +54,8 @@ define flow_add_v2
*/
define flow_add_reply
{
+ option deprecated;
+
u32 context;
i32 retval;
u32 flow_index;
diff --git a/src/vnet/flow/flow.c b/src/vnet/flow/flow.c
index 9b6a376af3e..eda15356958 100644
--- a/src/vnet/flow/flow.c
+++ b/src/vnet/flow/flow.c
@@ -74,12 +74,10 @@ vnet_flow_del (vnet_main_t * vnm, u32 flow_index)
if (f == 0)
return VNET_FLOW_ERROR_NO_SUCH_ENTRY;
- /* *INDENT-OFF* */
hash_foreach (hw_if_index, private_data, f->private_data,
({
vnet_flow_disable (vnm, flow_index, hw_if_index);
}));
- /* *INDENT-ON* */
hash_free (f->private_data);
clib_memset (f, 0, sizeof (*f));
diff --git a/src/vnet/flow/flow.h b/src/vnet/flow/flow.h
index 194579b88d8..ada822257e3 100644
--- a/src/vnet/flow/flow.h
+++ b/src/vnet/flow/flow.h
@@ -45,7 +45,16 @@
_ (IP4_GTPC, ip4_gtpc, "ipv4-gtpc") \
_ (IP4_GTPU, ip4_gtpu, "ipv4-gtpu") \
/* generic flow */ \
- _ (GENERIC, generic, "generic")
+ _ (GENERIC, generic, "generic") \
+ /* IP in IP */ \
+ _ (IP6_IP6, ip6_ip6, "ipv6-ipv6") \
+ _ (IP6_IP4, ip6_ip4, "ipv6-ipv4") \
+ _ (IP4_IP6, ip4_ip6, "ipv4-ipv6") \
+ _ (IP4_IP4, ip4_ip4, "ipv4-ipv4") \
+ _ (IP6_IP6_N_TUPLE, ip6_ip6_n_tuple, "ipv6-ipv6-n-tuple") \
+ _ (IP6_IP4_N_TUPLE, ip6_ip4_n_tuple, "ipv6-ipv4-n-tuple") \
+ _ (IP4_IP6_N_TUPLE, ip4_ip6_n_tuple, "ipv4-ipv6-n-tuple") \
+ _ (IP4_IP4_N_TUPLE, ip4_ip4_n_tuple, "ipv4-ipv4-n-tuple")
#define foreach_flow_entry_ethernet \
_fe(ethernet_header_t, eth_hdr)
@@ -106,6 +115,42 @@
foreach_flow_entry_ip4_n_tuple \
_fe(u32, teid)
+#define foreach_flow_entry_ip6_ip6 \
+ foreach_flow_entry_ip6 _fe (ip6_address_and_mask_t, in_src_addr) \
+ _fe (ip6_address_and_mask_t, in_dst_addr) \
+ _fe (ip_prot_and_mask_t, in_protocol)
+
+#define foreach_flow_entry_ip6_ip6_n_tuple \
+ foreach_flow_entry_ip6_ip6 _fe (ip_port_and_mask_t, in_src_port) \
+ _fe (ip_port_and_mask_t, in_dst_port)
+
+#define foreach_flow_entry_ip6_ip4 \
+ foreach_flow_entry_ip6 _fe (ip4_address_and_mask_t, in_src_addr) \
+ _fe (ip4_address_and_mask_t, in_dst_addr) \
+ _fe (ip_prot_and_mask_t, in_protocol)
+
+#define foreach_flow_entry_ip6_ip4_n_tuple \
+ foreach_flow_entry_ip6_ip4 _fe (ip_port_and_mask_t, in_src_port) \
+ _fe (ip_port_and_mask_t, in_dst_port)
+
+#define foreach_flow_entry_ip4_ip6 \
+ foreach_flow_entry_ip4 _fe (ip6_address_and_mask_t, in_src_addr) \
+ _fe (ip6_address_and_mask_t, in_dst_addr) \
+ _fe (ip_prot_and_mask_t, in_protocol)
+
+#define foreach_flow_entry_ip4_ip6_n_tuple \
+ foreach_flow_entry_ip4_ip6 _fe (ip_port_and_mask_t, in_src_port) \
+ _fe (ip_port_and_mask_t, in_dst_port)
+
+#define foreach_flow_entry_ip4_ip4 \
+ foreach_flow_entry_ip4 _fe (ip4_address_and_mask_t, in_src_addr) \
+ _fe (ip4_address_and_mask_t, in_dst_addr) \
+ _fe (ip_prot_and_mask_t, in_protocol)
+
+#define foreach_flow_entry_ip4_ip4_n_tuple \
+ foreach_flow_entry_ip4_ip4 _fe (ip_port_and_mask_t, in_src_port) \
+ _fe (ip_port_and_mask_t, in_dst_port)
+
#define foreach_flow_entry_generic _fe (generic_pattern_t, pattern)
#define foreach_flow_action \
@@ -155,6 +200,7 @@ typedef enum
_ (19, NVGRE, "nvgre") \
_ (20, GTPU, "gtpu") \
_ (21, ESP, "esp") \
+ _ (22, L2TPV3, "l2tpv3") \
_ (60, L4_DST_ONLY, "l4-dst-only") \
_ (61, L4_SRC_ONLY, "l4-src-only") \
_ (62, L3_DST_ONLY, "l3-dst-only") \
diff --git a/src/vnet/flow/flow_api.c b/src/vnet/flow/flow_api.c
index 0e25fb3017b..bfe97ec2978 100644
--- a/src/vnet/flow/flow_api.c
+++ b/src/vnet/flow/flow_api.c
@@ -299,12 +299,10 @@ vl_api_flow_add_t_handler (vl_api_flow_add_t * mp)
rv = vnet_flow_add (vnm, &flow, &flow_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_FLOW_ADD_REPLY,
({
rmp->flow_index = ntohl (flow_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -328,7 +326,7 @@ vl_api_flow_add_v2_t_handler (vl_api_flow_add_v2_t *mp)
flow.buffer_advance = ntohl (f->buffer_advance);
flow.queue_index = ntohl (f->queue_index);
flow.queue_num = ntohl (f->queue_num);
- flow.rss_types = ntohl (f->rss_types);
+ flow.rss_types = clib_net_to_host_u64 (f->rss_types);
flow.rss_fun = ntohl (f->rss_fun);
switch (flow.type)
diff --git a/src/vnet/flow/flow_cli.c b/src/vnet/flow/flow_cli.c
index 5f44a099f57..e4b73717241 100644
--- a/src/vnet/flow/flow_cli.c
+++ b/src/vnet/flow/flow_cli.c
@@ -138,13 +138,11 @@ format_flow_enabled_hw (u8 * s, va_list * args)
u32 hw_if_index;
uword private_data;
vnet_main_t *vnm = vnet_get_main ();
- /* *INDENT-OFF* */
hash_foreach (hw_if_index, private_data, f->private_data,
({
t = format (t, "%s%U", t ? ", " : "",
format_vnet_hw_if_index_name, vnm, hw_if_index);
}));
- /* *INDENT-ON* */
s = format (s, "%v", t);
vec_free (t);
return s;
@@ -228,7 +226,6 @@ show_flow_entry (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "%s: %s", "spec", f->generic.pattern.spec);
vlib_cli_output (vm, "%s: %s", "mask", f->generic.pattern.mask);
}
- /* *INDENT-OFF* */
hash_foreach (hw_if_index, private_data, f->private_data,
({
hi = vnet_get_hw_interface (vnm, hw_if_index);
@@ -239,12 +236,10 @@ show_flow_entry (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, " %U\n", dev_class->format_flow,
hi->dev_instance, f->index, private_data);
}));
- /* *INDENT-ON* */
return 0;
}
no_args:
- /* *INDENT-OFF* */
pool_foreach (f, fm->global_flow_pool)
{
vlib_cli_output (vm, "%U\n", format_flow, f);
@@ -254,18 +249,15 @@ no_args:
vlib_cli_output (vm, "%s: %s", "mask", f->generic.pattern.mask);
}
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_flow_entry_command, static) = {
.path = "show flow entry",
.short_help = "show flow entry [index <index>]",
.function = show_flow_entry,
};
-/* *INDENT-ON* */
static clib_error_t *
show_flow_ranges (vlib_main_t * vm, unformat_input_t * input,
@@ -276,22 +268,18 @@ show_flow_ranges (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "%8s %8s %s", "Start", "Count", "Owner");
- /* *INDENT-OFF* */
vec_foreach (r, fm->ranges)
{
vlib_cli_output (vm, "%8u %8u %s", r->start, r->count, r->owner);
};
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_flow_ranges_command, static) = {
.path = "show flow ranges",
.short_help = "show flow ranges",
.function = show_flow_ranges,
};
-/* *INDENT-ON* */
static clib_error_t *
show_flow_interface (vlib_main_t * vm, unformat_input_t * input,
@@ -329,13 +317,11 @@ show_flow_interface (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_flow_interface_command, static) = {
.path = "show flow interface",
.short_help = "show flow interface <interface name>",
.function = show_flow_interface,
};
-/* *INDENT-ON* */
static clib_error_t *
test_flow (vlib_main_t * vm, unformat_input_t * input,
@@ -366,15 +352,16 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
u32 vni = 0;
u32 queue_start = 0, queue_end = 0;
vnet_flow_type_t type = VNET_FLOW_TYPE_UNKNOWN;
- ip4_address_and_mask_t ip4s = { };
- ip4_address_and_mask_t ip4d = { };
- ip6_address_and_mask_t ip6s = { };
- ip6_address_and_mask_t ip6d = { };
- ip_port_and_mask_t sport = { };
- ip_port_and_mask_t dport = { };
- ip_prot_and_mask_t protocol = { };
+ ip4_address_and_mask_t ip4s = {}, in_ip4s = {};
+ ip4_address_and_mask_t ip4d = {}, in_ip4d = {};
+ ip6_address_and_mask_t ip6s = {}, in_ip6s = {};
+ ip6_address_and_mask_t ip6d = {}, in_ip6d = {};
+ ip_port_and_mask_t sport = {}, in_sport = {};
+ ip_port_and_mask_t dport = {}, in_dport = {};
+ ip_prot_and_mask_t protocol = {}, in_proto = {};
u16 eth_type;
- bool tcp_udp_port_set = false;
+ bool inner_ip4_set = false, inner_ip6_set = false;
+ bool tcp_udp_port_set = false, inner_port_set = false;
bool gtpc_set = false;
bool gtpu_set = false;
bool vni_set = false;
@@ -415,12 +402,24 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
else if (unformat (line_input, "dst-ip %U",
unformat_ip4_address_and_mask, &ip4d))
flow_class = FLOW_IPV4_CLASS;
+ else if (unformat (line_input, "in-src-ip %U",
+ unformat_ip4_address_and_mask, &in_ip4s))
+ inner_ip4_set = true;
+ else if (unformat (line_input, "in-dst-ip %U",
+ unformat_ip4_address_and_mask, &in_ip4d))
+ inner_ip4_set = true;
else if (unformat (line_input, "ip6-src-ip %U",
unformat_ip6_address_and_mask, &ip6s))
flow_class = FLOW_IPV6_CLASS;
else if (unformat (line_input, "ip6-dst-ip %U",
unformat_ip6_address_and_mask, &ip6d))
flow_class = FLOW_IPV6_CLASS;
+ else if (unformat (line_input, "in-ip6-src-ip %U",
+ unformat_ip6_address_and_mask, &in_ip6s))
+ inner_ip6_set = true;
+ else if (unformat (line_input, "in-ip6-dst-ip %U",
+ unformat_ip6_address_and_mask, &in_ip6d))
+ inner_ip6_set = true;
else if (unformat (line_input, "src-port %U", unformat_ip_port_and_mask,
&sport))
tcp_udp_port_set = true;
@@ -432,6 +431,15 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
(line_input, "proto %U", unformat_ip_protocol_and_mask,
&protocol))
;
+ else if (unformat (line_input, "in-src-port %U",
+ unformat_ip_port_and_mask, &in_sport))
+ inner_port_set = true;
+ else if (unformat (line_input, "in-dst-port %U",
+ unformat_ip_port_and_mask, &in_dport))
+ inner_port_set = true;
+ else if (unformat (line_input, "in-proto %U",
+ unformat_ip_protocol_and_mask, &in_proto))
+ ;
else if (unformat (line_input, "gtpc teid %u", &teid))
gtpc_set = true;
else if (unformat (line_input, "gtpu teid %u", &teid))
@@ -592,6 +600,22 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
type = VNET_FLOW_TYPE_IP4_IPSEC_AH;
else if (tcp_udp_port_set)
type = VNET_FLOW_TYPE_IP4_N_TUPLE;
+ else if (inner_ip4_set)
+ {
+ if (inner_port_set)
+ type = VNET_FLOW_TYPE_IP4_IP4_N_TUPLE;
+ else
+ type = VNET_FLOW_TYPE_IP4_IP4;
+ protocol.prot = IP_PROTOCOL_IP_IN_IP;
+ }
+ else if (inner_ip6_set)
+ {
+ if (inner_port_set)
+ type = VNET_FLOW_TYPE_IP4_IP6_N_TUPLE;
+ else
+ type = VNET_FLOW_TYPE_IP4_IP6;
+ protocol.prot = IP_PROTOCOL_IPV6;
+ }
else
type = VNET_FLOW_TYPE_IP4;
break;
@@ -600,6 +624,22 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
type = VNET_FLOW_TYPE_IP6_N_TUPLE;
else if (vni_set)
type = VNET_FLOW_TYPE_IP6_VXLAN;
+ else if (inner_ip4_set)
+ {
+ if (inner_port_set)
+ type = VNET_FLOW_TYPE_IP6_IP4_N_TUPLE;
+ else
+ type = VNET_FLOW_TYPE_IP6_IP4;
+ protocol.prot = IP_PROTOCOL_IP_IN_IP;
+ }
+ else if (inner_ip6_set)
+ {
+ if (inner_port_set)
+ type = VNET_FLOW_TYPE_IP6_IP6_N_TUPLE;
+ else
+ type = VNET_FLOW_TYPE_IP6_IP6;
+ protocol.prot = IP_PROTOCOL_IPV6;
+ }
else
type = VNET_FLOW_TYPE_IP6;
break;
@@ -660,6 +700,30 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
case IP_PROTOCOL_IPSEC_AH:
flow.ip4_ipsec_esp.spi = spi;
break;
+ case IP_PROTOCOL_IP_IN_IP:
+ clib_memcpy (&flow.ip4_ip4.in_src_addr, &in_ip4s,
+ sizeof (ip4_address_and_mask_t));
+ clib_memcpy (&flow.ip4_ip4.in_dst_addr, &in_ip4d,
+ sizeof (ip4_address_and_mask_t));
+ if (type == VNET_FLOW_TYPE_IP4_IP4_N_TUPLE)
+ {
+ flow.ip4_ip4.in_protocol.prot = in_proto.prot;
+ flow.ip4_ip4_n_tuple.in_src_port = in_sport;
+ flow.ip4_ip4_n_tuple.in_dst_port = in_dport;
+ }
+ break;
+ case IP_PROTOCOL_IPV6:
+ clib_memcpy (&flow.ip4_ip6.in_src_addr, &in_ip6s,
+ sizeof (ip6_address_and_mask_t));
+ clib_memcpy (&flow.ip4_ip6.in_dst_addr, &in_ip6d,
+ sizeof (ip6_address_and_mask_t));
+ if (type == VNET_FLOW_TYPE_IP4_IP6_N_TUPLE)
+ {
+ flow.ip4_ip6.in_protocol.prot = in_proto.prot;
+ flow.ip4_ip6_n_tuple.in_src_port = in_sport;
+ flow.ip4_ip6_n_tuple.in_dst_port = in_dport;
+ }
+ break;
default:
break;
}
@@ -693,6 +757,30 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
if (type == VNET_FLOW_TYPE_IP6_VXLAN)
flow.ip6_vxlan.vni = vni;
break;
+ case IP_PROTOCOL_IP_IN_IP:
+ clib_memcpy (&flow.ip6_ip4.in_src_addr, &in_ip4s,
+ sizeof (ip4_address_and_mask_t));
+ clib_memcpy (&flow.ip6_ip4.in_dst_addr, &in_ip4d,
+ sizeof (ip4_address_and_mask_t));
+ if (type == VNET_FLOW_TYPE_IP6_IP4_N_TUPLE)
+ {
+ flow.ip6_ip4.in_protocol.prot = in_proto.prot;
+ flow.ip6_ip4_n_tuple.in_src_port = in_sport;
+ flow.ip6_ip4_n_tuple.in_dst_port = in_dport;
+ }
+ break;
+ case IP_PROTOCOL_IPV6:
+ clib_memcpy (&flow.ip6_ip6.in_src_addr, &in_ip6s,
+ sizeof (ip6_address_and_mask_t));
+ clib_memcpy (&flow.ip6_ip6.in_dst_addr, &in_ip6d,
+ sizeof (ip6_address_and_mask_t));
+ if (type == VNET_FLOW_TYPE_IP6_IP6_N_TUPLE)
+ {
+ flow.ip6_ip6.in_protocol.prot = in_proto.prot;
+ flow.ip6_ip6_n_tuple.in_src_port = in_sport;
+ flow.ip6_ip6_n_tuple.in_dst_port = in_dport;
+ }
+ break;
default:
break;
}
@@ -731,7 +819,6 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_flow_command, static) = {
.path = "test flow",
.short_help = "test flow [add|del|enable|disable] [index <id>] "
@@ -748,7 +835,6 @@ VLIB_CLI_COMMAND (test_flow_command, static) = {
"[rss queues <queue_start> to <queue_end>]",
.function = test_flow,
};
-/* *INDENT-ON* */
static u8 *
format_flow_match_element (u8 * s, va_list * args)
diff --git a/src/vnet/gre/FEATURE.yaml b/src/vnet/gre/FEATURE.yaml
deleted file mode 100644
index 4b35b870dc3..00000000000
--- a/src/vnet/gre/FEATURE.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
----
-name: Generic Routing Encapsulation
-maintainer: Neale Ranns <nranns@cisco.com>
-features:
- - L3 tunnels, all combinations of IPv4 and IPv6
- - Encap/Decap flags to control the copying of DSCP, ECN, DF from overlay to
- underlay and vice-versa.
- - L2 tunnels
-missing:
- - GRE keys
-description: "An implementation of Generic Routing Encapsulation (GRE)"
-state: production
-properties: [API, CLI, MULTITHREAD]
diff --git a/src/vnet/gre/error.def b/src/vnet/gre/error.def
deleted file mode 100644
index 161ecc1d874..00000000000
--- a/src/vnet/gre/error.def
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * gre_error.def: gre errors
- *
- * Copyright (c) 2012 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-gre_error (NONE, "no error")
-gre_error (UNKNOWN_PROTOCOL, "unknown protocol")
-gre_error (UNSUPPORTED_VERSION, "unsupported version")
-gre_error (PKTS_DECAP, "GRE input packets decapsulated")
-gre_error (PKTS_ENCAP, "GRE output packets encapsulated")
-gre_error (NO_SUCH_TUNNEL, "GRE input packets dropped due to missing tunnel")
diff --git a/src/vnet/gre/gre.api b/src/vnet/gre/gre.api
deleted file mode 100644
index 9c69ba4007d..00000000000
--- a/src/vnet/gre/gre.api
+++ /dev/null
@@ -1,110 +0,0 @@
-/* Hey Emacs use -*- mode: C -*- */
-/*
- * Copyright (c) 2015-2020 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-option version = "2.1.1";
-
-import "vnet/interface_types.api";
-import "vnet/tunnel/tunnel_types.api";
-import "vnet/ip/ip_types.api";
-
-/** \brief A GRE tunnel type
-*/
-enum gre_tunnel_type : u8
-{
- GRE_API_TUNNEL_TYPE_L3 = 0,
- /* L2 Transparent Ethernet Bridge */
- GRE_API_TUNNEL_TYPE_TEB,
- /* Encapsulated Remote Switched Port ANalyzer */
- GRE_API_TUNNEL_TYPE_ERSPAN,
-};
-
-/** \brief A composite type uniquely defining a GRE tunnel.
- @param type - tunnel type (see enum definition), 0: L3, 1: TEB, 2: ERSPAN
- @param mode - P2P or P2MP
- @param flags - to control encap/decap behaviour
- @param session_id - session for ERSPAN tunnel, range 0-1023
- @param instance - optional unique custom device instance, else ~0.
- @param outer_table_id - Encap FIB table ID
- @param sw_if_index - ignored on create/delete, present in details.
- @param src - Source IP address
- @param dst - Destination IP address, can be multicast
-*/
-typedef gre_tunnel
-{
- vl_api_gre_tunnel_type_t type;
- vl_api_tunnel_mode_t mode;
- vl_api_tunnel_encap_decap_flags_t flags;
- u16 session_id;
- u32 instance;
- u32 outer_table_id;
- vl_api_interface_index_t sw_if_index;
- vl_api_address_t src;
- vl_api_address_t dst;
-};
-
-/** \brief Add or delete a single GRE tunnel.
- @param client_index - opaque cookie to identify the sender.
- @param context - sender context, to match reply w/ request.
- @param is_add - add if true, delete if false.
- @param tunnel - tunnel definition to add or delete.
-*/
-define gre_tunnel_add_del
-{
- u32 client_index;
- u32 context;
- bool is_add;
- vl_api_gre_tunnel_t tunnel;
-};
-
-/** \brief Add or delete a single GRE tunnel.
- @param context - sender context, to match reply w/ request.
- @param retval - return code for the request.
- @param sw_if_index - the interface corresponding to the affected tunnel.
-*/
-define gre_tunnel_add_del_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-
-/** \brief Dump details of all or just a single GRE tunnel.
- @param client_index - opaque cookie to identify the sender.
- @param context - sender context, to match reply w/ request.
- @param sw_if_index - filter for tunnel of this interface index, ~0 for all.
-*/
-define gre_tunnel_dump
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index;
-};
-
-/** \brief Details response for one of the requested GRE tunnels.
- @param context - sender context, to match reply w/ request.
- @param tunnel - definition of the dumped tunnel.
-*/
-define gre_tunnel_details
-{
- u32 context;
- vl_api_gre_tunnel_t tunnel;
-};
-
-/*
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/gre/gre.c b/src/vnet/gre/gre.c
deleted file mode 100644
index dc735e6a77b..00000000000
--- a/src/vnet/gre/gre.c
+++ /dev/null
@@ -1,867 +0,0 @@
-/*
- * gre.c: gre
- *
- * Copyright (c) 2012 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vnet/vnet.h>
-#include <vnet/gre/gre.h>
-#include <vnet/adj/adj_midchain.h>
-#include <vnet/tunnel/tunnel_dp.h>
-
-extern gre_main_t gre_main;
-
-#ifndef CLIB_MARCH_VARIANT
-gre_main_t gre_main;
-
-typedef struct
-{
- union
- {
- ip4_and_gre_header_t ip4_and_gre;
- u64 as_u64[3];
- };
-} ip4_and_gre_union_t;
-
-typedef struct
-{
- union
- {
- ip6_and_gre_header_t ip6_and_gre;
- u64 as_u64[3];
- };
-} ip6_and_gre_union_t;
-#endif /* CLIB_MARCH_VARIANT */
-
-
-/* Packet trace structure */
-typedef struct
-{
- /* Tunnel-id / index in tunnel vector */
- u32 tunnel_id;
-
- /* pkt length */
- u32 length;
-
- /* tunnel ip addresses */
- ip46_address_t src;
- ip46_address_t dst;
-} gre_tx_trace_t;
-
-extern u8 *format_gre_tx_trace (u8 * s, va_list * args);
-
-#ifndef CLIB_MARCH_VARIANT
-u8 *
-format_gre_tx_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gre_tx_trace_t *t = va_arg (*args, gre_tx_trace_t *);
-
- s = format (s, "GRE: tunnel %d len %d src %U dst %U",
- t->tunnel_id, t->length,
- format_ip46_address, &t->src, IP46_TYPE_ANY,
- format_ip46_address, &t->dst, IP46_TYPE_ANY);
- return s;
-}
-
-u8 *
-format_gre_protocol (u8 * s, va_list * args)
-{
- gre_protocol_t p = va_arg (*args, u32);
- gre_main_t *gm = &gre_main;
- gre_protocol_info_t *pi = gre_get_protocol_info (gm, p);
-
- if (pi)
- s = format (s, "%s", pi->name);
- else
- s = format (s, "0x%04x", p);
-
- return s;
-}
-
-u8 *
-format_gre_header_with_length (u8 * s, va_list * args)
-{
- gre_main_t *gm = &gre_main;
- gre_header_t *h = va_arg (*args, gre_header_t *);
- u32 max_header_bytes = va_arg (*args, u32);
- gre_protocol_t p = clib_net_to_host_u16 (h->protocol);
- u32 indent, header_bytes;
-
- header_bytes = sizeof (h[0]);
- if (max_header_bytes != 0 && header_bytes > max_header_bytes)
- return format (s, "gre header truncated");
-
- indent = format_get_indent (s);
-
- s = format (s, "GRE %U", format_gre_protocol, p);
-
- if (max_header_bytes != 0 && header_bytes < max_header_bytes)
- {
- gre_protocol_info_t *pi = gre_get_protocol_info (gm, p);
- vlib_node_t *node = vlib_get_node (gm->vlib_main, pi->node_index);
- if (node->format_buffer)
- s = format (s, "\n%U%U",
- format_white_space, indent,
- node->format_buffer, (void *) (h + 1),
- max_header_bytes - header_bytes);
- }
-
- return s;
-}
-
-u8 *
-format_gre_header (u8 * s, va_list * args)
-{
- gre_header_t *h = va_arg (*args, gre_header_t *);
- return format (s, "%U", format_gre_header_with_length, h, 0);
-}
-
-/* Returns gre protocol as an int in host byte order. */
-uword
-unformat_gre_protocol_host_byte_order (unformat_input_t * input,
- va_list * args)
-{
- u16 *result = va_arg (*args, u16 *);
- gre_main_t *gm = &gre_main;
- int i;
-
- /* Named type. */
- if (unformat_user (input, unformat_vlib_number_by_name,
- gm->protocol_info_by_name, &i))
- {
- gre_protocol_info_t *pi = vec_elt_at_index (gm->protocol_infos, i);
- *result = pi->protocol;
- return 1;
- }
-
- return 0;
-}
-
-uword
-unformat_gre_protocol_net_byte_order (unformat_input_t * input,
- va_list * args)
-{
- u16 *result = va_arg (*args, u16 *);
- if (!unformat_user (input, unformat_gre_protocol_host_byte_order, result))
- return 0;
- *result = clib_host_to_net_u16 ((u16) * result);
- return 1;
-}
-
-uword
-unformat_gre_header (unformat_input_t * input, va_list * args)
-{
- u8 **result = va_arg (*args, u8 **);
- gre_header_t _h, *h = &_h;
- u16 p;
-
- if (!unformat (input, "%U", unformat_gre_protocol_host_byte_order, &p))
- return 0;
-
- h->protocol = clib_host_to_net_u16 (p);
-
- /* Add header to result. */
- {
- void *p;
- u32 n_bytes = sizeof (h[0]);
-
- vec_add2 (*result, p, n_bytes);
- clib_memcpy (p, h, n_bytes);
- }
-
- return 1;
-}
-
-static int
-gre_proto_from_vnet_link (vnet_link_t link)
-{
- switch (link)
- {
- case VNET_LINK_IP4:
- return (GRE_PROTOCOL_ip4);
- case VNET_LINK_IP6:
- return (GRE_PROTOCOL_ip6);
- case VNET_LINK_MPLS:
- return (GRE_PROTOCOL_mpls_unicast);
- case VNET_LINK_ETHERNET:
- return (GRE_PROTOCOL_teb);
- case VNET_LINK_ARP:
- return (GRE_PROTOCOL_arp);
- case VNET_LINK_NSH:
- ASSERT (0);
- break;
- }
- ASSERT (0);
- return (GRE_PROTOCOL_ip4);
-}
-
-static u8 *
-gre_build_rewrite (vnet_main_t * vnm,
- u32 sw_if_index,
- vnet_link_t link_type, const void *dst_address)
-{
- gre_main_t *gm = &gre_main;
- const ip46_address_t *dst;
- ip4_and_gre_header_t *h4;
- ip6_and_gre_header_t *h6;
- gre_header_t *gre;
- u8 *rewrite = NULL;
- gre_tunnel_t *t;
- u32 ti;
- u8 is_ipv6;
-
- dst = dst_address;
- ti = gm->tunnel_index_by_sw_if_index[sw_if_index];
-
- if (~0 == ti)
- /* not one of ours */
- return (0);
-
- t = pool_elt_at_index (gm->tunnels, ti);
-
- is_ipv6 = t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6 ? 1 : 0;
-
- if (!is_ipv6)
- {
- vec_validate (rewrite, sizeof (*h4) - 1);
- h4 = (ip4_and_gre_header_t *) rewrite;
- gre = &h4->gre;
- h4->ip4.ip_version_and_header_length = 0x45;
- h4->ip4.ttl = 254;
- h4->ip4.protocol = IP_PROTOCOL_GRE;
- /* fixup ip4 header length and checksum after-the-fact */
- h4->ip4.src_address.as_u32 = t->tunnel_src.ip4.as_u32;
- h4->ip4.dst_address.as_u32 = dst->ip4.as_u32;
- h4->ip4.checksum = ip4_header_checksum (&h4->ip4);
- }
- else
- {
- vec_validate (rewrite, sizeof (*h6) - 1);
- h6 = (ip6_and_gre_header_t *) rewrite;
- gre = &h6->gre;
- h6->ip6.ip_version_traffic_class_and_flow_label =
- clib_host_to_net_u32 (6 << 28);
- h6->ip6.hop_limit = 255;
- h6->ip6.protocol = IP_PROTOCOL_GRE;
- /* fixup ip6 header length and checksum after-the-fact */
- h6->ip6.src_address.as_u64[0] = t->tunnel_src.ip6.as_u64[0];
- h6->ip6.src_address.as_u64[1] = t->tunnel_src.ip6.as_u64[1];
- h6->ip6.dst_address.as_u64[0] = dst->ip6.as_u64[0];
- h6->ip6.dst_address.as_u64[1] = dst->ip6.as_u64[1];
- }
-
- if (PREDICT_FALSE (t->type == GRE_TUNNEL_TYPE_ERSPAN))
- {
- gre->protocol = clib_host_to_net_u16 (GRE_PROTOCOL_erspan);
- gre->flags_and_version = clib_host_to_net_u16 (GRE_FLAGS_SEQUENCE);
- }
- else
- gre->protocol =
- clib_host_to_net_u16 (gre_proto_from_vnet_link (link_type));
-
- return (rewrite);
-}
-
-static void
-gre44_fixup (vlib_main_t * vm,
- const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data)
-{
- tunnel_encap_decap_flags_t flags;
- ip4_and_gre_header_t *ip0;
-
- ip0 = vlib_buffer_get_current (b0);
- flags = pointer_to_uword (data);
-
- /* Fixup the checksum and len fields in the GRE tunnel encap
- * that was applied at the midchain node */
- ip0->ip4.length =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
- tunnel_encap_fixup_4o4 (flags, (ip4_header_t *) (ip0 + 1), &ip0->ip4);
- ip0->ip4.checksum = ip4_header_checksum (&ip0->ip4);
-}
-
-static void
-gre64_fixup (vlib_main_t * vm,
- const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data)
-{
- tunnel_encap_decap_flags_t flags;
- ip4_and_gre_header_t *ip0;
-
- ip0 = vlib_buffer_get_current (b0);
- flags = pointer_to_uword (data);
-
- /* Fixup the checksum and len fields in the GRE tunnel encap
- * that was applied at the midchain node */
- ip0->ip4.length =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
- tunnel_encap_fixup_6o4 (flags, (ip6_header_t *) (ip0 + 1), &ip0->ip4);
- ip0->ip4.checksum = ip4_header_checksum (&ip0->ip4);
-}
-
-static void
-grex4_fixup (vlib_main_t * vm,
- const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data)
-{
- ip4_header_t *ip0;
-
- ip0 = vlib_buffer_get_current (b0);
-
- /* Fixup the checksum and len fields in the GRE tunnel encap
- * that was applied at the midchain node */
- ip0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
- ip0->checksum = ip4_header_checksum (ip0);
-}
-
-static void
-gre46_fixup (vlib_main_t * vm,
- const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data)
-{
- tunnel_encap_decap_flags_t flags;
- ip6_and_gre_header_t *ip0;
-
- ip0 = vlib_buffer_get_current (b0);
- flags = pointer_to_uword (data);
-
- /* Fixup the payload length field in the GRE tunnel encap that was applied
- * at the midchain node */
- ip0->ip6.payload_length =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
- sizeof (ip0->ip6));
- tunnel_encap_fixup_4o6 (flags, b0, (ip4_header_t *) (ip0 + 1), &ip0->ip6);
-}
-
-static void
-gre66_fixup (vlib_main_t * vm,
- const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data)
-{
- tunnel_encap_decap_flags_t flags;
- ip6_and_gre_header_t *ip0;
-
- ip0 = vlib_buffer_get_current (b0);
- flags = pointer_to_uword (data);
-
- /* Fixup the payload length field in the GRE tunnel encap that was applied
- * at the midchain node */
- ip0->ip6.payload_length =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
- sizeof (ip0->ip6));
- tunnel_encap_fixup_6o6 (flags, (ip6_header_t *) (ip0 + 1), &ip0->ip6);
-}
-
-static void
-grex6_fixup (vlib_main_t * vm,
- const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data)
-{
- ip6_and_gre_header_t *ip0;
-
- ip0 = vlib_buffer_get_current (b0);
-
- /* Fixup the payload length field in the GRE tunnel encap that was applied
- * at the midchain node */
- ip0->ip6.payload_length =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
- sizeof (ip0->ip6));
-}
-
-/**
- * return the appropriate fixup function given the overlay (link-type) and
- * underlay (fproto) combination
- */
-static adj_midchain_fixup_t
-gre_get_fixup (fib_protocol_t fproto, vnet_link_t lt)
-{
- if (fproto == FIB_PROTOCOL_IP6 && lt == VNET_LINK_IP6)
- return (gre66_fixup);
- if (fproto == FIB_PROTOCOL_IP6 && lt == VNET_LINK_IP4)
- return (gre46_fixup);
- if (fproto == FIB_PROTOCOL_IP4 && lt == VNET_LINK_IP6)
- return (gre64_fixup);
- if (fproto == FIB_PROTOCOL_IP4 && lt == VNET_LINK_IP4)
- return (gre44_fixup);
- if (fproto == FIB_PROTOCOL_IP6)
- return (grex6_fixup);
- if (fproto == FIB_PROTOCOL_IP4)
- return (grex4_fixup);
-
- ASSERT (0);
- return (gre44_fixup);
-}
-
-void
-gre_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
-{
- gre_main_t *gm = &gre_main;
- gre_tunnel_t *t;
- adj_flags_t af;
- u32 ti;
-
- ti = gm->tunnel_index_by_sw_if_index[sw_if_index];
- t = pool_elt_at_index (gm->tunnels, ti);
- af = ADJ_FLAG_NONE;
-
- /*
- * the user has not requested that the load-balancing be based on
- * a flow hash of the inner packet. so use the stacking to choose
- * a path.
- */
- if (!(t->flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_INNER_HASH))
- af |= ADJ_FLAG_MIDCHAIN_IP_STACK;
-
- adj_nbr_midchain_update_rewrite
- (ai, gre_get_fixup (t->tunnel_dst.fp_proto,
- adj_get_link_type (ai)),
- uword_to_pointer (t->flags, void *), af,
- gre_build_rewrite (vnm, sw_if_index, adj_get_link_type (ai),
- &t->tunnel_dst.fp_addr));
-
- gre_tunnel_stack (ai);
-}
-
-adj_walk_rc_t
-mgre_mk_complete_walk (adj_index_t ai, void *data)
-{
- mgre_walk_ctx_t *ctx = data;
- adj_flags_t af;
-
- af = ADJ_FLAG_NONE;
-
- /*
- * the user has not requested that the load-balancing be based on
- * a flow hash of the inner packet. so use the stacking to choose
- * a path.
- */
- if (!(ctx->t->flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_INNER_HASH))
- af |= ADJ_FLAG_MIDCHAIN_IP_STACK;
-
- adj_nbr_midchain_update_rewrite
- (ai, gre_get_fixup (ctx->t->tunnel_dst.fp_proto,
- adj_get_link_type (ai)),
- uword_to_pointer (ctx->t->flags, void *),
- af,
- gre_build_rewrite (vnet_get_main (),
- ctx->t->sw_if_index,
- adj_get_link_type (ai),
- &teib_entry_get_nh (ctx->ne)->fp_addr));
-
- teib_entry_adj_stack (ctx->ne, ai);
-
- return (ADJ_WALK_RC_CONTINUE);
-}
-
-adj_walk_rc_t
-mgre_mk_incomplete_walk (adj_index_t ai, void *data)
-{
- gre_tunnel_t *t = data;
-
- adj_nbr_midchain_update_rewrite (ai, gre_get_fixup (t->tunnel_dst.fp_proto,
- adj_get_link_type (ai)),
- NULL, ADJ_FLAG_NONE, NULL);
-
- adj_midchain_delegate_unstack (ai);
-
- return (ADJ_WALK_RC_CONTINUE);
-}
-
-void
-mgre_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
-{
- gre_main_t *gm = &gre_main;
- ip_adjacency_t *adj;
- teib_entry_t *ne;
- gre_tunnel_t *t;
- u32 ti;
-
- adj = adj_get (ai);
- ti = gm->tunnel_index_by_sw_if_index[sw_if_index];
- t = pool_elt_at_index (gm->tunnels, ti);
-
- ne = teib_entry_find_46 (sw_if_index,
- adj->ia_nh_proto, &adj->sub_type.nbr.next_hop);
-
- if (NULL == ne)
- {
- // no TEIB entry to provide the next-hop
- adj_nbr_midchain_update_rewrite (
- ai, gre_get_fixup (t->tunnel_dst.fp_proto, adj_get_link_type (ai)),
- uword_to_pointer (t->flags, void *), ADJ_FLAG_NONE, NULL);
- return;
- }
-
- mgre_walk_ctx_t ctx = {
- .t = t,
- .ne = ne
- };
- adj_nbr_walk_nh (sw_if_index,
- adj->ia_nh_proto,
- &adj->sub_type.nbr.next_hop, mgre_mk_complete_walk, &ctx);
-}
-#endif /* CLIB_MARCH_VARIANT */
-
-typedef enum
-{
- GRE_ENCAP_NEXT_L2_MIDCHAIN,
- GRE_ENCAP_N_NEXT,
-} gre_encap_next_t;
-
-/**
- * @brief TX function. Only called for L2 payload including TEB or ERSPAN.
- * L3 traffic uses the adj-midchains.
- */
-static_always_inline u32
-gre_encap_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, gre_tunnel_type_t type)
-{
- gre_main_t *gm = &gre_main;
- u32 *from, n_left_from;
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
- u32 sw_if_index[2] = { ~0, ~0 };
- const gre_tunnel_t *gt[2] = { 0 };
- adj_index_t adj_index[2] = { ADJ_INDEX_INVALID, ADJ_INDEX_INVALID };
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- vlib_get_buffers (vm, from, bufs, n_left_from);
-
- while (n_left_from >= 2)
- {
-
- if (PREDICT_FALSE
- (sw_if_index[0] != vnet_buffer (b[0])->sw_if_index[VLIB_TX]))
- {
- const vnet_hw_interface_t *hi;
- sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
- hi = vnet_get_sup_hw_interface (gm->vnet_main, sw_if_index[0]);
- gt[0] = &gm->tunnels[hi->dev_instance];
- adj_index[0] = gt[0]->l2_adj_index;
- }
- if (PREDICT_FALSE
- (sw_if_index[1] != vnet_buffer (b[1])->sw_if_index[VLIB_TX]))
- {
- const vnet_hw_interface_t *hi;
- sw_if_index[1] = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
- hi = vnet_get_sup_hw_interface (gm->vnet_main, sw_if_index[1]);
- gt[1] = &gm->tunnels[hi->dev_instance];
- adj_index[1] = gt[1]->l2_adj_index;
- }
-
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = adj_index[0];
- vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = adj_index[1];
-
- if (type == GRE_TUNNEL_TYPE_ERSPAN)
- {
- /* Encap GRE seq# and ERSPAN type II header */
- erspan_t2_t *h0;
- u32 seq_num;
- u64 hdr;
- vlib_buffer_advance (b[0], -sizeof (erspan_t2_t));
- h0 = vlib_buffer_get_current (b[0]);
- seq_num = clib_atomic_fetch_add (&gt[0]->gre_sn->seq_num, 1);
- hdr = clib_host_to_net_u64 (ERSPAN_HDR2);
- h0->seq_num = clib_host_to_net_u32 (seq_num);
- h0->t2_u64 = hdr;
- h0->t2.cos_en_t_session |= clib_host_to_net_u16 (gt[0]->session_id);
- }
- if (type == GRE_TUNNEL_TYPE_ERSPAN)
- {
- /* Encap GRE seq# and ERSPAN type II header */
- erspan_t2_t *h0;
- u32 seq_num;
- u64 hdr;
- vlib_buffer_advance (b[1], -sizeof (erspan_t2_t));
- h0 = vlib_buffer_get_current (b[1]);
- seq_num = clib_atomic_fetch_add (&gt[1]->gre_sn->seq_num, 1);
- hdr = clib_host_to_net_u64 (ERSPAN_HDR2);
- h0->seq_num = clib_host_to_net_u32 (seq_num);
- h0->t2_u64 = hdr;
- h0->t2.cos_en_t_session |= clib_host_to_net_u16 (gt[1]->session_id);
- }
-
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- {
- gre_tx_trace_t *tr = vlib_add_trace (vm, node,
- b[0], sizeof (*tr));
- tr->tunnel_id = gt[0] - gm->tunnels;
- tr->src = gt[0]->tunnel_src;
- tr->dst = gt[0]->tunnel_dst.fp_addr;
- tr->length = vlib_buffer_length_in_chain (vm, b[0]);
- }
- if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED))
- {
- gre_tx_trace_t *tr = vlib_add_trace (vm, node,
- b[1], sizeof (*tr));
- tr->tunnel_id = gt[1] - gm->tunnels;
- tr->src = gt[1]->tunnel_src;
- tr->dst = gt[1]->tunnel_dst.fp_addr;
- tr->length = vlib_buffer_length_in_chain (vm, b[1]);
- }
-
- b += 2;
- n_left_from -= 2;
- }
-
- while (n_left_from >= 1)
- {
-
- if (PREDICT_FALSE
- (sw_if_index[0] != vnet_buffer (b[0])->sw_if_index[VLIB_TX]))
- {
- const vnet_hw_interface_t *hi;
- sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
- hi = vnet_get_sup_hw_interface (gm->vnet_main, sw_if_index[0]);
- gt[0] = &gm->tunnels[hi->dev_instance];
- adj_index[0] = gt[0]->l2_adj_index;
- }
-
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = adj_index[0];
-
- if (type == GRE_TUNNEL_TYPE_ERSPAN)
- {
- /* Encap GRE seq# and ERSPAN type II header */
- erspan_t2_t *h0;
- u32 seq_num;
- u64 hdr;
- ASSERT (gt[0]->type == GRE_TUNNEL_TYPE_ERSPAN);
- vlib_buffer_advance (b[0], -sizeof (erspan_t2_t));
- h0 = vlib_buffer_get_current (b[0]);
- seq_num = clib_atomic_fetch_add (&gt[0]->gre_sn->seq_num, 1);
- hdr = clib_host_to_net_u64 (ERSPAN_HDR2);
- h0->seq_num = clib_host_to_net_u32 (seq_num);
- h0->t2_u64 = hdr;
- h0->t2.cos_en_t_session |= clib_host_to_net_u16 (gt[0]->session_id);
- }
-
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- {
- gre_tx_trace_t *tr = vlib_add_trace (vm, node,
- b[0], sizeof (*tr));
- tr->tunnel_id = gt[0] - gm->tunnels;
- tr->src = gt[0]->tunnel_src;
- tr->dst = gt[0]->tunnel_dst.fp_addr;
- tr->length = vlib_buffer_length_in_chain (vm, b[0]);
- }
-
- b += 1;
- n_left_from -= 1;
- }
-
- vlib_buffer_enqueue_to_single_next (vm, node, from,
- GRE_ENCAP_NEXT_L2_MIDCHAIN,
- frame->n_vectors);
-
- vlib_node_increment_counter (vm, node->node_index,
- GRE_ERROR_PKTS_ENCAP, frame->n_vectors);
-
- return frame->n_vectors;
-}
-
-static char *gre_error_strings[] = {
-#define gre_error(n,s) s,
-#include "error.def"
-#undef gre_error
-};
-
-VLIB_NODE_FN (gre_teb_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gre_encap_inline (vm, node, frame, GRE_TUNNEL_TYPE_TEB));
-}
-
-VLIB_NODE_FN (gre_erspan_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gre_encap_inline (vm, node, frame, GRE_TUNNEL_TYPE_ERSPAN));
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gre_teb_encap_node) =
-{
- .name = "gre-teb-encap",
- .vector_size = sizeof (u32),
- .format_trace = format_gre_tx_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = GRE_N_ERROR,
- .error_strings = gre_error_strings,
- .n_next_nodes = GRE_ENCAP_N_NEXT,
- .next_nodes = {
- [GRE_ENCAP_NEXT_L2_MIDCHAIN] = "adj-l2-midchain",
- },
-};
-VLIB_REGISTER_NODE (gre_erspan_encap_node) =
-{
- .name = "gre-erspan-encap",
- .vector_size = sizeof (u32),
- .format_trace = format_gre_tx_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = GRE_N_ERROR,
- .error_strings = gre_error_strings,
- .n_next_nodes = GRE_ENCAP_N_NEXT,
- .next_nodes = {
- [GRE_ENCAP_NEXT_L2_MIDCHAIN] = "adj-l2-midchain",
- },
-};
-/* *INDENT-ON* */
-
-#ifndef CLIB_MARCH_VARIANT
-static u8 *
-format_gre_tunnel_name (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- gre_main_t *gm = &gre_main;
- gre_tunnel_t *t;
-
- if (dev_instance >= vec_len (gm->tunnels))
- return format (s, "<improperly-referenced>");
-
- t = pool_elt_at_index (gm->tunnels, dev_instance);
- return format (s, "gre%d", t->user_instance);
-}
-
-static u8 *
-format_gre_device (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- CLIB_UNUSED (int verbose) = va_arg (*args, int);
-
- s = format (s, "GRE tunnel: id %d\n", dev_instance);
- return s;
-}
-
-static int
-gre_tunnel_desc (u32 sw_if_index,
- ip46_address_t * src, ip46_address_t * dst, u8 * is_l2)
-{
- gre_main_t *gm = &gre_main;
- gre_tunnel_t *t;
- u32 ti;
-
- ti = gm->tunnel_index_by_sw_if_index[sw_if_index];
-
- if (~0 == ti)
- /* not one of ours */
- return -1;
-
- t = pool_elt_at_index (gm->tunnels, ti);
-
- *src = t->tunnel_src;
- *dst = t->tunnel_dst.fp_addr;
- *is_l2 = t->type == GRE_TUNNEL_TYPE_TEB;
-
- return (0);
-}
-
-/* *INDENT-OFF* */
-VNET_DEVICE_CLASS (gre_device_class) = {
- .name = "GRE tunnel device",
- .format_device_name = format_gre_tunnel_name,
- .format_device = format_gre_device,
- .format_tx_trace = format_gre_tx_trace,
- .admin_up_down_function = gre_interface_admin_up_down,
- .ip_tun_desc = gre_tunnel_desc,
-#ifdef SOON
- .clear counter = 0;
-#endif
-};
-
-VNET_HW_INTERFACE_CLASS (gre_hw_interface_class) = {
- .name = "GRE",
- .format_header = format_gre_header_with_length,
- .unformat_header = unformat_gre_header,
- .build_rewrite = gre_build_rewrite,
- .update_adjacency = gre_update_adj,
- .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
-};
-
-VNET_HW_INTERFACE_CLASS (mgre_hw_interface_class) = {
- .name = "mGRE",
- .format_header = format_gre_header_with_length,
- .unformat_header = unformat_gre_header,
- .build_rewrite = gre_build_rewrite,
- .update_adjacency = mgre_update_adj,
- .flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA,
-};
-/* *INDENT-ON* */
-#endif /* CLIB_MARCH_VARIANT */
-
-static void
-add_protocol (gre_main_t * gm, gre_protocol_t protocol, char *protocol_name)
-{
- gre_protocol_info_t *pi;
- u32 i;
-
- vec_add2 (gm->protocol_infos, pi, 1);
- i = pi - gm->protocol_infos;
-
- pi->name = protocol_name;
- pi->protocol = protocol;
- pi->next_index = pi->node_index = ~0;
-
- hash_set (gm->protocol_info_by_protocol, protocol, i);
- hash_set_mem (gm->protocol_info_by_name, pi->name, i);
-}
-
-static clib_error_t *
-gre_init (vlib_main_t * vm)
-{
- gre_main_t *gm = &gre_main;
- clib_error_t *error;
- ip_main_t *im = &ip_main;
- ip_protocol_info_t *pi;
-
- clib_memset (gm, 0, sizeof (gm[0]));
- gm->vlib_main = vm;
- gm->vnet_main = vnet_get_main ();
-
- if ((error = vlib_call_init_function (vm, ip_main_init)))
- return error;
-
- if ((error = vlib_call_init_function (vm, ip4_lookup_init)))
- return error;
-
- if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
- return error;
-
- /* Set up the ip packet generator */
- pi = ip_get_protocol_info (im, IP_PROTOCOL_GRE);
- pi->format_header = format_gre_header;
- pi->unformat_pg_edit = unformat_pg_gre_header;
-
- gm->protocol_info_by_name = hash_create_string (0, sizeof (uword));
- gm->protocol_info_by_protocol = hash_create (0, sizeof (uword));
- gm->tunnel_by_key4 =
- hash_create_mem (0, sizeof (gre_tunnel_key4_t), sizeof (uword));
- gm->tunnel_by_key6 =
- hash_create_mem (0, sizeof (gre_tunnel_key6_t), sizeof (uword));
- gm->seq_num_by_key =
- hash_create_mem (0, sizeof (gre_sn_key_t), sizeof (uword));
-
-#define _(n,s) add_protocol (gm, GRE_PROTOCOL_##s, #s);
- foreach_gre_protocol
-#undef _
- return vlib_call_init_function (vm, gre_input_init);
-}
-
-VLIB_INIT_FUNCTION (gre_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/gre/gre.h b/src/vnet/gre/gre.h
deleted file mode 100644
index ea085bf0fa1..00000000000
--- a/src/vnet/gre/gre.h
+++ /dev/null
@@ -1,443 +0,0 @@
-/*
- * gre.h: types/functions for gre.
- *
- * Copyright (c) 2012 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef included_gre_h
-#define included_gre_h
-
-#include <vnet/vnet.h>
-#include <vnet/gre/packet.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ip/format.h>
-#include <vnet/adj/adj_types.h>
-#include <vnet/tunnel/tunnel.h>
-#include <vnet/teib/teib.h>
-
-extern vnet_hw_interface_class_t gre_hw_interface_class;
-extern vnet_hw_interface_class_t mgre_hw_interface_class;
-
-typedef enum
-{
-#define gre_error(n,s) GRE_ERROR_##n,
-#include <vnet/gre/error.def>
-#undef gre_error
- GRE_N_ERROR,
-} gre_error_t;
-
-/**
- * L3: GRE (i.e. this tunnel is in L3 mode)
- * TEB: Transparent Ethernet Bridging - the tunnel is in L2 mode
- * ERSPAN: type 2 - the tunnel is for port mirror SPAN output. Each tunnel is
- * associated with a session ID and expected to be used for encap
- * and output of mirrored packet from a L2 network only. There is
- * no support for receiving ERSPAN packets from a GRE ERSPAN tunnel
- */
-#define foreach_gre_tunnel_type \
- _(L3, "L3") \
- _(TEB, "TEB") \
- _(ERSPAN, "ERSPAN") \
-
-/**
- * @brief The GRE tunnel type
- */
-typedef enum gre_tunnel_type_t_
-{
-#define _(n, s) GRE_TUNNEL_TYPE_##n,
- foreach_gre_tunnel_type
-#undef _
-} __clib_packed gre_tunnel_type_t;
-
-extern u8 *format_gre_tunnel_type (u8 * s, va_list * args);
-
-
-/**
- * A GRE payload protocol registration
- */
-typedef struct
-{
- /** Name (a c string). */
- char *name;
-
- /** GRE protocol type in host byte order. */
- gre_protocol_t protocol;
-
- /** GRE tunnel type */
- gre_tunnel_type_t tunnel_type;
-
- /** Node which handles this type. */
- u32 node_index;
-
- /** Next index for this type. */
- u32 next_index;
-} gre_protocol_info_t;
-
-/**
- * Elements of the GRE key that are common for v6 and v6 addresses
- */
-typedef struct gre_tunnel_key_common_t_
-{
- union
- {
- struct
- {
- u32 fib_index;
- u16 session_id;
- gre_tunnel_type_t type;
- tunnel_mode_t mode;
- };
- u64 as_u64;
- };
-} gre_tunnel_key_common_t;
-
-STATIC_ASSERT_SIZEOF (gre_tunnel_key_common_t, sizeof (u64));
-
-/**
- * @brief Key for a IPv4 GRE Tunnel
- */
-typedef struct gre_tunnel_key4_t_
-{
- /**
- * Source and destination IP addresses
- */
- union
- {
- struct
- {
- ip4_address_t gtk_src;
- ip4_address_t gtk_dst;
- };
- u64 gtk_as_u64;
- };
-
- /** address independent attributes */
- gre_tunnel_key_common_t gtk_common;
-} __attribute__ ((packed)) gre_tunnel_key4_t;
-
-STATIC_ASSERT_SIZEOF (gre_tunnel_key4_t, 2 * sizeof (u64));
-
-/**
- * @brief Key for a IPv6 GRE Tunnel
- * We use a different type so that the V4 key hash is as small as possible
- */
-typedef struct gre_tunnel_key6_t_
-{
- /**
- * Source and destination IP addresses
- */
- ip6_address_t gtk_src;
- ip6_address_t gtk_dst;
-
- /** address independent attributes */
- gre_tunnel_key_common_t gtk_common;
-} __attribute__ ((packed)) gre_tunnel_key6_t;
-
-STATIC_ASSERT_SIZEOF (gre_tunnel_key6_t, 5 * sizeof (u64));
-
-/**
- * Union of the two possible key types
- */
-typedef union gre_tunnel_key_t_
-{
- gre_tunnel_key4_t gtk_v4;
- gre_tunnel_key6_t gtk_v6;
-} gre_tunnel_key_t;
-
-/**
- * The session ID is only a 10 bit value
- */
-#define GTK_SESSION_ID_MAX (0x3ff)
-
-/**
- * Used for GRE header seq number generation for ERSPAN encap
- */
-typedef struct
-{
- u32 seq_num;
- u32 ref_count;
-} gre_sn_t;
-
-/**
- * Hash key for GRE header seq number generation for ERSPAN encap
- */
-typedef struct
-{
- ip46_address_t src;
- ip46_address_t dst;
- u32 fib_index;
-} gre_sn_key_t;
-
-/**
- * @brief A representation of a GRE tunnel
- */
-typedef struct
-{
- /**
- * Required for pool_get_aligned
- */
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-
- /**
- * The tunnel's source/local address
- */
- ip46_address_t tunnel_src;
- /**
- * The tunnel's destination/remote address
- */
- fib_prefix_t tunnel_dst;
- /**
- * The FIB in which the src.dst address are present
- */
- u32 outer_fib_index;
- u32 hw_if_index;
- u32 sw_if_index;
- gre_tunnel_type_t type;
- tunnel_mode_t mode;
- tunnel_encap_decap_flags_t flags;
-
- /**
- * an L2 tunnel always rquires an L2 midchain. cache here for DP.
- */
- adj_index_t l2_adj_index;
-
- /**
- * ERSPAN type 2 session ID, least significant 10 bits of u16
- */
- u16 session_id;
-
- /**
- * GRE header sequence number (SN) used for ERSPAN type 2 header, must be
- * bumped automically to be thread safe. As multiple GRE tunnels are created
- * for the same fib-idx/DIP/SIP with different ERSPAN session number, they all
- * share the same SN which is kept per FIB/DIP/SIP, as specified by RFC2890.
- */
- gre_sn_t *gre_sn;
-
-
- u32 dev_instance; /* Real device instance in tunnel vector */
- u32 user_instance; /* Instance name being shown to user */
-} gre_tunnel_t;
-
-typedef struct
-{
- u8 next_index;
- u8 tunnel_type;
-} next_info_t;
-
-/**
- * @brief GRE related global data
- */
-typedef struct
-{
- /**
- * pool of tunnel instances
- */
- gre_tunnel_t *tunnels;
-
- /**
- * GRE payload protocol registrations
- */
- gre_protocol_info_t *protocol_infos;
-
- /**
- * Hash tables mapping name/protocol to protocol info index.
- */
- uword *protocol_info_by_name, *protocol_info_by_protocol;
-
- /**
- * Hash mapping to tunnels with ipv4 src/dst addr
- */
- uword *tunnel_by_key4;
-
- /**
- * Hash mapping to tunnels with ipv6 src/dst addr
- */
- uword *tunnel_by_key6;
-
- /**
- * Hash mapping tunnel src/dst addr and fib-idx to sequence number
- */
- uword *seq_num_by_key;
-
- /**
- * Mapping from sw_if_index to tunnel index
- */
- u32 *tunnel_index_by_sw_if_index;
-
- /* Sparse vector mapping gre protocol in network byte order
- to next index. */
- next_info_t *next_by_protocol;
-
- /* convenience */
- vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
-
- /* Record used instances */
- uword *instance_used;
-
- u16 msg_id_base;
-} gre_main_t;
-
-/**
- * @brief IPv4 and GRE header.
- */
-/* *INDENT-OFF* */
-typedef CLIB_PACKED (struct {
- ip4_header_t ip4;
- gre_header_t gre;
-}) ip4_and_gre_header_t;
-/* *INDENT-ON* */
-
-/**
- * @brief IPv6 and GRE header.
- */
-/* *INDENT-OFF* */
-typedef CLIB_PACKED (struct {
- ip6_header_t ip6;
- gre_header_t gre;
-}) ip6_and_gre_header_t;
-/* *INDENT-ON* */
-
-always_inline gre_protocol_info_t *
-gre_get_protocol_info (gre_main_t * em, gre_protocol_t protocol)
-{
- uword *p = hash_get (em->protocol_info_by_protocol, protocol);
- return p ? vec_elt_at_index (em->protocol_infos, p[0]) : 0;
-}
-
-extern gre_main_t gre_main;
-
-extern clib_error_t *gre_interface_admin_up_down (vnet_main_t * vnm,
- u32 hw_if_index, u32 flags);
-
-extern void gre_tunnel_stack (adj_index_t ai);
-extern void gre_update_adj (vnet_main_t * vnm,
- u32 sw_if_index, adj_index_t ai);
-
-typedef struct mgre_walk_ctx_t_
-{
- const gre_tunnel_t *t;
- const teib_entry_t *ne;
-} mgre_walk_ctx_t;
-
-adj_walk_rc_t mgre_mk_complete_walk (adj_index_t ai, void *data);
-adj_walk_rc_t mgre_mk_incomplete_walk (adj_index_t ai, void *data);
-
-format_function_t format_gre_protocol;
-format_function_t format_gre_header;
-format_function_t format_gre_header_with_length;
-
-extern vlib_node_registration_t gre4_input_node;
-extern vlib_node_registration_t gre6_input_node;
-extern vlib_node_registration_t gre_erspan_encap_node;
-extern vlib_node_registration_t gre_teb_encap_node;
-extern vnet_device_class_t gre_device_class;
-
-/* Parse gre protocol as 0xXXXX or protocol name.
- In either host or network byte order. */
-unformat_function_t unformat_gre_protocol_host_byte_order;
-unformat_function_t unformat_gre_protocol_net_byte_order;
-
-/* Parse gre header. */
-unformat_function_t unformat_gre_header;
-unformat_function_t unformat_pg_gre_header;
-
-void
-gre_register_input_protocol (vlib_main_t * vm, gre_protocol_t protocol,
- u32 node_index, gre_tunnel_type_t tunnel_type);
-
-/* manually added to the interface output node in gre.c */
-#define GRE_OUTPUT_NEXT_LOOKUP 1
-
-typedef struct
-{
- u8 is_add;
- gre_tunnel_type_t type;
- tunnel_mode_t mode;
- u8 is_ipv6;
- u32 instance;
- ip46_address_t src, dst;
- u32 outer_table_id;
- u16 session_id;
- tunnel_encap_decap_flags_t flags;
-} vnet_gre_tunnel_add_del_args_t;
-
-extern int vnet_gre_tunnel_add_del (vnet_gre_tunnel_add_del_args_t * a,
- u32 * sw_if_indexp);
-
-static inline void
-gre_mk_key4 (ip4_address_t src,
- ip4_address_t dst,
- u32 fib_index,
- gre_tunnel_type_t ttype,
- tunnel_mode_t tmode, u16 session_id, gre_tunnel_key4_t * key)
-{
- key->gtk_src = src;
- key->gtk_dst = dst;
- key->gtk_common.type = ttype;
- key->gtk_common.mode = tmode;
- key->gtk_common.fib_index = fib_index;
- key->gtk_common.session_id = session_id;
-}
-
-static inline int
-gre_match_key4 (const gre_tunnel_key4_t * key1,
- const gre_tunnel_key4_t * key2)
-{
- return ((key1->gtk_as_u64 == key2->gtk_as_u64) &&
- (key1->gtk_common.as_u64 == key2->gtk_common.as_u64));
-}
-
-static inline void
-gre_mk_key6 (const ip6_address_t * src,
- const ip6_address_t * dst,
- u32 fib_index,
- gre_tunnel_type_t ttype,
- tunnel_mode_t tmode, u16 session_id, gre_tunnel_key6_t * key)
-{
- key->gtk_src = *src;
- key->gtk_dst = *dst;
- key->gtk_common.type = ttype;
- key->gtk_common.mode = tmode;
- key->gtk_common.fib_index = fib_index;
- key->gtk_common.session_id = session_id;
-}
-
-static inline int
-gre_match_key6 (const gre_tunnel_key6_t * key1,
- const gre_tunnel_key6_t * key2)
-{
- return (ip6_address_is_equal (&key1->gtk_src, &key2->gtk_src) &&
- ip6_address_is_equal (&key1->gtk_dst, &key2->gtk_dst) &&
- (key1->gtk_common.as_u64 == key2->gtk_common.as_u64));
-}
-
-static inline void
-gre_mk_sn_key (const gre_tunnel_t * gt, gre_sn_key_t * key)
-{
- key->src = gt->tunnel_src;
- key->dst = gt->tunnel_dst.fp_addr;
- key->fib_index = gt->outer_fib_index;
-}
-
-#endif /* included_gre_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/gre/gre_api.c b/src/vnet/gre/gre_api.c
deleted file mode 100644
index 59a1d3d738d..00000000000
--- a/src/vnet/gre/gre_api.c
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- *------------------------------------------------------------------
- * gre_api.c - gre api
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vnet/vnet.h>
-#include <vlibmemory/api.h>
-
-#include <vnet/interface.h>
-#include <vnet/api_errno.h>
-
-#include <vnet/gre/gre.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/tunnel/tunnel_types_api.h>
-#include <vnet/ip/ip_types_api.h>
-
-#include <vnet/gre/gre.api_enum.h>
-#include <vnet/gre/gre.api_types.h>
-
-#define REPLY_MSG_ID_BASE gre_main.msg_id_base
-#include <vlibapi/api_helper_macros.h>
-
-static int
-gre_tunnel_type_decode (vl_api_gre_tunnel_type_t in, gre_tunnel_type_t * out)
-{
- switch (in)
- {
-#define _(n, v) \
- case GRE_API_TUNNEL_TYPE_##n: \
- *out = GRE_TUNNEL_TYPE_##n; \
- return (0);
- foreach_gre_tunnel_type
-#undef _
- }
-
- return (VNET_API_ERROR_INVALID_VALUE);
-}
-
-static vl_api_gre_tunnel_type_t
-gre_tunnel_type_encode (gre_tunnel_type_t in)
-{
- vl_api_gre_tunnel_type_t out = GRE_API_TUNNEL_TYPE_L3;
-
- switch (in)
- {
-#define _(n, v) \
- case GRE_TUNNEL_TYPE_##n: \
- out = GRE_API_TUNNEL_TYPE_##n; \
- break;
- foreach_gre_tunnel_type
-#undef _
- }
-
- return (out);
-}
-
-static void vl_api_gre_tunnel_add_del_t_handler
- (vl_api_gre_tunnel_add_del_t * mp)
-{
- vnet_gre_tunnel_add_del_args_t _a = { }, *a = &_a;
- vl_api_gre_tunnel_add_del_reply_t *rmp;
- tunnel_encap_decap_flags_t flags;
- u32 sw_if_index = ~0;
- ip46_type_t itype[2];
- int rv = 0;
-
- itype[0] = ip_address_decode (&mp->tunnel.src, &a->src);
- itype[1] = ip_address_decode (&mp->tunnel.dst, &a->dst);
-
- if (itype[0] != itype[1])
- {
- rv = VNET_API_ERROR_INVALID_PROTOCOL;
- goto out;
- }
-
- if (ip46_address_is_equal (&a->src, &a->dst))
- {
- rv = VNET_API_ERROR_SAME_SRC_DST;
- goto out;
- }
-
- rv = gre_tunnel_type_decode (mp->tunnel.type, &a->type);
-
- if (rv)
- goto out;
-
- rv = tunnel_mode_decode (mp->tunnel.mode, &a->mode);
-
- if (rv)
- goto out;
-
- rv = tunnel_encap_decap_flags_decode (mp->tunnel.flags, &flags);
-
- if (rv)
- goto out;
-
- a->is_add = mp->is_add;
- a->is_ipv6 = (itype[0] == IP46_TYPE_IP6);
- a->instance = ntohl (mp->tunnel.instance);
- a->session_id = ntohs (mp->tunnel.session_id);
- a->outer_table_id = ntohl (mp->tunnel.outer_table_id);
- a->flags = flags;
-
- rv = vnet_gre_tunnel_add_del (a, &sw_if_index);
-
-out:
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_GRE_TUNNEL_ADD_DEL_REPLY,
- ({
- rmp->sw_if_index = ntohl (sw_if_index);
- }));
- /* *INDENT-ON* */
-}
-
-static void send_gre_tunnel_details
- (gre_tunnel_t * t, vl_api_gre_tunnel_dump_t * mp)
-{
- vl_api_gre_tunnel_details_t *rmp;
-
- /* *INDENT-OFF* */
- REPLY_MACRO_DETAILS2(VL_API_GRE_TUNNEL_DETAILS,
- ({
- ip_address_encode (&t->tunnel_src, IP46_TYPE_ANY, &rmp->tunnel.src);
- ip_address_encode (&t->tunnel_dst.fp_addr, IP46_TYPE_ANY, &rmp->tunnel.dst);
-
- rmp->tunnel.outer_table_id =
- htonl (fib_table_get_table_id
- (t->outer_fib_index, t->tunnel_dst.fp_proto));
-
- rmp->tunnel.type = gre_tunnel_type_encode (t->type);
- rmp->tunnel.mode = tunnel_mode_encode (t->mode);
- rmp->tunnel.flags = tunnel_encap_decap_flags_encode (t->flags);
- rmp->tunnel.instance = htonl (t->user_instance);
- rmp->tunnel.sw_if_index = htonl (t->sw_if_index);
- rmp->tunnel.session_id = htons (t->session_id);
- }));
- /* *INDENT-ON* */
-}
-
-static void
-vl_api_gre_tunnel_dump_t_handler (vl_api_gre_tunnel_dump_t * mp)
-{
- vl_api_registration_t *reg;
- gre_main_t *gm = &gre_main;
- gre_tunnel_t *t;
- u32 sw_if_index;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- sw_if_index = ntohl (mp->sw_if_index);
-
- if (~0 == sw_if_index)
- {
- /* *INDENT-OFF* */
- pool_foreach (t, gm->tunnels)
- {
- send_gre_tunnel_details(t, mp);
- }
- /* *INDENT-ON* */
- }
-
- else
- {
- if ((sw_if_index >= vec_len (gm->tunnel_index_by_sw_if_index)) ||
- (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index]))
- {
- return;
- }
- t = &gm->tunnels[gm->tunnel_index_by_sw_if_index[sw_if_index]];
- send_gre_tunnel_details (t, mp);
- }
-}
-
-/*
- * gre_api_hookup
- * Add vpe's API message handlers to the table.
- * vlib has already mapped shared memory and
- * added the client registration handlers.
- * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
- */
-/* API definitions */
-#include <vnet/format_fns.h>
-#include <vnet/gre/gre.api.c>
-
-static clib_error_t *
-gre_api_hookup (vlib_main_t * vm)
-{
- /*
- * Set up the (msg_name, crc, message-id) table
- */
- gre_main.msg_id_base = setup_message_id_table ();
-
- return 0;
-}
-
-VLIB_API_INIT_FUNCTION (gre_api_hookup);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/gre/interface.c b/src/vnet/gre/interface.c
deleted file mode 100644
index bb0be865664..00000000000
--- a/src/vnet/gre/interface.c
+++ /dev/null
@@ -1,845 +0,0 @@
-/*
- * gre_interface.c: gre interfaces
- *
- * Copyright (c) 2012 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vnet/vnet.h>
-#include <vnet/gre/gre.h>
-#include <vnet/ip/format.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/adj/adj_midchain.h>
-#include <vnet/adj/adj_nbr.h>
-#include <vnet/mpls/mpls.h>
-#include <vnet/l2/l2_input.h>
-#include <vnet/teib/teib.h>
-
-u8 *
-format_gre_tunnel_type (u8 * s, va_list * args)
-{
- gre_tunnel_type_t type = va_arg (*args, int);
-
- switch (type)
- {
-#define _(n, v) case GRE_TUNNEL_TYPE_##n: \
- s = format (s, "%s", v); \
- break;
- foreach_gre_tunnel_type
-#undef _
- }
-
- return (s);
-}
-
-static u8 *
-format_gre_tunnel (u8 * s, va_list * args)
-{
- gre_tunnel_t *t = va_arg (*args, gre_tunnel_t *);
-
- s = format (s, "[%d] instance %d src %U dst %U fib-idx %d sw-if-idx %d ",
- t->dev_instance, t->user_instance,
- format_ip46_address, &t->tunnel_src, IP46_TYPE_ANY,
- format_ip46_address, &t->tunnel_dst.fp_addr, IP46_TYPE_ANY,
- t->outer_fib_index, t->sw_if_index);
-
- s = format (s, "payload %U ", format_gre_tunnel_type, t->type);
- s = format (s, "%U ", format_tunnel_mode, t->mode);
-
- if (t->type == GRE_TUNNEL_TYPE_ERSPAN)
- s = format (s, "session %d ", t->session_id);
-
- if (t->type != GRE_TUNNEL_TYPE_L3)
- s = format (s, "l2-adj-idx %d ", t->l2_adj_index);
-
- return s;
-}
-
-static gre_tunnel_t *
-gre_tunnel_db_find (const vnet_gre_tunnel_add_del_args_t * a,
- u32 outer_fib_index, gre_tunnel_key_t * key)
-{
- gre_main_t *gm = &gre_main;
- uword *p;
-
- if (!a->is_ipv6)
- {
- gre_mk_key4 (a->src.ip4, a->dst.ip4, outer_fib_index,
- a->type, a->mode, a->session_id, &key->gtk_v4);
- p = hash_get_mem (gm->tunnel_by_key4, &key->gtk_v4);
- }
- else
- {
- gre_mk_key6 (&a->src.ip6, &a->dst.ip6, outer_fib_index,
- a->type, a->mode, a->session_id, &key->gtk_v6);
- p = hash_get_mem (gm->tunnel_by_key6, &key->gtk_v6);
- }
-
- if (NULL == p)
- return (NULL);
-
- return (pool_elt_at_index (gm->tunnels, p[0]));
-}
-
-static void
-gre_tunnel_db_add (gre_tunnel_t * t, gre_tunnel_key_t * key)
-{
- gre_main_t *gm = &gre_main;
-
- if (t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6)
- {
- hash_set_mem_alloc (&gm->tunnel_by_key6, &key->gtk_v6, t->dev_instance);
- }
- else
- {
- hash_set_mem_alloc (&gm->tunnel_by_key4, &key->gtk_v4, t->dev_instance);
- }
-}
-
-static void
-gre_tunnel_db_remove (gre_tunnel_t * t, gre_tunnel_key_t * key)
-{
- gre_main_t *gm = &gre_main;
-
- if (t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6)
- {
- hash_unset_mem_free (&gm->tunnel_by_key6, &key->gtk_v6);
- }
- else
- {
- hash_unset_mem_free (&gm->tunnel_by_key4, &key->gtk_v4);
- }
-}
-
-/**
- * gre_tunnel_stack
- *
- * 'stack' (resolve the recursion for) the tunnel's midchain adjacency
- */
-void
-gre_tunnel_stack (adj_index_t ai)
-{
- gre_main_t *gm = &gre_main;
- ip_adjacency_t *adj;
- gre_tunnel_t *gt;
- u32 sw_if_index;
-
- adj = adj_get (ai);
- sw_if_index = adj->rewrite_header.sw_if_index;
-
- if ((vec_len (gm->tunnel_index_by_sw_if_index) <= sw_if_index) ||
- (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index]))
- return;
-
- gt = pool_elt_at_index (gm->tunnels,
- gm->tunnel_index_by_sw_if_index[sw_if_index]);
-
- if ((vnet_hw_interface_get_flags (vnet_get_main (), gt->hw_if_index) &
- VNET_HW_INTERFACE_FLAG_LINK_UP) == 0)
- {
- adj_midchain_delegate_unstack (ai);
- }
- else
- {
- adj_midchain_delegate_stack (ai, gt->outer_fib_index, &gt->tunnel_dst);
- }
-}
-
-/**
- * mgre_tunnel_stack
- *
- * 'stack' (resolve the recursion for) the tunnel's midchain adjacency
- */
-static void
-mgre_tunnel_stack (adj_index_t ai)
-{
- gre_main_t *gm = &gre_main;
- const ip_adjacency_t *adj;
- const gre_tunnel_t *gt;
- u32 sw_if_index;
-
- adj = adj_get (ai);
- sw_if_index = adj->rewrite_header.sw_if_index;
-
- if ((vec_len (gm->tunnel_index_by_sw_if_index) <= sw_if_index) ||
- (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index]))
- return;
-
- gt = pool_elt_at_index (gm->tunnels,
- gm->tunnel_index_by_sw_if_index[sw_if_index]);
-
- if ((vnet_hw_interface_get_flags (vnet_get_main (), gt->hw_if_index) &
- VNET_HW_INTERFACE_FLAG_LINK_UP) == 0)
- {
- adj_midchain_delegate_unstack (ai);
- }
- else
- {
- const teib_entry_t *ne;
-
- ne = teib_entry_find_46 (sw_if_index, adj->ia_nh_proto,
- &adj->sub_type.nbr.next_hop);
- if (NULL != ne)
- teib_entry_adj_stack (ne, ai);
- }
-}
-
-/**
- * @brief Call back when restacking all adjacencies on a GRE interface
- */
-static adj_walk_rc_t
-gre_adj_walk_cb (adj_index_t ai, void *ctx)
-{
- gre_tunnel_stack (ai);
-
- return (ADJ_WALK_RC_CONTINUE);
-}
-static adj_walk_rc_t
-mgre_adj_walk_cb (adj_index_t ai, void *ctx)
-{
- mgre_tunnel_stack (ai);
-
- return (ADJ_WALK_RC_CONTINUE);
-}
-
-static void
-gre_tunnel_restack (gre_tunnel_t * gt)
-{
- fib_protocol_t proto;
-
- /*
- * walk all the adjacencies on th GRE interface and restack them
- */
- FOR_EACH_FIB_IP_PROTOCOL (proto)
- {
- switch (gt->mode)
- {
- case TUNNEL_MODE_P2P:
- adj_nbr_walk (gt->sw_if_index, proto, gre_adj_walk_cb, NULL);
- break;
- case TUNNEL_MODE_MP:
- adj_nbr_walk (gt->sw_if_index, proto, mgre_adj_walk_cb, NULL);
- break;
- }
- }
-}
-
-static void
-gre_teib_mk_key (const gre_tunnel_t * t,
- const teib_entry_t * ne, gre_tunnel_key_t * key)
-{
- const fib_prefix_t *nh;
-
- nh = teib_entry_get_nh (ne);
-
- /* construct the key using mode P2P so it can be found in the DP */
- if (FIB_PROTOCOL_IP4 == nh->fp_proto)
- gre_mk_key4 (t->tunnel_src.ip4,
- nh->fp_addr.ip4,
- teib_entry_get_fib_index (ne),
- t->type, TUNNEL_MODE_P2P, 0, &key->gtk_v4);
- else
- gre_mk_key6 (&t->tunnel_src.ip6,
- &nh->fp_addr.ip6,
- teib_entry_get_fib_index (ne),
- t->type, TUNNEL_MODE_P2P, 0, &key->gtk_v6);
-}
-
-/**
- * An TEIB entry has been added
- */
-static void
-gre_teib_entry_added (const teib_entry_t * ne)
-{
- gre_main_t *gm = &gre_main;
- const ip_address_t *nh;
- gre_tunnel_key_t key;
- gre_tunnel_t *t;
- u32 sw_if_index;
- u32 t_idx;
-
- sw_if_index = teib_entry_get_sw_if_index (ne);
- if (vec_len (gm->tunnel_index_by_sw_if_index) < sw_if_index)
- return;
-
- t_idx = gm->tunnel_index_by_sw_if_index[sw_if_index];
-
- if (INDEX_INVALID == t_idx)
- return;
-
- /* entry has been added on an interface for which there is a GRE tunnel */
- t = pool_elt_at_index (gm->tunnels, t_idx);
-
- if (t->mode != TUNNEL_MODE_MP)
- return;
-
- /* the next-hop (underlay) of the NHRP entry will form part of the key for
- * ingress lookup to match packets to this interface */
- gre_teib_mk_key (t, ne, &key);
- gre_tunnel_db_add (t, &key);
-
- /* update the rewrites for each of the adjacencies for this peer (overlay)
- * using the next-hop (underlay) */
- mgre_walk_ctx_t ctx = {
- .t = t,
- .ne = ne
- };
- nh = teib_entry_get_peer (ne);
- adj_nbr_walk_nh (teib_entry_get_sw_if_index (ne),
- (AF_IP4 == ip_addr_version (nh) ?
- FIB_PROTOCOL_IP4 :
- FIB_PROTOCOL_IP6),
- &ip_addr_46 (nh), mgre_mk_complete_walk, &ctx);
-}
-
-static void
-gre_teib_entry_deleted (const teib_entry_t * ne)
-{
- gre_main_t *gm = &gre_main;
- const ip_address_t *nh;
- gre_tunnel_key_t key;
- gre_tunnel_t *t;
- u32 sw_if_index;
- u32 t_idx;
-
- sw_if_index = teib_entry_get_sw_if_index (ne);
- if (vec_len (gm->tunnel_index_by_sw_if_index) < sw_if_index)
- return;
-
- t_idx = gm->tunnel_index_by_sw_if_index[sw_if_index];
-
- if (INDEX_INVALID == t_idx)
- return;
-
- t = pool_elt_at_index (gm->tunnels, t_idx);
-
- /* remove the next-hop as an ingress lookup key */
- gre_teib_mk_key (t, ne, &key);
- gre_tunnel_db_remove (t, &key);
-
- nh = teib_entry_get_peer (ne);
-
- /* make all the adjacencies incomplete */
- adj_nbr_walk_nh (teib_entry_get_sw_if_index (ne),
- (AF_IP4 == ip_addr_version (nh) ?
- FIB_PROTOCOL_IP4 :
- FIB_PROTOCOL_IP6),
- &ip_addr_46 (nh), mgre_mk_incomplete_walk, t);
-}
-
-static walk_rc_t
-gre_tunnel_delete_teib_walk (index_t nei, void *ctx)
-{
- gre_tunnel_t *t = ctx;
- gre_tunnel_key_t key;
-
- gre_teib_mk_key (t, teib_entry_get (nei), &key);
- gre_tunnel_db_remove (t, &key);
-
- return (WALK_CONTINUE);
-}
-
-static walk_rc_t
-gre_tunnel_add_teib_walk (index_t nei, void *ctx)
-{
- gre_tunnel_t *t = ctx;
- gre_tunnel_key_t key = {};
-
- gre_teib_mk_key (t, teib_entry_get (nei), &key);
- gre_tunnel_db_add (t, &key);
-
- return (WALK_CONTINUE);
-}
-
-static int
-vnet_gre_tunnel_add (vnet_gre_tunnel_add_del_args_t * a,
- u32 outer_fib_index, u32 * sw_if_indexp)
-{
- gre_main_t *gm = &gre_main;
- vnet_main_t *vnm = gm->vnet_main;
- gre_tunnel_t *t;
- vnet_hw_interface_t *hi;
- u32 hw_if_index, sw_if_index;
- u8 is_ipv6 = a->is_ipv6;
- gre_tunnel_key_t key;
-
- t = gre_tunnel_db_find (a, outer_fib_index, &key);
- if (NULL != t)
- return VNET_API_ERROR_IF_ALREADY_EXISTS;
-
- pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES);
- clib_memset (t, 0, sizeof (*t));
-
- /* Reconcile the real dev_instance and a possible requested instance */
- u32 t_idx = t - gm->tunnels; /* tunnel index (or instance) */
- u32 u_idx = a->instance; /* user specified instance */
- if (u_idx == ~0)
- u_idx = t_idx;
- if (hash_get (gm->instance_used, u_idx))
- {
- pool_put (gm->tunnels, t);
- return VNET_API_ERROR_INSTANCE_IN_USE;
- }
- hash_set (gm->instance_used, u_idx, 1);
-
- t->dev_instance = t_idx; /* actual */
- t->user_instance = u_idx; /* name */
-
- t->type = a->type;
- t->mode = a->mode;
- t->flags = a->flags;
- if (t->type == GRE_TUNNEL_TYPE_ERSPAN)
- t->session_id = a->session_id;
-
- if (t->type == GRE_TUNNEL_TYPE_L3)
- {
- if (t->mode == TUNNEL_MODE_P2P)
- hw_if_index =
- vnet_register_interface (vnm, gre_device_class.index, t_idx,
- gre_hw_interface_class.index, t_idx);
- else
- hw_if_index =
- vnet_register_interface (vnm, gre_device_class.index, t_idx,
- mgre_hw_interface_class.index, t_idx);
- }
- else
- {
- vnet_eth_interface_registration_t eir = {};
-
- /* Default MAC address (d00b:eed0:0000 + sw_if_index) */
- u8 address[6] =
- { 0xd0, 0x0b, 0xee, 0xd0, (u8) (t_idx >> 8), (u8) t_idx };
-
- eir.dev_class_index = gre_device_class.index;
- eir.dev_instance = t_idx;
- eir.address = address;
- hw_if_index = vnet_eth_register_interface (vnm, &eir);
- }
-
- /* Set GRE tunnel interface output node (not used for L3 payload) */
- if (GRE_TUNNEL_TYPE_ERSPAN == t->type)
- vnet_set_interface_output_node (vnm, hw_if_index,
- gre_erspan_encap_node.index);
- else
- vnet_set_interface_output_node (vnm, hw_if_index,
- gre_teb_encap_node.index);
-
- hi = vnet_get_hw_interface (vnm, hw_if_index);
- sw_if_index = hi->sw_if_index;
-
- t->hw_if_index = hw_if_index;
- t->outer_fib_index = outer_fib_index;
- t->sw_if_index = sw_if_index;
- t->l2_adj_index = ADJ_INDEX_INVALID;
-
- vec_validate_init_empty (gm->tunnel_index_by_sw_if_index, sw_if_index, ~0);
- gm->tunnel_index_by_sw_if_index[sw_if_index] = t_idx;
-
- if (!is_ipv6)
- {
- hi->frame_overhead = sizeof (gre_header_t) + sizeof (ip4_header_t);
- hi->min_frame_size = hi->frame_overhead + 64;
- }
- else
- {
- hi->frame_overhead = sizeof (gre_header_t) + sizeof (ip6_header_t);
- hi->min_frame_size = hi->frame_overhead + 64;
- }
-
- /* Standard default gre MTU. */
- vnet_sw_interface_set_mtu (vnm, sw_if_index, 9000);
-
- /*
- * source the FIB entry for the tunnel's destination
- * and become a child thereof. The tunnel will then get poked
- * when the forwarding for the entry updates, and the tunnel can
- * re-stack accordingly
- */
-
- clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src));
- t->tunnel_dst.fp_len = !is_ipv6 ? 32 : 128;
- t->tunnel_dst.fp_proto = !is_ipv6 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
- t->tunnel_dst.fp_addr = a->dst;
-
- gre_tunnel_db_add (t, &key);
-
- if (t->mode == TUNNEL_MODE_MP)
- teib_walk_itf (t->sw_if_index, gre_tunnel_add_teib_walk, t);
-
- if (t->type == GRE_TUNNEL_TYPE_ERSPAN)
- {
- gre_sn_key_t skey;
- gre_sn_t *gre_sn;
-
- gre_mk_sn_key (t, &skey);
- gre_sn = (gre_sn_t *) hash_get_mem (gm->seq_num_by_key, &skey);
- if (gre_sn != NULL)
- {
- gre_sn->ref_count++;
- t->gre_sn = gre_sn;
- }
- else
- {
- gre_sn = clib_mem_alloc (sizeof (gre_sn_t));
- gre_sn->seq_num = 0;
- gre_sn->ref_count = 1;
- t->gre_sn = gre_sn;
- hash_set_mem_alloc (&gm->seq_num_by_key, &skey, (uword) gre_sn);
- }
- }
-
- if (t->type != GRE_TUNNEL_TYPE_L3)
- {
- t->l2_adj_index = adj_nbr_add_or_lock
- (t->tunnel_dst.fp_proto, VNET_LINK_ETHERNET, &zero_addr, sw_if_index);
- vnet_set_interface_l3_output_node (gm->vlib_main, sw_if_index,
- (u8 *) "tunnel-output-no-count");
- gre_update_adj (vnm, t->sw_if_index, t->l2_adj_index);
- }
- else
- {
- vnet_set_interface_l3_output_node (gm->vlib_main, sw_if_index,
- (u8 *) "tunnel-output");
- }
- if (sw_if_indexp)
- *sw_if_indexp = sw_if_index;
-
- /* register gre46-input nodes */
- ip4_register_protocol (IP_PROTOCOL_GRE, gre4_input_node.index);
- ip6_register_protocol (IP_PROTOCOL_GRE, gre6_input_node.index);
-
- return 0;
-}
-
-static int
-vnet_gre_tunnel_delete (vnet_gre_tunnel_add_del_args_t * a,
- u32 outer_fib_index, u32 * sw_if_indexp)
-{
- gre_main_t *gm = &gre_main;
- vnet_main_t *vnm = gm->vnet_main;
- gre_tunnel_t *t;
- gre_tunnel_key_t key;
- u32 sw_if_index;
-
- t = gre_tunnel_db_find (a, outer_fib_index, &key);
- if (NULL == t)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-
- if (t->mode == TUNNEL_MODE_MP)
- teib_walk_itf (t->sw_if_index, gre_tunnel_delete_teib_walk, t);
-
- sw_if_index = t->sw_if_index;
- vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ );
-
- /* make sure tunnel is removed from l2 bd or xconnect */
- set_int_l2_mode (gm->vlib_main, vnm, MODE_L3, sw_if_index, 0,
- L2_BD_PORT_TYPE_NORMAL, 0, 0);
- gm->tunnel_index_by_sw_if_index[sw_if_index] = ~0;
-
- if (t->type == GRE_TUNNEL_TYPE_L3)
- vnet_delete_hw_interface (vnm, t->hw_if_index);
- else
- ethernet_delete_interface (vnm, t->hw_if_index);
-
- if (t->l2_adj_index != ADJ_INDEX_INVALID)
- {
- adj_midchain_delegate_unstack (t->l2_adj_index);
- adj_unlock (t->l2_adj_index);
- }
-
- ASSERT ((t->type != GRE_TUNNEL_TYPE_ERSPAN) || (t->gre_sn != NULL));
- if ((t->type == GRE_TUNNEL_TYPE_ERSPAN) && (t->gre_sn->ref_count-- == 1))
- {
- gre_sn_key_t skey;
- gre_mk_sn_key (t, &skey);
- hash_unset_mem_free (&gm->seq_num_by_key, &skey);
- clib_mem_free (t->gre_sn);
- }
-
- vnet_reset_interface_l3_output_node (gm->vlib_main, sw_if_index);
- hash_unset (gm->instance_used, t->user_instance);
- gre_tunnel_db_remove (t, &key);
- pool_put (gm->tunnels, t);
-
- if (sw_if_indexp)
- *sw_if_indexp = sw_if_index;
-
- return 0;
-}
-
-int
-vnet_gre_tunnel_add_del (vnet_gre_tunnel_add_del_args_t * a,
- u32 * sw_if_indexp)
-{
- u32 outer_fib_index;
-
- outer_fib_index = fib_table_find ((a->is_ipv6 ?
- FIB_PROTOCOL_IP6 :
- FIB_PROTOCOL_IP4), a->outer_table_id);
-
- if (~0 == outer_fib_index)
- return VNET_API_ERROR_NO_SUCH_FIB;
-
- if (a->session_id > GTK_SESSION_ID_MAX)
- return VNET_API_ERROR_INVALID_SESSION_ID;
-
- if (a->mode == TUNNEL_MODE_MP && !ip46_address_is_zero (&a->dst))
- return (VNET_API_ERROR_INVALID_DST_ADDRESS);
-
- if (a->is_add)
- return (vnet_gre_tunnel_add (a, outer_fib_index, sw_if_indexp));
- else
- return (vnet_gre_tunnel_delete (a, outer_fib_index, sw_if_indexp));
-}
-
-clib_error_t *
-gre_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
-{
- gre_main_t *gm = &gre_main;
- vnet_hw_interface_t *hi;
- gre_tunnel_t *t;
- u32 ti;
-
- hi = vnet_get_hw_interface (vnm, hw_if_index);
-
- if (NULL == gm->tunnel_index_by_sw_if_index ||
- hi->sw_if_index >= vec_len (gm->tunnel_index_by_sw_if_index))
- return (NULL);
-
- ti = gm->tunnel_index_by_sw_if_index[hi->sw_if_index];
-
- if (~0 == ti)
- /* not one of ours */
- return (NULL);
-
- t = pool_elt_at_index (gm->tunnels, ti);
-
- if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
- vnet_hw_interface_set_flags (vnm, hw_if_index,
- VNET_HW_INTERFACE_FLAG_LINK_UP);
- else
- vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */ );
-
- gre_tunnel_restack (t);
-
- return /* no error */ 0;
-}
-
-static clib_error_t *
-create_gre_tunnel_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- vnet_gre_tunnel_add_del_args_t _a, *a = &_a;
- ip46_address_t src = ip46_address_initializer, dst =
- ip46_address_initializer;
- u32 instance = ~0;
- u32 outer_table_id = 0;
- gre_tunnel_type_t t_type = GRE_TUNNEL_TYPE_L3;
- tunnel_mode_t t_mode = TUNNEL_MODE_P2P;
- tunnel_encap_decap_flags_t flags = TUNNEL_ENCAP_DECAP_FLAG_NONE;
- u32 session_id = 0;
- int rv;
- u8 is_add = 1;
- u32 sw_if_index;
- clib_error_t *error = NULL;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "del"))
- is_add = 0;
- else if (unformat (line_input, "instance %d", &instance))
- ;
- else if (unformat (line_input, "src %U", unformat_ip46_address, &src))
- ;
- else if (unformat (line_input, "dst %U", unformat_ip46_address, &dst))
- ;
- else if (unformat (line_input, "outer-table-id %d", &outer_table_id))
- ;
- else if (unformat (line_input, "multipoint"))
- t_mode = TUNNEL_MODE_MP;
- else if (unformat (line_input, "teb"))
- t_type = GRE_TUNNEL_TYPE_TEB;
- else if (unformat (line_input, "erspan %d", &session_id))
- t_type = GRE_TUNNEL_TYPE_ERSPAN;
- else
- if (unformat
- (line_input, "flags %U", unformat_tunnel_encap_decap_flags,
- &flags))
- ;
- else
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- goto done;
- }
- }
-
- if (ip46_address_is_equal (&src, &dst))
- {
- error = clib_error_return (0, "src and dst are identical");
- goto done;
- }
-
- if (t_mode != TUNNEL_MODE_MP && ip46_address_is_zero (&dst))
- {
- error = clib_error_return (0, "destination address not specified");
- goto done;
- }
-
- if (ip46_address_is_zero (&src))
- {
- error = clib_error_return (0, "source address not specified");
- goto done;
- }
-
- if (ip46_address_is_ip4 (&src) != ip46_address_is_ip4 (&dst))
- {
- error =
- clib_error_return (0, "src and dst address must be the same AF");
- goto done;
- }
-
- clib_memset (a, 0, sizeof (*a));
- a->is_add = is_add;
- a->outer_table_id = outer_table_id;
- a->type = t_type;
- a->mode = t_mode;
- a->session_id = session_id;
- a->is_ipv6 = !ip46_address_is_ip4 (&src);
- a->instance = instance;
- a->flags = flags;
- clib_memcpy (&a->src, &src, sizeof (a->src));
- clib_memcpy (&a->dst, &dst, sizeof (a->dst));
-
- rv = vnet_gre_tunnel_add_del (a, &sw_if_index);
-
- switch (rv)
- {
- case 0:
- vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
- vnet_get_main (), sw_if_index);
- break;
- case VNET_API_ERROR_IF_ALREADY_EXISTS:
- error = clib_error_return (0, "GRE tunnel already exists...");
- goto done;
- case VNET_API_ERROR_NO_SUCH_FIB:
- error = clib_error_return (0, "outer table ID %d doesn't exist\n",
- outer_table_id);
- goto done;
- case VNET_API_ERROR_NO_SUCH_ENTRY:
- error = clib_error_return (0, "GRE tunnel doesn't exist");
- goto done;
- case VNET_API_ERROR_INVALID_SESSION_ID:
- error = clib_error_return (0, "session ID %d out of range\n",
- session_id);
- goto done;
- case VNET_API_ERROR_INSTANCE_IN_USE:
- error = clib_error_return (0, "Instance is in use");
- goto done;
- default:
- error =
- clib_error_return (0, "vnet_gre_tunnel_add_del returned %d", rv);
- goto done;
- }
-
-done:
- unformat_free (line_input);
-
- return error;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (create_gre_tunnel_command, static) = {
- .path = "create gre tunnel",
- .short_help = "create gre tunnel src <addr> dst <addr> [instance <n>] "
- "[outer-fib-id <fib>] [teb | erspan <session-id>] [del] "
- "[multipoint]",
- .function = create_gre_tunnel_command_fn,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-show_gre_tunnel_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- gre_main_t *gm = &gre_main;
- gre_tunnel_t *t;
- u32 ti = ~0;
-
- if (pool_elts (gm->tunnels) == 0)
- vlib_cli_output (vm, "No GRE tunnels configured...");
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "%d", &ti))
- ;
- else
- break;
- }
-
- if (~0 == ti)
- {
- /* *INDENT-OFF* */
- pool_foreach (t, gm->tunnels)
- {
- vlib_cli_output (vm, "%U", format_gre_tunnel, t);
- }
- /* *INDENT-ON* */
- }
- else
- {
- t = pool_elt_at_index (gm->tunnels, ti);
-
- vlib_cli_output (vm, "%U", format_gre_tunnel, t);
- }
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_gre_tunnel_command, static) = {
- .path = "show gre tunnel",
- .function = show_gre_tunnel_command_fn,
-};
-/* *INDENT-ON* */
-
-const static teib_vft_t gre_teib_vft = {
- .nv_added = gre_teib_entry_added,
- .nv_deleted = gre_teib_entry_deleted,
-};
-
-/* force inclusion from application's main.c */
-clib_error_t *
-gre_interface_init (vlib_main_t * vm)
-{
- teib_register (&gre_teib_vft);
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gre_interface_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/gre/node.c b/src/vnet/gre/node.c
deleted file mode 100644
index fdd3118bf3c..00000000000
--- a/src/vnet/gre/node.c
+++ /dev/null
@@ -1,598 +0,0 @@
-/*
- * node.c: gre packet processing
- *
- * Copyright (c) 2012 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vlib/vlib.h>
-#include <vnet/pg/pg.h>
-#include <vnet/gre/gre.h>
-#include <vnet/mpls/mpls.h>
-#include <vppinfra/sparse_vec.h>
-
-#define foreach_gre_input_next \
-_(PUNT, "error-punt") \
-_(DROP, "error-drop") \
-_(ETHERNET_INPUT, "ethernet-input") \
-_(IP4_INPUT, "ip4-input") \
-_(IP6_INPUT, "ip6-input") \
-_(MPLS_INPUT, "mpls-input")
-
-typedef enum
-{
-#define _(s,n) GRE_INPUT_NEXT_##s,
- foreach_gre_input_next
-#undef _
- GRE_INPUT_N_NEXT,
-} gre_input_next_t;
-
-typedef struct
-{
- u32 tunnel_id;
- u32 length;
- ip46_address_t src;
- ip46_address_t dst;
-} gre_rx_trace_t;
-
-extern u8 *format_gre_rx_trace (u8 * s, va_list * args);
-
-#ifndef CLIB_MARCH_VARIANT
-u8 *
-format_gre_rx_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gre_rx_trace_t *t = va_arg (*args, gre_rx_trace_t *);
-
- s = format (s, "GRE: tunnel %d len %d src %U dst %U",
- t->tunnel_id, clib_net_to_host_u16 (t->length),
- format_ip46_address, &t->src, IP46_TYPE_ANY,
- format_ip46_address, &t->dst, IP46_TYPE_ANY);
- return s;
-}
-#endif /* CLIB_MARCH_VARIANT */
-
-typedef struct
-{
- /* Sparse vector mapping gre protocol in network byte order
- to next index. */
- u16 *next_by_protocol;
-} gre_input_runtime_t;
-
-always_inline void
-gre_trace (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b,
- u32 tun_sw_if_index, const ip6_header_t * ip6,
- const ip4_header_t * ip4, int is_ipv6)
-{
- gre_rx_trace_t *tr = vlib_add_trace (vm, node,
- b, sizeof (*tr));
- tr->tunnel_id = tun_sw_if_index;
- if (is_ipv6)
- {
- tr->length = ip6->payload_length;
- tr->src.ip6.as_u64[0] = ip6->src_address.as_u64[0];
- tr->src.ip6.as_u64[1] = ip6->src_address.as_u64[1];
- tr->dst.ip6.as_u64[0] = ip6->dst_address.as_u64[0];
- tr->dst.ip6.as_u64[1] = ip6->dst_address.as_u64[1];
- }
- else
- {
- tr->length = ip4->length;
- tr->src.as_u64[0] = tr->src.as_u64[1] = 0;
- tr->dst.as_u64[0] = tr->dst.as_u64[1] = 0;
- tr->src.ip4.as_u32 = ip4->src_address.as_u32;
- tr->dst.ip4.as_u32 = ip4->dst_address.as_u32;
- }
-}
-
-always_inline void
-gre_tunnel_get (const gre_main_t * gm, vlib_node_runtime_t * node,
- vlib_buffer_t * b, u16 * next, const gre_tunnel_key_t * key,
- gre_tunnel_key_t * cached_key, u32 * tun_sw_if_index,
- u32 * cached_tun_sw_if_index, int is_ipv6)
-{
- const uword *p;
- p = is_ipv6 ? hash_get_mem (gm->tunnel_by_key6, &key->gtk_v6)
- : hash_get_mem (gm->tunnel_by_key4, &key->gtk_v4);
- if (PREDICT_FALSE (!p))
- {
- *next = GRE_INPUT_NEXT_DROP;
- b->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL];
- *tun_sw_if_index = ~0;
- }
- else
- {
- const gre_tunnel_t *tun;
- tun = pool_elt_at_index (gm->tunnels, *p);
- *cached_tun_sw_if_index = *tun_sw_if_index = tun->sw_if_index;
- if (is_ipv6)
- cached_key->gtk_v6 = key->gtk_v6;
- else
- cached_key->gtk_v4 = key->gtk_v4;
- }
-}
-
-always_inline uword
-gre_input (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame,
- const int is_ipv6)
-{
- gre_main_t *gm = &gre_main;
- u32 *from, n_left_from;
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
- u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
- u16 cached_protocol = ~0;
- u32 cached_next_index = SPARSE_VEC_INVALID_INDEX;
- u32 cached_tun_sw_if_index = ~0;
- gre_tunnel_key_t cached_key;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- vlib_get_buffers (vm, from, bufs, n_left_from);
-
- if (is_ipv6)
- clib_memset (&cached_key.gtk_v6, 0xff, sizeof (cached_key.gtk_v6));
- else
- clib_memset (&cached_key.gtk_v4, 0xff, sizeof (cached_key.gtk_v4));
-
- while (n_left_from >= 2)
- {
- const ip6_header_t *ip6[2];
- const ip4_header_t *ip4[2];
- const gre_header_t *gre[2];
- u32 nidx[2];
- next_info_t ni[2];
- u8 type[2];
- u16 version[2];
- u32 len[2];
- gre_tunnel_key_t key[2];
- u8 matched[2];
- u32 tun_sw_if_index[2];
-
- if (PREDICT_TRUE (n_left_from >= 6))
- {
- vlib_prefetch_buffer_data (b[2], LOAD);
- vlib_prefetch_buffer_data (b[3], LOAD);
- vlib_prefetch_buffer_header (b[4], STORE);
- vlib_prefetch_buffer_header (b[5], STORE);
- }
-
- if (is_ipv6)
- {
- /* ip6_local hands us the ip header, not the gre header */
- ip6[0] = vlib_buffer_get_current (b[0]);
- ip6[1] = vlib_buffer_get_current (b[1]);
- gre[0] = (void *) (ip6[0] + 1);
- gre[1] = (void *) (ip6[1] + 1);
- vlib_buffer_advance (b[0], sizeof (*ip6[0]) + sizeof (*gre[0]));
- vlib_buffer_advance (b[1], sizeof (*ip6[0]) + sizeof (*gre[0]));
- }
- else
- {
- /* ip4_local hands us the ip header, not the gre header */
- ip4[0] = vlib_buffer_get_current (b[0]);
- ip4[1] = vlib_buffer_get_current (b[1]);
- gre[0] = (void *) (ip4[0] + 1);
- gre[1] = (void *) (ip4[1] + 1);
- vlib_buffer_advance (b[0], sizeof (*ip4[0]) + sizeof (*gre[0]));
- vlib_buffer_advance (b[1], sizeof (*ip4[0]) + sizeof (*gre[0]));
- }
-
- if (PREDICT_TRUE (cached_protocol == gre[0]->protocol))
- {
- nidx[0] = cached_next_index;
- }
- else
- {
- cached_next_index = nidx[0] =
- sparse_vec_index (gm->next_by_protocol, gre[0]->protocol);
- cached_protocol = gre[0]->protocol;
- }
- if (PREDICT_TRUE (cached_protocol == gre[1]->protocol))
- {
- nidx[1] = cached_next_index;
- }
- else
- {
- cached_next_index = nidx[1] =
- sparse_vec_index (gm->next_by_protocol, gre[1]->protocol);
- cached_protocol = gre[1]->protocol;
- }
-
- ni[0] = vec_elt (gm->next_by_protocol, nidx[0]);
- ni[1] = vec_elt (gm->next_by_protocol, nidx[1]);
- next[0] = ni[0].next_index;
- next[1] = ni[1].next_index;
- type[0] = ni[0].tunnel_type;
- type[1] = ni[1].tunnel_type;
-
- b[0]->error = nidx[0] == SPARSE_VEC_INVALID_INDEX
- ? node->errors[GRE_ERROR_UNKNOWN_PROTOCOL]
- : node->errors[GRE_ERROR_NONE];
- b[1]->error = nidx[1] == SPARSE_VEC_INVALID_INDEX
- ? node->errors[GRE_ERROR_UNKNOWN_PROTOCOL]
- : node->errors[GRE_ERROR_NONE];
-
- version[0] = clib_net_to_host_u16 (gre[0]->flags_and_version);
- version[1] = clib_net_to_host_u16 (gre[1]->flags_and_version);
- version[0] &= GRE_VERSION_MASK;
- version[1] &= GRE_VERSION_MASK;
-
- b[0]->error = version[0]
- ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[0]->error;
- next[0] = version[0] ? GRE_INPUT_NEXT_DROP : next[0];
- b[1]->error = version[1]
- ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[1]->error;
- next[1] = version[1] ? GRE_INPUT_NEXT_DROP : next[1];
-
- len[0] = vlib_buffer_length_in_chain (vm, b[0]);
- len[1] = vlib_buffer_length_in_chain (vm, b[1]);
-
- /* always search for P2P types in the DP */
- if (is_ipv6)
- {
- gre_mk_key6 (&ip6[0]->dst_address,
- &ip6[0]->src_address,
- vnet_buffer (b[0])->ip.fib_index,
- type[0], TUNNEL_MODE_P2P, 0, &key[0].gtk_v6);
- gre_mk_key6 (&ip6[1]->dst_address,
- &ip6[1]->src_address,
- vnet_buffer (b[1])->ip.fib_index,
- type[1], TUNNEL_MODE_P2P, 0, &key[1].gtk_v6);
- matched[0] = gre_match_key6 (&cached_key.gtk_v6, &key[0].gtk_v6);
- matched[1] = gre_match_key6 (&cached_key.gtk_v6, &key[1].gtk_v6);
- }
- else
- {
- gre_mk_key4 (ip4[0]->dst_address,
- ip4[0]->src_address,
- vnet_buffer (b[0])->ip.fib_index,
- type[0], TUNNEL_MODE_P2P, 0, &key[0].gtk_v4);
- gre_mk_key4 (ip4[1]->dst_address,
- ip4[1]->src_address,
- vnet_buffer (b[1])->ip.fib_index,
- type[1], TUNNEL_MODE_P2P, 0, &key[1].gtk_v4);
- matched[0] = gre_match_key4 (&cached_key.gtk_v4, &key[0].gtk_v4);
- matched[1] = gre_match_key4 (&cached_key.gtk_v4, &key[1].gtk_v4);
- }
-
- tun_sw_if_index[0] = cached_tun_sw_if_index;
- tun_sw_if_index[1] = cached_tun_sw_if_index;
- if (PREDICT_FALSE (!matched[0]))
- gre_tunnel_get (gm, node, b[0], &next[0], &key[0], &cached_key,
- &tun_sw_if_index[0], &cached_tun_sw_if_index,
- is_ipv6);
- if (PREDICT_FALSE (!matched[1]))
- gre_tunnel_get (gm, node, b[1], &next[1], &key[1], &cached_key,
- &tun_sw_if_index[1], &cached_tun_sw_if_index,
- is_ipv6);
-
- if (PREDICT_TRUE (next[0] > GRE_INPUT_NEXT_DROP))
- {
- vlib_increment_combined_counter (&gm->vnet_main->
- interface_main.combined_sw_if_counters
- [VNET_INTERFACE_COUNTER_RX],
- vm->thread_index,
- tun_sw_if_index[0],
- 1 /* packets */ ,
- len[0] /* bytes */ );
- vnet_buffer (b[0])->sw_if_index[VLIB_RX] = tun_sw_if_index[0];
- }
- if (PREDICT_TRUE (next[1] > GRE_INPUT_NEXT_DROP))
- {
- vlib_increment_combined_counter (&gm->vnet_main->
- interface_main.combined_sw_if_counters
- [VNET_INTERFACE_COUNTER_RX],
- vm->thread_index,
- tun_sw_if_index[1],
- 1 /* packets */ ,
- len[1] /* bytes */ );
- vnet_buffer (b[1])->sw_if_index[VLIB_RX] = tun_sw_if_index[1];
- }
-
- vnet_buffer (b[0])->sw_if_index[VLIB_TX] = (u32) ~0;
- vnet_buffer (b[1])->sw_if_index[VLIB_TX] = (u32) ~0;
-
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- gre_trace (vm, node, b[0], tun_sw_if_index[0], ip6[0], ip4[0],
- is_ipv6);
- if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED))
- gre_trace (vm, node, b[1], tun_sw_if_index[1], ip6[1], ip4[1],
- is_ipv6);
-
- b += 2;
- next += 2;
- n_left_from -= 2;
- }
-
- while (n_left_from >= 1)
- {
- const ip6_header_t *ip6[1];
- const ip4_header_t *ip4[1];
- const gre_header_t *gre[1];
- u32 nidx[1];
- next_info_t ni[1];
- u8 type[1];
- u16 version[1];
- u32 len[1];
- gre_tunnel_key_t key[1];
- u8 matched[1];
- u32 tun_sw_if_index[1];
-
- if (PREDICT_TRUE (n_left_from >= 3))
- {
- vlib_prefetch_buffer_data (b[1], LOAD);
- vlib_prefetch_buffer_header (b[2], STORE);
- }
-
- if (is_ipv6)
- {
- /* ip6_local hands us the ip header, not the gre header */
- ip6[0] = vlib_buffer_get_current (b[0]);
- gre[0] = (void *) (ip6[0] + 1);
- vlib_buffer_advance (b[0], sizeof (*ip6[0]) + sizeof (*gre[0]));
- }
- else
- {
- /* ip4_local hands us the ip header, not the gre header */
- ip4[0] = vlib_buffer_get_current (b[0]);
- gre[0] = (void *) (ip4[0] + 1);
- vlib_buffer_advance (b[0], sizeof (*ip4[0]) + sizeof (*gre[0]));
- }
-
- if (PREDICT_TRUE (cached_protocol == gre[0]->protocol))
- {
- nidx[0] = cached_next_index;
- }
- else
- {
- cached_next_index = nidx[0] =
- sparse_vec_index (gm->next_by_protocol, gre[0]->protocol);
- cached_protocol = gre[0]->protocol;
- }
-
- ni[0] = vec_elt (gm->next_by_protocol, nidx[0]);
- next[0] = ni[0].next_index;
- type[0] = ni[0].tunnel_type;
-
- b[0]->error = nidx[0] == SPARSE_VEC_INVALID_INDEX
- ? node->errors[GRE_ERROR_UNKNOWN_PROTOCOL]
- : node->errors[GRE_ERROR_NONE];
-
- version[0] = clib_net_to_host_u16 (gre[0]->flags_and_version);
- version[0] &= GRE_VERSION_MASK;
-
- b[0]->error = version[0]
- ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[0]->error;
- next[0] = version[0] ? GRE_INPUT_NEXT_DROP : next[0];
-
- len[0] = vlib_buffer_length_in_chain (vm, b[0]);
-
- if (is_ipv6)
- {
- gre_mk_key6 (&ip6[0]->dst_address,
- &ip6[0]->src_address,
- vnet_buffer (b[0])->ip.fib_index,
- type[0], TUNNEL_MODE_P2P, 0, &key[0].gtk_v6);
- matched[0] = gre_match_key6 (&cached_key.gtk_v6, &key[0].gtk_v6);
- }
- else
- {
- gre_mk_key4 (ip4[0]->dst_address,
- ip4[0]->src_address,
- vnet_buffer (b[0])->ip.fib_index,
- type[0], TUNNEL_MODE_P2P, 0, &key[0].gtk_v4);
- matched[0] = gre_match_key4 (&cached_key.gtk_v4, &key[0].gtk_v4);
- }
-
- tun_sw_if_index[0] = cached_tun_sw_if_index;
- if (PREDICT_FALSE (!matched[0]))
- gre_tunnel_get (gm, node, b[0], &next[0], &key[0], &cached_key,
- &tun_sw_if_index[0], &cached_tun_sw_if_index,
- is_ipv6);
-
- if (PREDICT_TRUE (next[0] > GRE_INPUT_NEXT_DROP))
- {
- vlib_increment_combined_counter (&gm->vnet_main->
- interface_main.combined_sw_if_counters
- [VNET_INTERFACE_COUNTER_RX],
- vm->thread_index,
- tun_sw_if_index[0],
- 1 /* packets */ ,
- len[0] /* bytes */ );
- vnet_buffer (b[0])->sw_if_index[VLIB_RX] = tun_sw_if_index[0];
- }
-
- vnet_buffer (b[0])->sw_if_index[VLIB_TX] = (u32) ~0;
-
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- gre_trace (vm, node, b[0], tun_sw_if_index[0], ip6[0], ip4[0],
- is_ipv6);
-
- b += 1;
- next += 1;
- n_left_from -= 1;
- }
-
- vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
-
- vlib_node_increment_counter (vm,
- is_ipv6 ? gre6_input_node.index :
- gre4_input_node.index, GRE_ERROR_PKTS_DECAP,
- n_left_from);
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (gre4_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return gre_input (vm, node, from_frame, /* is_ip6 */ 0);
-}
-
-VLIB_NODE_FN (gre6_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return gre_input (vm, node, from_frame, /* is_ip6 */ 1);
-}
-
-static char *gre_error_strings[] = {
-#define gre_error(n,s) s,
-#include "error.def"
-#undef gre_error
-};
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gre4_input_node) = {
- .name = "gre4-input",
- /* Takes a vector of packets. */
- .vector_size = sizeof (u32),
-
- .n_errors = GRE_N_ERROR,
- .error_strings = gre_error_strings,
-
- .n_next_nodes = GRE_INPUT_N_NEXT,
- .next_nodes = {
-#define _(s,n) [GRE_INPUT_NEXT_##s] = n,
- foreach_gre_input_next
-#undef _
- },
-
- .format_buffer = format_gre_header_with_length,
- .format_trace = format_gre_rx_trace,
- .unformat_buffer = unformat_gre_header,
-};
-
-VLIB_REGISTER_NODE (gre6_input_node) = {
- .name = "gre6-input",
- /* Takes a vector of packets. */
- .vector_size = sizeof (u32),
-
- .runtime_data_bytes = sizeof (gre_input_runtime_t),
-
- .n_errors = GRE_N_ERROR,
- .error_strings = gre_error_strings,
-
- .n_next_nodes = GRE_INPUT_N_NEXT,
- .next_nodes = {
-#define _(s,n) [GRE_INPUT_NEXT_##s] = n,
- foreach_gre_input_next
-#undef _
- },
-
- .format_buffer = format_gre_header_with_length,
- .format_trace = format_gre_rx_trace,
- .unformat_buffer = unformat_gre_header,
-};
-/* *INDENT-ON* */
-
-#ifndef CLIB_MARCH_VARIANT
-void
-gre_register_input_protocol (vlib_main_t * vm,
- gre_protocol_t protocol, u32 node_index,
- gre_tunnel_type_t tunnel_type)
-{
- gre_main_t *em = &gre_main;
- gre_protocol_info_t *pi;
- next_info_t *n;
- u32 i;
-
- {
- clib_error_t *error = vlib_call_init_function (vm, gre_input_init);
- if (error)
- clib_error_report (error);
- }
-
- pi = gre_get_protocol_info (em, protocol);
- pi->node_index = node_index;
- pi->tunnel_type = tunnel_type;
- pi->next_index = vlib_node_add_next (vm, gre4_input_node.index, node_index);
- i = vlib_node_add_next (vm, gre6_input_node.index, node_index);
- ASSERT (i == pi->next_index);
-
- /* Setup gre protocol -> next index sparse vector mapping. */
- n = sparse_vec_validate (em->next_by_protocol,
- clib_host_to_net_u16 (protocol));
- n->next_index = pi->next_index;
- n->tunnel_type = tunnel_type;
-}
-
-static void
-gre_setup_node (vlib_main_t * vm, u32 node_index)
-{
- vlib_node_t *n = vlib_get_node (vm, node_index);
- pg_node_t *pn = pg_get_node (node_index);
-
- n->format_buffer = format_gre_header_with_length;
- n->unformat_buffer = unformat_gre_header;
- pn->unformat_edit = unformat_pg_gre_header;
-}
-
-static clib_error_t *
-gre_input_init (vlib_main_t * vm)
-{
- gre_main_t *gm = &gre_main;
- vlib_node_t *ethernet_input, *ip4_input, *ip6_input, *mpls_unicast_input;
-
- {
- clib_error_t *error;
- error = vlib_call_init_function (vm, gre_init);
- if (error)
- clib_error_report (error);
- }
-
- gre_setup_node (vm, gre4_input_node.index);
- gre_setup_node (vm, gre6_input_node.index);
-
- gm->next_by_protocol = sparse_vec_new
- ( /* elt bytes */ sizeof (gm->next_by_protocol[0]),
- /* bits in index */ BITS (((gre_header_t *) 0)->protocol));
-
- /* These could be moved to the supported protocol input node defn's */
- ethernet_input = vlib_get_node_by_name (vm, (u8 *) "ethernet-input");
- ASSERT (ethernet_input);
- ip4_input = vlib_get_node_by_name (vm, (u8 *) "ip4-input");
- ASSERT (ip4_input);
- ip6_input = vlib_get_node_by_name (vm, (u8 *) "ip6-input");
- ASSERT (ip6_input);
- mpls_unicast_input = vlib_get_node_by_name (vm, (u8 *) "mpls-input");
- ASSERT (mpls_unicast_input);
-
- gre_register_input_protocol (vm, GRE_PROTOCOL_teb,
- ethernet_input->index, GRE_TUNNEL_TYPE_TEB);
-
- gre_register_input_protocol (vm, GRE_PROTOCOL_ip4,
- ip4_input->index, GRE_TUNNEL_TYPE_L3);
-
- gre_register_input_protocol (vm, GRE_PROTOCOL_ip6,
- ip6_input->index, GRE_TUNNEL_TYPE_L3);
-
- gre_register_input_protocol (vm, GRE_PROTOCOL_mpls_unicast,
- mpls_unicast_input->index, GRE_TUNNEL_TYPE_L3);
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (gre_input_init);
-
-#endif /* CLIB_MARCH_VARIANT */
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/gre/packet.h b/src/vnet/gre/packet.h
index bbd67d565c5..bbda2df3f68 100644
--- a/src/vnet/gre/packet.h
+++ b/src/vnet/gre/packet.h
@@ -138,7 +138,6 @@ typedef struct
This field is platform dependent.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u32 seq_num;
union
@@ -158,7 +157,6 @@ typedef CLIB_PACKED (struct {
erspan_t2_t erspan;
}) erspan_t2_header_t;
-/* *INDENT-ON* */
/* u64 template for ERSPAN type 2 header with both EN bits set */
#define ERSPAN_HDR2 0x1000180000000000ul
diff --git a/src/vnet/gre/pg.c b/src/vnet/gre/pg.c
deleted file mode 100644
index 38a3a07ebad..00000000000
--- a/src/vnet/gre/pg.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * hdlc_pg.c: packet generator gre interface
- *
- * Copyright (c) 2012 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vlib/vlib.h>
-#include <vnet/pg/pg.h>
-#include <vnet/gre/gre.h>
-
-typedef struct
-{
- pg_edit_t flags_and_version;
- pg_edit_t protocol;
-} pg_gre_header_t;
-
-static inline void
-pg_gre_header_init (pg_gre_header_t * e)
-{
- pg_edit_init (&e->flags_and_version, gre_header_t, flags_and_version);
- pg_edit_init (&e->protocol, gre_header_t, protocol);
-}
-
-uword
-unformat_pg_gre_header (unformat_input_t * input, va_list * args)
-{
- pg_stream_t *s = va_arg (*args, pg_stream_t *);
- pg_gre_header_t *h;
- u32 group_index, error;
-
- h = pg_create_edit_group (s, sizeof (h[0]), sizeof (gre_header_t),
- &group_index);
- pg_gre_header_init (h);
-
- pg_edit_set_fixed (&h->flags_and_version, 0);
-
- error = 1;
- if (!unformat (input, "%U",
- unformat_pg_edit,
- unformat_gre_protocol_net_byte_order, &h->protocol))
- goto done;
-
- {
- gre_main_t *pm = &gre_main;
- gre_protocol_info_t *pi = 0;
- pg_node_t *pg_node = 0;
-
- if (h->protocol.type == PG_EDIT_FIXED)
- {
- u16 t = *(u16 *) h->protocol.values[PG_EDIT_LO];
- pi = gre_get_protocol_info (pm, clib_net_to_host_u16 (t));
- if (pi && pi->node_index != ~0)
- pg_node = pg_get_node (pi->node_index);
- }
-
- if (pg_node && pg_node->unformat_edit
- && unformat_user (input, pg_node->unformat_edit, s))
- ;
- }
-
- error = 0;
-done:
- if (error)
- pg_free_edit_group (s);
- return error == 0;
-}
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/gso/FEATURE.yaml b/src/vnet/gso/FEATURE.yaml
index d3db0cc23e3..5f6275caca2 100644
--- a/src/vnet/gso/FEATURE.yaml
+++ b/src/vnet/gso/FEATURE.yaml
@@ -1,6 +1,6 @@
---
name: VNET GSO
-maintainer: ayourtch@gmail.com sykazmi@cisco.com
+maintainer: ayourtch@gmail.com mohsin.kazmi14@gmail.com
features:
- Basic GSO support
- GSO for VLAN tagged packets
diff --git a/src/vnet/gso/cli.c b/src/vnet/gso/cli.c
index 060ce812fad..11dbaad728f 100644
--- a/src/vnet/gso/cli.c
+++ b/src/vnet/gso/cli.c
@@ -76,13 +76,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_feature_gso_command, static) = {
.path = "set interface feature gso",
.short_help = "set interface feature gso <intfc> [enable | disable]",
.function = set_interface_feature_gso_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/gso/gro_func.h b/src/vnet/gso/gro_func.h
index c7649318c43..e2e4e93850b 100644
--- a/src/vnet/gso/gro_func.h
+++ b/src/vnet/gso/gro_func.h
@@ -384,6 +384,7 @@ gro_fixup_header (vlib_main_t *vm, vlib_buffer_t *b0, u32 ack_number, u8 is_l2)
1 /* is_ip6 */ );
vnet_buffer2 (b0)->gso_size = b0->current_length - gho0.hdr_sz;
+ vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
if (gho0.gho_flags & GHO_F_IP4)
{
@@ -392,6 +393,7 @@ gro_fixup_header (vlib_main_t *vm, vlib_buffer_t *b0, u32 ack_number, u8 is_l2)
ip4->length =
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
gho0.l3_hdr_offset);
+ vnet_buffer (b0)->l3_hdr_offset = (u8 *) ip4 - b0->data;
b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP4);
vnet_buffer_offload_flags_set (b0, (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM |
VNET_BUFFER_OFFLOAD_F_IP_CKSUM));
@@ -403,12 +405,15 @@ gro_fixup_header (vlib_main_t *vm, vlib_buffer_t *b0, u32 ack_number, u8 is_l2)
ip6->payload_length =
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
gho0.l4_hdr_offset);
+ vnet_buffer (b0)->l3_hdr_offset = (u8 *) ip6 - b0->data;
b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP6);
vnet_buffer_offload_flags_set (b0, VNET_BUFFER_OFFLOAD_F_TCP_CKSUM);
}
tcp_header_t *tcp0 =
(tcp_header_t *) (vlib_buffer_get_current (b0) + gho0.l4_hdr_offset);
+ vnet_buffer (b0)->l4_hdr_offset = (u8 *) tcp0 - b0->data;
+ vnet_buffer2 (b0)->gso_l4_hdr_sz = tcp_header_bytes (tcp0);
tcp0->ack_number = ack_number;
b0->flags &= ~VLIB_BUFFER_IS_TRACED;
}
diff --git a/src/vnet/gso/gso.h b/src/vnet/gso/gso.h
index 041fab3bcc4..dee5da5c70b 100644
--- a/src/vnet/gso/gso.h
+++ b/src/vnet/gso/gso.h
@@ -39,13 +39,13 @@ gso_init_bufs_from_template_base (vlib_buffer_t **bufs, vlib_buffer_t *b0,
u32 flags, u16 n_bufs, u16 hdr_sz)
{
u32 i = n_bufs;
- while (i >= 4)
+ while (i >= 6)
{
/* prefetches */
CLIB_PREFETCH (bufs[2], 2 * CLIB_CACHE_LINE_BYTES, LOAD);
CLIB_PREFETCH (bufs[3], 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- vlib_prefetch_buffer_data (bufs[2], LOAD);
- vlib_prefetch_buffer_data (bufs[3], LOAD);
+ vlib_prefetch_buffer_data (bufs[4], LOAD);
+ vlib_prefetch_buffer_data (bufs[5], LOAD);
/* copying objects from cacheline 0 */
bufs[0]->current_data = 0;
@@ -70,10 +70,26 @@ gso_init_bufs_from_template_base (vlib_buffer_t **bufs, vlib_buffer_t *b0,
bufs[0]->total_length_not_including_first_buffer = 0;
bufs[1]->total_length_not_including_first_buffer = 0;
+ clib_memcpy_fast (&bufs[0]->opaque2, &b0->opaque2, sizeof (b0->opaque2));
+ clib_memcpy_fast (&bufs[1]->opaque2, &b0->opaque2, sizeof (b0->opaque2));
+
/* copying data */
clib_memcpy_fast (bufs[0]->data, vlib_buffer_get_current (b0), hdr_sz);
clib_memcpy_fast (bufs[1]->data, vlib_buffer_get_current (b0), hdr_sz);
+ /* header offset fixup */
+ vnet_buffer (bufs[0])->l2_hdr_offset -= b0->current_data;
+ vnet_buffer (bufs[0])->l3_hdr_offset -= b0->current_data;
+ vnet_buffer (bufs[0])->l4_hdr_offset -= b0->current_data;
+ vnet_buffer2 (bufs[0])->outer_l3_hdr_offset -= b0->current_data;
+ vnet_buffer2 (bufs[0])->outer_l4_hdr_offset -= b0->current_data;
+
+ vnet_buffer (bufs[1])->l2_hdr_offset -= b0->current_data;
+ vnet_buffer (bufs[1])->l3_hdr_offset -= b0->current_data;
+ vnet_buffer (bufs[1])->l4_hdr_offset -= b0->current_data;
+ vnet_buffer2 (bufs[1])->outer_l3_hdr_offset -= b0->current_data;
+ vnet_buffer2 (bufs[1])->outer_l4_hdr_offset -= b0->current_data;
+
bufs += 2;
i -= 2;
}
@@ -92,10 +108,18 @@ gso_init_bufs_from_template_base (vlib_buffer_t **bufs, vlib_buffer_t *b0,
/* copying objects from cacheline 1 */
bufs[0]->trace_handle = b0->trace_handle;
bufs[0]->total_length_not_including_first_buffer = 0;
+ clib_memcpy_fast (&bufs[0]->opaque2, &b0->opaque2, sizeof (b0->opaque2));
/* copying data */
clib_memcpy_fast (bufs[0]->data, vlib_buffer_get_current (b0), hdr_sz);
+ /* header offset fixup */
+ vnet_buffer (bufs[0])->l2_hdr_offset -= b0->current_data;
+ vnet_buffer (bufs[0])->l3_hdr_offset -= b0->current_data;
+ vnet_buffer (bufs[0])->l4_hdr_offset -= b0->current_data;
+ vnet_buffer2 (bufs[0])->outer_l3_hdr_offset -= b0->current_data;
+ vnet_buffer2 (bufs[0])->outer_l4_hdr_offset -= b0->current_data;
+
bufs++;
i--;
}
@@ -103,27 +127,41 @@ gso_init_bufs_from_template_base (vlib_buffer_t **bufs, vlib_buffer_t *b0,
static_always_inline void
gso_fixup_segmented_buf (vlib_main_t *vm, vlib_buffer_t *b0, u32 next_tcp_seq,
- int is_l2, int is_ip6, generic_header_offset_t *gho,
- clib_ip_csum_t *c, u8 tcp_flags)
+ int is_l2, u8 oflags, u16 hdr_sz, u16 l4_hdr_sz,
+ clib_ip_csum_t *c, u8 tcp_flags, u8 is_prefetch,
+ vlib_buffer_t *b1)
{
- ip4_header_t *ip4 =
- (ip4_header_t *) (vlib_buffer_get_current (b0) + gho->l3_hdr_offset +
- gho->outer_hdr_sz);
- ip6_header_t *ip6 =
- (ip6_header_t *) (vlib_buffer_get_current (b0) + gho->l3_hdr_offset +
- gho->outer_hdr_sz);
- tcp_header_t *tcp =
- (tcp_header_t *) (vlib_buffer_get_current (b0) + gho->l4_hdr_offset +
- gho->outer_hdr_sz);
+ i16 l3_hdr_offset = vnet_buffer (b0)->l3_hdr_offset;
+ i16 l4_hdr_offset = vnet_buffer (b0)->l4_hdr_offset;
+
+ ip4_header_t *ip4 = (ip4_header_t *) (b0->data + l3_hdr_offset);
+ ip6_header_t *ip6 = (ip6_header_t *) (b0->data + l3_hdr_offset);
+ tcp_header_t *tcp = (tcp_header_t *) (b0->data + l4_hdr_offset);
tcp->flags = tcp_flags;
tcp->seq_number = clib_host_to_net_u32 (next_tcp_seq);
+ c->odd = 0;
- if (is_ip6)
+ if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
+ {
+ ip4->length =
+ clib_host_to_net_u16 (b0->current_length - hdr_sz +
+ (l4_hdr_offset - l3_hdr_offset) + l4_hdr_sz);
+ ip4->checksum = 0;
+ ip4->checksum = ip4_header_checksum (ip4);
+ vnet_buffer_offload_flags_clear (b0, (VNET_BUFFER_OFFLOAD_F_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TCP_CKSUM));
+ c->sum += clib_mem_unaligned (&ip4->src_address, u32);
+ c->sum += clib_mem_unaligned (&ip4->dst_address, u32);
+ c->sum += clib_host_to_net_u32 (
+ (clib_net_to_host_u16 (ip4->length) - ip4_header_bytes (ip4)) +
+ (ip4->protocol << 16));
+ }
+ else
{
- ip6->payload_length = clib_host_to_net_u16 (
- b0->current_length - gho->l4_hdr_offset - gho->outer_hdr_sz);
+ ip6->payload_length =
+ clib_host_to_net_u16 (b0->current_length - hdr_sz + l4_hdr_sz);
vnet_buffer_offload_flags_clear (b0, VNET_BUFFER_OFFLOAD_F_TCP_CKSUM);
ip6_psh_t psh = { 0 };
u32 *p = (u32 *) &psh;
@@ -134,24 +172,15 @@ gso_fixup_segmented_buf (vlib_main_t *vm, vlib_buffer_t *b0, u32 next_tcp_seq,
for (int i = 0; i < 10; i++)
c->sum += p[i];
}
- else
- {
- ip4->length = clib_host_to_net_u16 (
- b0->current_length - gho->l3_hdr_offset - gho->outer_hdr_sz);
- if (gho->gho_flags & GHO_F_IP4)
- ip4->checksum = ip4_header_checksum (ip4);
- vnet_buffer_offload_flags_clear (b0, (VNET_BUFFER_OFFLOAD_F_IP_CKSUM |
- VNET_BUFFER_OFFLOAD_F_TCP_CKSUM));
- c->sum += clib_mem_unaligned (&ip4->src_address, u32);
- c->sum += clib_mem_unaligned (&ip4->dst_address, u32);
- c->sum += clib_host_to_net_u32 (
- (clib_net_to_host_u16 (ip4->length) - ip4_header_bytes (ip4)) +
- (ip4->protocol << 16));
- }
- clib_ip_csum_chunk (c, (u8 *) tcp, gho->l4_hdr_sz);
+
+ if (is_prefetch)
+ CLIB_PREFETCH (vlib_buffer_get_current (b1) + hdr_sz,
+ CLIB_CACHE_LINE_BYTES, LOAD);
+
+ clib_ip_csum_chunk (c, (u8 *) tcp, l4_hdr_sz);
tcp->checksum = clib_ip_csum_fold (c);
- if (!is_l2 && ((gho->gho_flags & GHO_F_TUNNEL) == 0))
+ if (!is_l2 && ((oflags & VNET_BUFFER_OFFLOAD_F_TNL_MASK) == 0))
{
u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
@@ -168,16 +197,20 @@ gso_fixup_segmented_buf (vlib_main_t *vm, vlib_buffer_t *b0, u32 next_tcp_seq,
static_always_inline u32
gso_segment_buffer_inline (vlib_main_t *vm,
vnet_interface_per_thread_data_t *ptd,
- vlib_buffer_t *b, generic_header_offset_t *gho,
- int is_l2, int is_ip6)
+ vlib_buffer_t *b, int is_l2)
{
vlib_buffer_t **bufs = 0;
u32 n_tx_bytes = 0;
+
+ u8 oflags = vnet_buffer (b)->oflags;
+ i16 l4_hdr_offset = vnet_buffer (b)->l4_hdr_offset;
u16 gso_size = vnet_buffer2 (b)->gso_size;
+ u16 l4_hdr_sz = vnet_buffer2 (b)->gso_l4_hdr_sz;
+
u8 tcp_flags = 0, tcp_flags_no_fin_psh = 0;
u32 default_bflags =
b->flags & ~(VNET_BUFFER_F_GSO | VLIB_BUFFER_NEXT_PRESENT);
- u16 hdr_sz = gho->hdr_sz + gho->outer_hdr_sz;
+ u16 hdr_sz = (l4_hdr_offset - b->current_data) + l4_hdr_sz;
u32 next_tcp_seq = 0, tcp_seq = 0;
u32 data_size = vlib_buffer_length_in_chain (vm, b) - hdr_sz;
u16 size =
@@ -199,9 +232,8 @@ gso_segment_buffer_inline (vlib_main_t *vm,
vec_validate (bufs, n_bufs - 1);
vlib_get_buffers (vm, ptd->split_buffers, bufs, n_bufs);
- tcp_header_t *tcp =
- (tcp_header_t *) (vlib_buffer_get_current (b) + gho->l4_hdr_offset +
- gho->outer_hdr_sz);
+ tcp_header_t *tcp = (tcp_header_t *) (b->data + l4_hdr_offset);
+
tcp_seq = next_tcp_seq = clib_net_to_host_u32 (tcp->seq_number);
/* store original flags for last packet and reset FIN and PSH */
tcp_flags = tcp->flags;
@@ -246,11 +278,11 @@ gso_segment_buffer_inline (vlib_main_t *vm,
if (0 == dst_left && data_size)
{
vlib_prefetch_buffer_header (bufs[i + 1], LOAD);
- vlib_prefetch_buffer_data (bufs[i + 1], LOAD);
n_tx_bytes += bufs[i]->current_length;
- gso_fixup_segmented_buf (vm, bufs[i], tcp_seq, is_l2, is_ip6, gho,
- &c, tcp_flags_no_fin_psh);
+ gso_fixup_segmented_buf (vm, bufs[i], tcp_seq, is_l2, oflags, hdr_sz,
+ l4_hdr_sz, &c, tcp_flags_no_fin_psh, 1,
+ bufs[i + 1]);
i++;
dst_left = size;
dst_ptr = vlib_buffer_get_current (bufs[i]) + hdr_sz;
@@ -263,8 +295,8 @@ gso_segment_buffer_inline (vlib_main_t *vm,
ASSERT ((i + 1) == n_alloc);
n_tx_bytes += bufs[i]->current_length;
- gso_fixup_segmented_buf (vm, bufs[i], tcp_seq, is_l2, is_ip6, gho, &c,
- tcp_flags);
+ gso_fixup_segmented_buf (vm, bufs[i], tcp_seq, is_l2, oflags, hdr_sz,
+ l4_hdr_sz, &c, tcp_flags, 0, NULL);
vec_free (bufs);
return n_tx_bytes;
diff --git a/src/vnet/gso/gso.rst b/src/vnet/gso/gso.rst
new file mode 100644
index 00000000000..78788f82216
--- /dev/null
+++ b/src/vnet/gso/gso.rst
@@ -0,0 +1,154 @@
+.. _gso_doc:
+
+Generic Segmentation Offload
+============================
+
+Overview
+________
+
+Modern physical NICs provide offload capabilities to software based network
+stacks to transfer some type of the packet processing from CPU to physical
+NICs. TCP Segmentation Offload (TSO) is one among many which is provided by
+modern physical NICs. Software based network stack can offload big (up to 64KB)
+TCP packets to NIC and NIC will segment them into Maximum Segment Size packets.
+Hence network stack save CPU cycles by processing few big packets instead of
+processing many small packets.
+
+GSO is software based analogous to TSO which is used by virtual interfaces
+i.e. tap, virtio, af_packet, vhost-user etc. Typically, virtual interfaces
+provide capability to offload big packets (64KB size). But in reality, they
+just pass the packet as it is to the other end without segmenting it. Hence, it
+is necessary to validate the support of GSO offloading in whole setup otherwise
+packet will be dropped when it will be processed by virtual entity which does
+not support GSO.
+
+The GSO Infrastructure
+_______________________
+
+Software based network stacks implements GSO packet segmentation in software
+where egress interface (virtual or physical) does not support GSO or TSO
+offload. VPP implements GSO stack to provide support for software based packet
+chunking of GSO packets when egress interface does not support GSO or TSO
+offload.
+
+It is implemented as a feature node on interface-output feature arc. It
+implements support for basic GSO, GSO with VXLAN tunnel and GSO with IPIP
+tunnel. GSO with Geneve and GSO with NVGRE are not supported today. But one can
+enable GSO feature node on tunnel interfaces i.e. IPSEC etc to segment GSO
+packets before they will be tunneled.
+
+Virtual interfaces does not support GSO with tunnels. So, special care is
+needed when user configures tunnel(s) along with GSO in the setup. In such case,
+either enable GSO feature node on tunnel interface (mean chunk the GSO packets
+before they will be encapsulated in tunnel) or disable the GSO offload on the
+egress interface (only work for VXLAN tunnel and IPIP tunnel), if it is enabled,
+should work fine.
+
+Similarly, many physical interfaces does not support GSO with tunnels too. User
+can do the same configuration as it is mentioned previously for virtual
+interfaces.
+
+Data structures
+^^^^^^^^^^^^^^^
+
+VPP ``vlib_buffer_t`` uses ``VNET_BUFFER_F_GSO`` flags to mark the buffer carrying GSO
+packet and also contain metadata fields with respect to GSO:
+
+.. code:: c
+
+ i16 l2_hdr_offset;
+ i16 l3_hdr_offset;
+ i16 l4_hdr_offset;
+
+ u16 gso_size;
+ u16 gso_l4_hdr_sz;
+ i16 outer_l3_hdr_offset;
+ i16 outer_l4_hdr_offset;
+
+Packet header offsets are computed from the reference of ``vlib_buffer_t`` data
+pointer.
+
+``l2_hdr_offset``, ``l3_hdr_offset`` and ``l4_hdr_offset`` are set on input of checksum
+offload or GSO enabled interfaces or features i.e. host stack. Appropriate
+offload flags are also set to ``vnet_buffer_oflags_t`` to reflect the actual packet
+offloads which will be used later at egress interface tx node or
+interface-output node or GSO node to process the packet appropriately. These
+fields are present in 1st cache line and does not incur extra cycles as most of
+the VPP features fetch the ``vlib_buffer_t`` 1st cache line to access ``current_data``
+or ``current_length`` fields of the packet.
+
+Please note that ``gso_size``, ``gso_l4_hdr_sz``, ``outer_l3_hdr_offset`` and
+``outer_l4_hdr_offset`` are in second cache line of ``vlib_buffer_t``. Accessing them in
+data plane will incur some extra cycles but cost of these cycles will be
+amortized over (up to 64KB) packet.
+
+The ``gso_size`` and ``gso_l4_hdr_sz`` are set on input of GSO enabled interfaces (tap,
+virtio, af_packet etc) or features (vpp host stack), when we receive a GSO
+packet (a chain of buffers with the first one having ``VNET_BUFFER_F_GSO`` bit set),
+and needs to persist all the way to the interface-output, in case the egress
+interface is not GSO-enabled - then we need to perform the segmentation, and use
+these values to chunk the payload appropriately.
+
+``outer_l3_hdr_offset`` and ``outer_l4_hdr_offset`` are used in case of tunneled packet
+(i.e. VXLAN or IPIP). ``outer_l3_hdr_offset`` will point to outer l3 header of the
+tunnel headers and ``outer_l4_hdr_offset`` will point to outer l4 header of the
+tunnel headers, if any.
+
+Following are the helper functions used to set and clear the offload flags from
+``vlib_buffer_t`` metadata:
+
+.. code:: c
+
+ static_always_inline void
+ vnet_buffer_offload_flags_set (vlib_buffer_t *b, vnet_buffer_oflags_t oflags)
+ {
+ if (b->flags & VNET_BUFFER_F_OFFLOAD)
+ {
+ /* add a flag to existing offload */
+ vnet_buffer (b)->oflags |= oflags;
+ }
+ else
+ {
+ /* no offload yet: reset offload flags to new value */
+ vnet_buffer (b)->oflags = oflags;
+ b->flags |= VNET_BUFFER_F_OFFLOAD;
+ }
+ }
+
+ static_always_inline void
+ vnet_buffer_offload_flags_clear (vlib_buffer_t *b, vnet_buffer_oflags_t oflags)
+ {
+ vnet_buffer (b)->oflags &= ~oflags;
+ if (0 == vnet_buffer (b)->oflags)
+ b->flags &= ~VNET_BUFFER_F_OFFLOAD;
+ }
+
+
+ENABLE GSO FEATURE NODE
+-----------------------
+
+GSO feature node is not enabled by default when egress interface does not
+support GSO. User has to enable it explicitly using api or cli.
+
+GSO API
+^^^^^^^
+
+This API message is used to enable GSO feature node on an interface.
+
+.. code:: c
+
+ autoreply define feature_gso_enable_disable
+ {
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ bool enable_disable;
+ option vat_help = "<intfc> | sw_if_index <nn> [enable | disable]";
+ };
+
+GSO CLI
+^^^^^^^
+
+::
+
+ set interface feature gso <intfc> [enable | disable]
diff --git a/src/vnet/gso/hdr_offset_parser.h b/src/vnet/gso/hdr_offset_parser.h
index 999a27880af..08037f57ea0 100644
--- a/src/vnet/gso/hdr_offset_parser.h
+++ b/src/vnet/gso/hdr_offset_parser.h
@@ -23,7 +23,8 @@
#include <vnet/udp/udp_packet.h>
#include <vnet/tcp/tcp_packet.h>
#include <vnet/vnet.h>
-#include <vnet/vxlan/vxlan_packet.h>
+
+#define VXLAN_HEADER_SIZE 8
#define foreach_gho_flag \
_( 0, IP4) \
@@ -437,7 +438,7 @@ vnet_generic_outer_header_parser_inline (vlib_buffer_t * b0,
if (UDP_DST_PORT_vxlan == clib_net_to_host_u16 (udp->dst_port))
{
gho->gho_flags |= GHO_F_VXLAN_TUNNEL;
- gho->hdr_sz += sizeof (vxlan_header_t);
+ gho->hdr_sz += VXLAN_HEADER_SIZE;
}
else if (UDP_DST_PORT_geneve == clib_net_to_host_u16 (udp->dst_port))
{
diff --git a/src/vnet/gso/node.c b/src/vnet/gso/node.c
index d755784d0cb..c1d4459476e 100644
--- a/src/vnet/gso/node.c
+++ b/src/vnet/gso/node.c
@@ -80,113 +80,108 @@ format_gso_trace (u8 * s, va_list * args)
return s;
}
-static_always_inline u16
-tso_segment_ipip_tunnel_fixup (vlib_main_t * vm,
- vnet_interface_per_thread_data_t * ptd,
- vlib_buffer_t * sb0,
- generic_header_offset_t * gho)
+static_always_inline void
+tso_segment_ipip_tunnel_fixup (vlib_main_t *vm,
+ vnet_interface_per_thread_data_t *ptd,
+ vlib_buffer_t *sb0)
{
u16 n_tx_bufs = vec_len (ptd->split_buffers);
- u16 i = 0, n_tx_bytes = 0;
+ u16 i = 0;
while (i < n_tx_bufs)
{
vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[i]);
+ i16 outer_l3_hdr_offset = vnet_buffer2 (b0)->outer_l3_hdr_offset;
+ i16 l3_hdr_offset = vnet_buffer (b0)->l3_hdr_offset;
- ip4_header_t *ip4 =
- (ip4_header_t *) (vlib_buffer_get_current (b0) +
- gho->outer_l3_hdr_offset);
- ip6_header_t *ip6 =
- (ip6_header_t *) (vlib_buffer_get_current (b0) +
- gho->outer_l3_hdr_offset);
+ ip4_header_t *ip4 = (ip4_header_t *) (b0->data + outer_l3_hdr_offset);
+ ip6_header_t *ip6 = (ip6_header_t *) (b0->data + outer_l3_hdr_offset);
- if (gho->gho_flags & GHO_F_OUTER_IP4)
+ if (vnet_buffer (b0)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM)
{
- ip4->length =
- clib_host_to_net_u16 (b0->current_length -
- gho->outer_l3_hdr_offset);
+ ip4->length = clib_host_to_net_u16 (
+ b0->current_length - (outer_l3_hdr_offset - b0->current_data));
ip4->checksum = ip4_header_checksum (ip4);
+ vnet_buffer_offload_flags_clear (
+ b0, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
}
- else if (gho->gho_flags & GHO_F_OUTER_IP6)
+ else
{
- ip6->payload_length =
- clib_host_to_net_u16 (b0->current_length -
- gho->outer_l4_hdr_offset);
+ ip6->payload_length = clib_host_to_net_u16 (
+ b0->current_length - (l3_hdr_offset - b0->current_data));
+ vnet_buffer_offload_flags_clear (b0, VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
}
- n_tx_bytes += gho->outer_hdr_sz;
i++;
}
- return n_tx_bytes;
}
static_always_inline void
-tso_segment_vxlan_tunnel_headers_fixup (vlib_main_t * vm, vlib_buffer_t * b,
- generic_header_offset_t * gho)
+tso_segment_vxlan_tunnel_headers_fixup (vlib_main_t *vm, vlib_buffer_t *b)
{
- u8 proto = 0;
ip4_header_t *ip4 = 0;
ip6_header_t *ip6 = 0;
udp_header_t *udp = 0;
+ i16 outer_l3_hdr_offset = vnet_buffer2 (b)->outer_l3_hdr_offset;
+ i16 outer_l4_hdr_offset = vnet_buffer2 (b)->outer_l4_hdr_offset;
- ip4 =
- (ip4_header_t *) (vlib_buffer_get_current (b) + gho->outer_l3_hdr_offset);
- ip6 =
- (ip6_header_t *) (vlib_buffer_get_current (b) + gho->outer_l3_hdr_offset);
- udp =
- (udp_header_t *) (vlib_buffer_get_current (b) + gho->outer_l4_hdr_offset);
+ ip4 = (ip4_header_t *) (b->data + outer_l3_hdr_offset);
+ ip6 = (ip6_header_t *) (b->data + outer_l3_hdr_offset);
+ udp = (udp_header_t *) (b->data + outer_l4_hdr_offset);
- if (gho->gho_flags & GHO_F_OUTER_IP4)
+ if (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM)
{
- proto = ip4->protocol;
- ip4->length =
- clib_host_to_net_u16 (b->current_length - gho->outer_l3_hdr_offset);
+ ip4->length = clib_host_to_net_u16 (
+ b->current_length - (outer_l3_hdr_offset - b->current_data));
ip4->checksum = ip4_header_checksum (ip4);
+ if (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM)
+ {
+ udp->length = clib_host_to_net_u16 (
+ b->current_length - (outer_l4_hdr_offset - b->current_data));
+ // udp checksum is 0, in udp tunnel
+ udp->checksum = 0;
+ }
+ vnet_buffer_offload_flags_clear (
+ b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_VXLAN);
}
- else if (gho->gho_flags & GHO_F_OUTER_IP6)
- {
- proto = ip6->protocol;
- ip6->payload_length =
- clib_host_to_net_u16 (b->current_length - gho->outer_l4_hdr_offset);
- }
- if (proto == IP_PROTOCOL_UDP)
+ else
{
- int bogus;
- udp->length =
- clib_host_to_net_u16 (b->current_length - gho->outer_l4_hdr_offset);
- udp->checksum = 0;
- if (gho->gho_flags & GHO_F_OUTER_IP6)
+ ip6->payload_length = clib_host_to_net_u16 (
+ b->current_length - (outer_l4_hdr_offset - b->current_data));
+
+ if (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM)
{
+ int bogus;
+ udp->length = ip6->payload_length;
+ // udp checksum is 0, in udp tunnel
+ udp->checksum = 0;
udp->checksum =
ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus);
+ vnet_buffer_offload_flags_clear (
+ b, VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_VXLAN);
}
- else if (gho->gho_flags & GHO_F_OUTER_IP4)
- {
- udp->checksum = ip4_tcp_udp_compute_checksum (vm, b, ip4);
- }
- /* FIXME: it should be OUTER_UDP_CKSUM */
- vnet_buffer_offload_flags_clear (b, VNET_BUFFER_OFFLOAD_F_UDP_CKSUM);
}
}
-static_always_inline u16
-tso_segment_vxlan_tunnel_fixup (vlib_main_t * vm,
- vnet_interface_per_thread_data_t * ptd,
- vlib_buffer_t * sb0,
- generic_header_offset_t * gho)
+static_always_inline void
+tso_segment_vxlan_tunnel_fixup (vlib_main_t *vm,
+ vnet_interface_per_thread_data_t *ptd,
+ vlib_buffer_t *sb0)
{
u16 n_tx_bufs = vec_len (ptd->split_buffers);
- u16 i = 0, n_tx_bytes = 0;
+ u16 i = 0;
while (i < n_tx_bufs)
{
vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[i]);
- tso_segment_vxlan_tunnel_headers_fixup (vm, b0, gho);
- n_tx_bytes += gho->outer_hdr_sz;
+ tso_segment_vxlan_tunnel_headers_fixup (vm, b0);
i++;
}
- return n_tx_bytes;
}
static_always_inline u16
@@ -682,32 +677,10 @@ vnet_gso_node_inline (vlib_main_t * vm,
to_next -= 1;
n_left_to_next += 1;
/* undo the counting. */
- generic_header_offset_t gho = { 0 };
u32 n_tx_bytes = 0;
- u32 inner_is_ip6 = is_ip6;
-
- vnet_generic_header_offset_parser (b[0], &gho, is_l2,
- is_ip4, is_ip6);
-
- if (PREDICT_FALSE (gho.gho_flags & GHO_F_TUNNEL))
- {
- if (PREDICT_FALSE
- (gho.gho_flags & (GHO_F_GRE_TUNNEL |
- GHO_F_GENEVE_TUNNEL)))
- {
- /* not supported yet */
- drop_one_buffer_and_count (vm, vnm, node, from - 1,
- hi->sw_if_index,
- GSO_ERROR_UNHANDLED_TYPE);
- b += 1;
- continue;
- }
- inner_is_ip6 = (gho.gho_flags & GHO_F_IP6) != 0;
- }
-
- n_tx_bytes = gso_segment_buffer_inline (vm, ptd, b[0], &gho,
- is_l2, inner_is_ip6);
+ n_tx_bytes =
+ gso_segment_buffer_inline (vm, ptd, b[0], is_l2);
if (PREDICT_FALSE (n_tx_bytes == 0))
{
@@ -718,19 +691,15 @@ vnet_gso_node_inline (vlib_main_t * vm,
continue;
}
-
- if (PREDICT_FALSE (gho.gho_flags & GHO_F_VXLAN_TUNNEL))
+ if (PREDICT_FALSE (vnet_buffer (b[0])->oflags &
+ VNET_BUFFER_OFFLOAD_F_TNL_VXLAN))
{
- n_tx_bytes +=
- tso_segment_vxlan_tunnel_fixup (vm, ptd, b[0], &gho);
+ tso_segment_vxlan_tunnel_fixup (vm, ptd, b[0]);
}
- else
- if (PREDICT_FALSE
- (gho.gho_flags & (GHO_F_IPIP_TUNNEL |
- GHO_F_IPIP6_TUNNEL)))
+ else if (PREDICT_FALSE (vnet_buffer (b[0])->oflags &
+ VNET_BUFFER_OFFLOAD_F_TNL_IPIP))
{
- n_tx_bytes +=
- tso_segment_ipip_tunnel_fixup (vm, ptd, b[0], &gho);
+ tso_segment_ipip_tunnel_fixup (vm, ptd, b[0]);
}
u16 n_tx_bufs = vec_len (ptd->split_buffers);
@@ -838,7 +807,6 @@ VLIB_NODE_FN (gso_ip6_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1 /* ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (gso_l2_ip4_node) = {
.vector_size = sizeof (u32),
diff --git a/src/vnet/handoff.c b/src/vnet/handoff.c
index 5d4ef6f5c1b..e9c3bb6de67 100644
--- a/src/vnet/handoff.c
+++ b/src/vnet/handoff.c
@@ -244,6 +244,8 @@ interface_handoff_enable_disable (vlib_main_t *vm, u32 sw_if_index,
vnet_feature_enable_disable ("device-input", "worker-handoff",
sw_if_index, enable_disable, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth", "worker-handoff", sw_if_index,
+ enable_disable, 0, 0);
return rv;
}
@@ -310,14 +312,12 @@ set_interface_handoff_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_handoff_command, static) = {
.path = "set interface handoff",
.short_help = "set interface handoff <interface-name> workers <workers-list>"
" [symmetrical|asymmetrical]",
.function = set_interface_handoff_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
handoff_init (vlib_main_t * vm)
diff --git a/src/vnet/hash/FEATURE.yaml b/src/vnet/hash/FEATURE.yaml
index 1e3d23ea882..d5b9a069c27 100644
--- a/src/vnet/hash/FEATURE.yaml
+++ b/src/vnet/hash/FEATURE.yaml
@@ -1,6 +1,6 @@
---
name: Hash infrastructure
-maintainer: Mohsin Kazmi <sykazmi@cisco.com>, Damjan Marion <damarion@cisco.com>
+maintainer: Mohsin Kazmi <mohsin.kazmi14@gmail.com>, Damjan Marion <damarion@cisco.com>
features:
- Ethernet
- IP
diff --git a/src/vnet/hash/hash.rst b/src/vnet/hash/hash.rst
new file mode 100644
index 00000000000..3db74e2f093
--- /dev/null
+++ b/src/vnet/hash/hash.rst
@@ -0,0 +1,90 @@
+.. _hash_doc:
+
+Hash Infra
+==========
+
+Overview
+________
+
+Modern physical NICs uses packet flow hash for different purposes, i.e. Receive
+Side Scaling, flow steering and interface bonding etc. NICs can also provide
+packet flow hash prepended to data packet as metadata which can be used by
+applications without recomputing the packet flow hash.
+
+As more and more services are deployed in virtualized environment, making use of
+virtual interfaces to interconnect those services.
+
+The Hash Infrastructure
+_______________________
+
+VPP implements software based hashing functionality which can be used for different
+purposes. It also provides users a centralized way to registry custom hash functions
+based on traffic profile to be used in different vpp features i.e. Multi-TXQ,
+software RSS or bonding driver.
+
+Data structures
+^^^^^^^^^^^^^^^
+
+Hashing infra provides two types of hashing functions:
+``VNET_HASH_FN_TYPE_ETHERNET`` and ``VNET_HASH_FN_TYPE_IP`` for ethernet traffic and
+IP traffic respectively.
+Hashing infra provides uniform signature to the functions to be implemented:
+
+.. code:: c
+
+ void (*vnet_hash_fn_t) (void **p, u32 *h, u32 n_packets);
+
+Here ``**p`` is the array of pointers pointing to the beginning of packet headers
+(either ethernet or ip).
+``*h`` is an empty array of size n_packets. On return, it will contain hashes.
+``n_packets`` is the number of packets pass to this function.
+
+Custom hashing functions can be registered through ``VNET_REGISTER_HASH_FUNCTION``.
+Users need to provide a name, description, priority and hashing functions for
+registration.
+
+Default hashing function is selected based on the highest priority among the registered
+hashing functions.
+
+.. code:: c
+
+ typedef struct vnet_hash_function_registration
+ {
+ const char *name;
+ const char *description;
+ int priority;
+ vnet_hash_fn_t function[VNET_HASH_FN_TYPE_N];
+
+ struct vnet_hash_function_registration *next;
+ } vnet_hash_function_registration_t;
+
+For example, ``crc32c_5tuple`` provides two hashing functions: for IP traffic and for
+ethernet traffic. It uses 5 tuples from the flow to compute the crc32 hash on it.
+
+.. code:: c
+
+ void vnet_crc32c_5tuple_ip_func (void **p, u32 *hash, u32 n_packets);
+ void vnet_crc32c_5tuple_ethernet_func (void **p, u32 *hash, u32 n_packets);
+
+ VNET_REGISTER_HASH_FUNCTION (crc32c_5tuple, static) = {
+ .name = "crc32c-5tuple",
+ .description = "IPv4/IPv6 header and TCP/UDP ports",
+ .priority = 50,
+ .function[VNET_HASH_FN_TYPE_ETHERNET] = vnet_crc32c_5tuple_ethernet_func,
+ .function[VNET_HASH_FN_TYPE_IP] = vnet_crc32c_5tuple_ip_func,
+ };
+
+
+Users can see all the registered hash functions along with priority and description.
+
+Hash API
+^^^^^^^^
+
+There is no Hash API at the moment.
+
+Hash CLI
+^^^^^^^^
+
+::
+
+ show hash
diff --git a/src/vnet/hdlc/hdlc.c b/src/vnet/hdlc/hdlc.c
index fa1e7cd5eaf..443a0396e9e 100644
--- a/src/vnet/hdlc/hdlc.c
+++ b/src/vnet/hdlc/hdlc.c
@@ -197,7 +197,6 @@ hdlc_build_rewrite (vnet_main_t * vnm,
return (rewrite);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (hdlc_hw_interface_class) = {
.name = "HDLC",
.format_header = format_hdlc_header_with_length,
@@ -205,7 +204,6 @@ VNET_HW_INTERFACE_CLASS (hdlc_hw_interface_class) = {
.build_rewrite = hdlc_build_rewrite,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
static void
add_protocol (hdlc_main_t * pm, hdlc_protocol_t protocol, char *protocol_name)
diff --git a/src/vnet/hdlc/node.c b/src/vnet/hdlc/node.c
index 8bb621231c7..48269a3b8d3 100644
--- a/src/vnet/hdlc/node.c
+++ b/src/vnet/hdlc/node.c
@@ -279,7 +279,6 @@ static char *hdlc_error_strings[] = {
#undef hdlc_error
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (hdlc_input_node) = {
.function = hdlc_input,
.name = "hdlc-input",
@@ -302,7 +301,6 @@ VLIB_REGISTER_NODE (hdlc_input_node) = {
.format_trace = format_hdlc_input_trace,
.unformat_buffer = unformat_hdlc_header,
};
-/* *INDENT-ON* */
static clib_error_t *
hdlc_input_runtime_init (vlib_main_t * vm)
diff --git a/src/vnet/interface.api b/src/vnet/interface.api
index 172f6afb818..eea86aa1ac8 100644
--- a/src/vnet/interface.api
+++ b/src/vnet/interface.api
@@ -733,6 +733,61 @@ autoreply define collect_detailed_interface_stats
bool enable_disable;
};
+/** \brief pcap_set_filter_function
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param filter_function_name - the name of the filter function
+ to set for pcap capture
+*/
+autoreply define pcap_set_filter_function
+{
+ u32 client_index;
+ u32 context;
+
+ string filter_function_name[];
+};
+
+/** \brief pcap_trace_on
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param capture_rx - capture received packets
+ @param capture_tx - capture transmitted packets
+ @param capture_drop - capture dropped packets
+ @param filter - is a filter is being used on this capture
+ @param preallocate_data - preallocate the data buffer
+ @param free_data - free the data buffer
+ @param max_packets - depth of local buffer
+ @param max_bytes_per_packet - maximum number of bytes to capture
+ for each packet
+ @param sw_if_index - specify a given interface, or 0 for any
+ @param error - filter packets based on a specific error.
+ @param filename - output filename, will be placed in /tmp
+*/
+autoreply define pcap_trace_on
+{
+ u32 client_index;
+ u32 context;
+ bool capture_rx;
+ bool capture_tx;
+ bool capture_drop;
+ bool filter;
+ bool preallocate_data;
+ bool free_data;
+ u32 max_packets [default=1000];
+ u32 max_bytes_per_packet [default=512];
+ vl_api_interface_index_t sw_if_index;
+ string error[128];
+ string filename[64];
+
+ option vat_help = "pcap_trace_on [capture_rx] [capture_tx] [capture_drop] [max_packets <nn>] [sw_if_index <sw_if_index>|0 for any] [error <node>.<error>] [filename <name>] [max_bytes_per_packet <nnnn>] [filter] [preallocate_data] [free_data]";
+};
+
+autoreply define pcap_trace_off
+{
+ u32 client_index;
+ u32 context;
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/vnet/interface.c b/src/vnet/interface.c
index dd4399864f7..5fb2ff65fa2 100644
--- a/src/vnet/interface.c
+++ b/src/vnet/interface.c
@@ -45,11 +45,9 @@
#include <vnet/interface/rx_queue_funcs.h>
#include <vnet/interface/tx_queue_funcs.h>
-/* *INDENT-OFF* */
VLIB_REGISTER_LOG_CLASS (if_default_log, static) = {
.class_name = "interface",
};
-/* *INDENT-ON* */
#define log_debug(fmt,...) vlib_log_debug(if_default_log.class, fmt, __VA_ARGS__)
#define log_err(fmt,...) vlib_log_err(if_default_log.class, fmt, __VA_ARGS__)
@@ -141,15 +139,12 @@ serialize_vnet_interface_state (serialize_main_t * m, va_list * va)
/* Serialize hardware interface classes since they may have changed.
Must do this before sending up/down flags. */
- /* *INDENT-OFF* */
pool_foreach (hif, im->hw_interfaces) {
vnet_hw_interface_class_t * hw_class = vnet_get_hw_interface_class (vnm, hif->hw_class_index);
serialize_cstring (m, hw_class->name);
}
- /* *INDENT-ON* */
/* Send sw/hw interface state when non-zero. */
- /* *INDENT-OFF* */
pool_foreach (sif, im->sw_interfaces) {
if (sif->flags != 0)
{
@@ -158,14 +153,12 @@ serialize_vnet_interface_state (serialize_main_t * m, va_list * va)
st->flags = sif->flags;
}
}
- /* *INDENT-ON* */
vec_serialize (m, sts, serialize_vec_vnet_sw_hw_interface_state);
if (sts)
vec_set_len (sts, 0);
- /* *INDENT-OFF* */
pool_foreach (hif, im->hw_interfaces) {
if (hif->flags != 0)
{
@@ -174,7 +167,6 @@ serialize_vnet_interface_state (serialize_main_t * m, va_list * va)
st->flags = vnet_hw_interface_flags_to_sw(hif->flags);
}
}
- /* *INDENT-ON* */
vec_serialize (m, sts, serialize_vec_vnet_sw_hw_interface_state);
@@ -206,7 +198,6 @@ unserialize_vnet_interface_state (serialize_main_t * m, va_list * va)
uword *p;
clib_error_t *error;
- /* *INDENT-OFF* */
pool_foreach (hif, im->hw_interfaces) {
unserialize_cstring (m, &class_name);
p = hash_get_mem (im->hw_interface_class_by_name, class_name);
@@ -222,7 +213,6 @@ unserialize_vnet_interface_state (serialize_main_t * m, va_list * va)
clib_error_report (error);
vec_free (class_name);
}
- /* *INDENT-ON* */
}
vec_unserialize (m, &sts, unserialize_vec_vnet_sw_hw_interface_state);
@@ -655,6 +645,7 @@ vnet_create_sw_interface (vnet_main_t * vnm, vnet_sw_interface_t * template,
/* undo the work done by vnet_create_sw_interface_no_callbacks() */
log_err ("create_sw_interface: set flags failed\n %U",
format_clib_error, error);
+ call_sw_interface_add_del_callbacks (vnm, *sw_if_index, 0);
vnet_sw_interface_t *sw =
pool_elt_at_index (im->sw_interfaces, *sw_if_index);
pool_put (im->sw_interfaces, sw);
@@ -776,8 +767,7 @@ vnet_hw_interface_set_max_frame_size (vnet_main_t *vnm, u32 hw_if_index,
vnet_hw_interface_class_t *hw_if_class =
vnet_get_hw_interface_class (vnm, hi->hw_class_index);
clib_error_t *err = 0;
-
- log_debug ("set_max_frame_size: interface %s, max_frame_size %u -> %u",
+ log_debug ("set_max_frame_size: interface %v, max_frame_size %u -> %u",
hi->name, hi->max_frame_size, fs);
if (hw_if_class->set_max_frame_size == 0)
@@ -1116,7 +1106,6 @@ vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index)
/* Delete any sub-interfaces. */
{
u32 id, sw_if_index;
- /* *INDENT-OFF* */
hash_foreach (id, sw_if_index, hw->sub_interface_sw_if_index_by_id,
({
vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
@@ -1126,7 +1115,6 @@ vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index)
vnet_delete_sw_interface (vnm, sw_if_index);
}));
hash_free (hw->sub_interface_sw_if_index_by_id);
- /* *INDENT-ON* */
}
/* Delete software interface corresponding to hardware interface. */
@@ -1177,14 +1165,12 @@ vnet_hw_interface_walk_sw (vnet_main_t * vnm,
if (WALK_STOP == fn (vnm, hi->sw_if_index, ctx))
return;
- /* *INDENT-OFF* */
hash_foreach (id, sw_if_index,
hi->sub_interface_sw_if_index_by_id,
({
if (WALK_STOP == fn (vnm, sw_if_index, ctx))
break;
}));
- /* *INDENT-ON* */
}
void
@@ -1196,13 +1182,11 @@ vnet_hw_interface_walk (vnet_main_t * vnm,
im = &vnm->interface_main;
- /* *INDENT-OFF* */
pool_foreach (hi, im->hw_interfaces)
{
if (WALK_STOP == fn(vnm, hi->hw_if_index, ctx))
break;
}
- /* *INDENT-ON* */
}
void
@@ -1214,13 +1198,11 @@ vnet_sw_interface_walk (vnet_main_t * vnm,
im = &vnm->interface_main;
- /* *INDENT-OFF* */
pool_foreach (si, im->sw_interfaces)
{
if (WALK_STOP == fn (vnm, si, ctx))
break;
}
- /* *INDENT-ON* */
}
void
@@ -1358,7 +1340,10 @@ vnet_hw_interface_compare (vnet_main_t * vnm,
int
vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index)
{
- vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+ vnet_sw_interface_t *si = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+ if (si == NULL)
+ return -1;
+
if ((si->type == VNET_SW_INTERFACE_TYPE_P2P) ||
(si->type == VNET_SW_INTERFACE_TYPE_PIPE))
return 1;
@@ -1403,6 +1388,26 @@ vnet_sw_interface_supports_addressing (vnet_main_t *vnm, u32 sw_if_index)
return NULL;
}
+u32
+vnet_register_device_class (vlib_main_t *vm, vnet_device_class_t *c)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ c->index = vec_len (im->device_classes);
+ hash_set_mem (im->device_class_by_name, c->name, c->index);
+
+ /* to avoid confusion, please remove ".tx_function" statement
+ from VNET_DEVICE_CLASS() if using function candidates */
+ ASSERT (c->tx_fn_registrations == 0 || c->tx_function == 0);
+
+ if (c->tx_fn_registrations)
+ c->tx_function =
+ vlib_node_get_preferred_node_fn_variant (vm, c->tx_fn_registrations);
+
+ vec_add1 (im->device_classes, c[0]);
+ return c->index;
+}
+
clib_error_t *
vnet_interface_init (vlib_main_t * vm)
{
@@ -1449,28 +1454,10 @@ vnet_interface_init (vlib_main_t * vm)
im->device_class_by_name = hash_create_string ( /* size */ 0,
sizeof (uword));
- {
- vnet_device_class_t *c;
-
- c = vnm->device_class_registrations;
-
- while (c)
- {
- c->index = vec_len (im->device_classes);
- hash_set_mem (im->device_class_by_name, c->name, c->index);
- /* to avoid confusion, please remove ".tx_function" statement
- from VNET_DEVICE_CLASS() if using function candidates */
- ASSERT (c->tx_fn_registrations == 0 || c->tx_function == 0);
-
- if (c->tx_fn_registrations)
- c->tx_function = vlib_node_get_preferred_node_fn_variant (
- vm, c->tx_fn_registrations);
-
- vec_add1 (im->device_classes, c[0]);
- c = c->next_class_registration;
- }
- }
+ for (vnet_device_class_t *c = vnm->device_class_registrations; c;
+ c = c->next_class_registration)
+ vnet_register_device_class (vm, c);
im->hw_interface_class_by_name = hash_create_string ( /* size */ 0,
sizeof (uword));
@@ -1940,13 +1927,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (collect_detailed_interface_stats_command, static) = {
.path = "interface collect detailed-stats",
.short_help = "interface collect detailed-stats <enable|disable>",
.function = collect_detailed_interface_stats_cli,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/interface.h b/src/vnet/interface.h
index c8fbc61ec7b..f0cb540f979 100644
--- a/src/vnet/interface.h
+++ b/src/vnet/interface.h
@@ -292,6 +292,8 @@ typedef struct _vnet_device_class
} vnet_device_class_t;
+u32 vnet_register_device_class (vlib_main_t *, vnet_device_class_t *);
+
#ifndef CLIB_MARCH_VARIANT
#define VNET_DEVICE_CLASS(x,...) \
__VA_ARGS__ vnet_device_class_t x; \
@@ -320,7 +322,8 @@ static __clib_unused vnet_device_class_t __clib_unused_##x
#endif
#define VNET_DEVICE_CLASS_TX_FN(devclass) \
- uword CLIB_MARCH_SFX (devclass##_tx_fn) (); \
+ uword CLIB_MARCH_SFX (devclass##_tx_fn) ( \
+ vlib_main_t *, vlib_node_runtime_t *, vlib_frame_t *); \
static vlib_node_fn_registration_t CLIB_MARCH_SFX ( \
devclass##_tx_fn_registration) = { \
.function = &CLIB_MARCH_SFX (devclass##_tx_fn), \
diff --git a/src/vnet/interface/runtime.c b/src/vnet/interface/runtime.c
index 5c215e88501..a88a23bd4c9 100644
--- a/src/vnet/interface/runtime.c
+++ b/src/vnet/interface/runtime.c
@@ -289,10 +289,9 @@ vnet_hw_if_update_runtime_data (vnet_main_t *vnm, u32 hw_if_index)
{
void *in = rt->rxq_interrupts;
int int_num = -1;
- while ((int_num = clib_interrupt_get_next (in, int_num)) !=
- -1)
+ while ((int_num = clib_interrupt_get_next_and_clear (
+ in, int_num)) != -1)
{
- clib_interrupt_clear (in, int_num);
pending_int = clib_bitmap_set (pending_int, int_num, 1);
last_int = clib_max (last_int, int_num);
}
diff --git a/src/vnet/interface/rx_queue.c b/src/vnet/interface/rx_queue.c
index cec0296519c..b1fc82f38e9 100644
--- a/src/vnet/interface/rx_queue.c
+++ b/src/vnet/interface/rx_queue.c
@@ -124,7 +124,10 @@ vnet_hw_if_unregister_all_rx_queues (vnet_main_t *vnm, u32 hw_if_index)
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
vnet_interface_main_t *im = &vnm->interface_main;
vnet_hw_if_rx_queue_t *rxq;
+ vlib_main_t *vm;
+ vnet_hw_if_rx_node_runtime_t *rt;
u64 key;
+ u32 queue_index;
log_debug ("unregister_all: interface %v", hi->name);
@@ -132,6 +135,15 @@ vnet_hw_if_unregister_all_rx_queues (vnet_main_t *vnm, u32 hw_if_index)
{
rxq = vnet_hw_if_get_rx_queue (vnm, hi->rx_queue_indices[i]);
key = rx_queue_key (rxq->hw_if_index, rxq->queue_id);
+ if (PREDICT_FALSE (rxq->mode == VNET_HW_IF_RX_MODE_INTERRUPT ||
+ rxq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
+ {
+ vm = vlib_get_main_by_index (rxq->thread_index);
+ queue_index = vnet_hw_if_get_rx_queue_index_by_id (vnm, hw_if_index,
+ rxq->queue_id);
+ rt = vlib_node_get_runtime_data (vm, hi->input_node_index);
+ clib_interrupt_clear (rt->rxq_interrupts, queue_index);
+ }
hash_unset_mem_free (&im->rxq_index_by_hw_if_index_and_queue_id, &key);
pool_put_index (im->hw_if_rx_queues, hi->rx_queue_indices[i]);
@@ -240,14 +252,12 @@ vnet_hw_if_generate_rxq_int_poll_vector (vlib_main_t *vm,
vec_reset_length (rt->rxq_vector_int);
- while ((int_num = clib_interrupt_get_next (rt->rxq_interrupts, int_num)) !=
- -1)
+ while ((int_num = clib_interrupt_get_next_and_clear (rt->rxq_interrupts,
+ int_num)) != -1)
{
vnet_hw_if_rx_queue_t *rxq = vnet_hw_if_get_rx_queue (vnm, int_num);
vnet_hw_if_rxq_poll_vector_t *pv;
- clib_interrupt_clear (rt->rxq_interrupts, int_num);
-
vec_add2 (rt->rxq_vector_int, pv, 1);
pv->dev_instance = rxq->dev_instance;
pv->queue_id = rxq->queue_id;
diff --git a/src/vnet/interface/stats.c b/src/vnet/interface/stats.c
index f58ffa32586..4f3213aafc3 100644
--- a/src/vnet/interface/stats.c
+++ b/src/vnet/interface/stats.c
@@ -25,6 +25,8 @@ static struct
static clib_error_t *
statseg_sw_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add)
{
+ u8 *name = 0;
+
if (if_names == 0)
{
if_names = vlib_stats_add_string_vector ("/if/names");
@@ -42,7 +44,6 @@ statseg_sw_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add)
{
vnet_sw_interface_t *si, *si_sup;
vnet_hw_interface_t *hi_sup;
- u8 *name;
si = vnet_get_sw_interface (vnm, sw_if_index);
si_sup = vnet_get_sup_sw_interface (vnm, si->sw_if_index);
@@ -63,16 +64,18 @@ statseg_sw_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add)
ASSERT (index != ~0);
vec_add1 (dir_entry_indices[sw_if_index], index);
}
-
- vec_free (name);
}
else
{
+ name = format (0, "%s", "deleted");
+ vlib_stats_set_string_vector (&if_names, sw_if_index, "%v", name);
for (u32 i = 0; i < vec_len (dir_entry_indices[sw_if_index]); i++)
vlib_stats_remove_entry (dir_entry_indices[sw_if_index][i]);
vec_free (dir_entry_indices[sw_if_index]);
}
+ vec_free (name);
+
vlib_stats_segment_unlock ();
return 0;
diff --git a/src/vnet/interface/tx_queue.rst b/src/vnet/interface/tx_queue.rst
new file mode 100644
index 00000000000..e8f0e039b8e
--- /dev/null
+++ b/src/vnet/interface/tx_queue.rst
@@ -0,0 +1,159 @@
+.. _TX_Queue_doc:
+
+Transmit Queues
+===============
+
+Overview
+________
+
+VPP implements transmit queue infra to access and manage transmit queues. It
+provides common registration functions to register or unregister interfaces'
+transmit queues. It also provides functions for queue placement on given thread(s).
+
+The TXQ Infrastructure
+_______________________
+
+Infra registers each queue using a unique key which is formed by concatenating
+the hardware interface index ``hw_if_index`` and unique queue identifier for
+given interface ``queue_id``. As a result of registration of queue, infra
+returns back a unique global ``queue_index`` which can be used by driver to
+access that queue later.
+
+Interface output node uses pre-computed ``output_node_thread_runtime`` data
+which provides essential information related to queue placements on given
+thread of given interface. Transmit queue infra implements an algorithm to
+pre-compute this information. It also pre-computes scalar arguments of frame
+``vnet_hw_if_tx_frame_t``. It also pre-calculates a ``lookup_table`` for a
+thread if multiple transmit queues are placed on that thread.
+Interface drivers call ``vnet_hw_if_update_runtime_data()`` to execute that
+algorithm after registering the transmit queues to TXQ infra.
+
+The algorithm makes a copy of the existing runtime data and iterates through it
+for each vpp main and worker thread. In each iteration, the algorithm loops
+through all the tx queues of the given interface to fill in the information in
+the frame data structure ``vnet_hw_if_tx_frame_t``. The algorithm also updates
+the information related to the number of transmit queues of the given interface
+on the given vpp thread in the data structure ``output_node_thread_runtime``.
+Any update to the copy triggers an update of the actual working copy under the
+worker barrier, after which the old copy of ``output_node_thread_runtime`` is freed.
+
+Multi-TXQ infra
+^^^^^^^^^^^^^^^
+
+The interface output node computes a packet flow hash using the hash infra when
+multiple tx queues are placed on a given thread. Each hardware interface class
+specifies the type of hash required for interfaces of that class, e.g. the
+ethernet hardware interface class specifies type ``VNET_HASH_FN_TYPE_ETHERNET``.
+The hash function itself, however, is stored in the hardware interface data
+structure of the given interface. The default hashing function is selected upon
+interface creation based on priority. Users can configure a different hash for
+an interface for the multi-txq use case.
+
+Interface output node uses packet flow hash as an index to the pre-calculated lookup
+table to get the queue identifier for given transmit queue. Interface output node
+enqueues the packets to respective frame and also copies the ``vnet_hw_if_tx_frame_t``
+to frame scalar arguments. Drivers use scalar arguments ``vnet_hw_if_tx_frame_t``
+of the given frame to extract the information about the transmit queue to be used to
+transmit the packets. Drivers may need to acquire a lock on given queue before
+transmitting the packets based on the ``shared_queue`` bit status.
+
+Data structures
+^^^^^^^^^^^^^^^
+
+Queue information is stored in data structure ``vnet_hw_if_tx_queue_t``:
+
+.. code:: c
+
+ typedef struct
+ {
+ /* either this queue is shared among multiple threads */
+ u8 shared_queue : 1;
+ /* hw interface index */
+ u32 hw_if_index;
+
+ /* hardware queue identifier */
+ u32 queue_id;
+
+ /* bitmap of threads which use this queue */
+ clib_bitmap_t *threads;
+ } vnet_hw_if_tx_queue_t;
+
+
+Frame information is stored in data structure: ``vnet_hw_if_tx_frame_t``:
+
+.. code:: c
+
+ typedef enum
+ {
+ VNET_HW_IF_TX_FRAME_HINT_NOT_CHAINED = (1 << 0),
+ VNET_HW_IF_TX_FRAME_HINT_NO_GSO = (1 << 1),
+ VNET_HW_IF_TX_FRAME_HINT_NO_CKSUM_OFFLOAD = (1 << 2),
+ } vnet_hw_if_tx_frame_hint_t;
+
+ typedef struct
+ {
+ u8 shared_queue : 1;
+ vnet_hw_if_tx_frame_hint_t hints : 16;
+ u32 queue_id;
+ } vnet_hw_if_tx_frame_t;
+
+Output node runtime information is stored in data structure: ``output_node_thread_runtime``:
+
+.. code:: c
+
+ typedef struct
+ {
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vnet_hw_if_tx_frame_t *frame;
+ u32 *lookup_table;
+ u32 n_queues;
+ } vnet_hw_if_output_node_runtime_t;
+
+
+MultiTXQ API
+^^^^^^^^^^^^
+
+This API message is used to place tx queue of an interface to vpp main or worker(s) thread(s).
+
+.. code:: c
+
+ autoendian autoreply define sw_interface_set_tx_placement
+ {
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u32 queue_id;
+ u32 array_size;
+ u32 threads[array_size];
+ option vat_help = "<interface | sw_if_index <index>> queue <n> [threads <list> | mask <hex>]";
+ };
+
+Multi-TXQ CLI
+^^^^^^^^^^^^^
+
+::
+
+ set interface tx-queue set interface tx-queue <interface> queue <n> [threads <list>]
+ set interface tx-hash set interface tx-hash <interface> hash-name <hash-name>
+
+::
+
+ show hardware-interfaces
+
+ Name Idx Link Hardware
+ tap0 1 up tap0
+ Link speed: unknown
+ RX Queues:
+ queue thread mode
+ 0 main (0) polling
+ TX Queues:
+ TX Hash: [name: crc32c-5tuple priority: 50 description: IPv4/IPv6 header and TCP/UDP ports]
+ queue shared thread(s)
+ 0 no 0
+ Ethernet address 02:fe:27:69:5a:b5
+ VIRTIO interface
+ instance 0
+ RX QUEUE : Total Packets
+ 0 : 0
+ TX QUEUE : Total Packets
+ 0 : 0
+
diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c
index 5766f2ca21f..c727e519138 100644
--- a/src/vnet/interface_api.c
+++ b/src/vnet/interface_api.c
@@ -17,6 +17,9 @@
*------------------------------------------------------------------
*/
+#define _GNU_SOURCE
+#include <string.h>
+
#include <vnet/vnet.h>
#include <vlibmemory/api.h>
@@ -384,8 +387,6 @@ vl_api_sw_interface_dump_t_handler (vl_api_sw_interface_dump_t * mp)
vec_add1 (filter, 0); /* Ensure it's a C string for strcasecmp() */
}
- char *strcasestr (char *, char *); /* lnx hdr file botch */
- /* *INDENT-OFF* */
pool_foreach (swif, im->sw_interfaces)
{
if (!vnet_swif_is_api_visible (swif))
@@ -399,7 +400,6 @@ vl_api_sw_interface_dump_t_handler (vl_api_sw_interface_dump_t * mp)
send_sw_interface_details (am, rp, swif, name, mp->context);
}
- /* *INDENT-ON* */
vec_free (name);
vec_free (filter);
@@ -808,14 +808,12 @@ link_state_process (vlib_main_t * vm,
if (event_by_sw_if_index[i] == 0)
continue;
- /* *INDENT-OFF* */
pool_foreach (reg, vam->interface_events_registrations)
{
vl_reg = vl_api_client_index_to_registration (reg->client_index);
if (vl_reg)
send_sw_interface_event (vam, reg, vl_reg, i, event_by_sw_if_index[i]);
}
- /* *INDENT-ON* */
}
vec_reset_length (event_by_sw_if_index);
}
@@ -831,13 +829,11 @@ static clib_error_t *sw_interface_add_del_function (vnet_main_t * vm,
u32 sw_if_index,
u32 flags);
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (link_state_process_node,static) = {
.function = link_state_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "vpe-link-state-process",
};
-/* *INDENT-ON* */
VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (admin_up_down_function);
VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (link_up_down_function);
@@ -1024,21 +1020,19 @@ vl_api_sw_interface_set_interface_name_t_handler (
{
vl_api_sw_interface_set_interface_name_reply_t *rmp;
vnet_main_t *vnm = vnet_get_main ();
- u32 sw_if_index = ntohl (mp->sw_if_index);
- vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
clib_error_t *error;
int rv = 0;
+ VALIDATE_SW_IF_INDEX (mp);
+
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+
if (mp->name[0] == 0)
{
rv = VNET_API_ERROR_INVALID_VALUE;
goto out;
}
- if (si == 0)
- {
- rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
- goto out;
- }
error = vnet_rename_interface (vnm, si->hw_if_index, (char *) mp->name);
if (error)
@@ -1048,6 +1042,7 @@ vl_api_sw_interface_set_interface_name_t_handler (
}
out:
+ BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_SW_INTERFACE_SET_INTERFACE_NAME_REPLY);
}
@@ -1214,7 +1209,7 @@ out:
static void
send_interface_tx_placement_details (vnet_hw_if_tx_queue_t **all_queues,
u32 index, vl_api_registration_t *rp,
- u32 native_context)
+ u32 context)
{
vnet_main_t *vnm = vnet_get_main ();
vl_api_sw_interface_tx_placement_details_t *rmp;
@@ -1223,29 +1218,24 @@ send_interface_tx_placement_details (vnet_hw_if_tx_queue_t **all_queues,
uword *bitmap = q[0]->threads;
u32 hw_if_index = q[0]->hw_if_index;
vnet_hw_interface_t *hw_if = vnet_get_hw_interface (vnm, hw_if_index);
- u32 context = clib_host_to_net_u32 (native_context);
n_bits = clib_bitmap_count_set_bits (bitmap);
u32 n = n_bits * sizeof (u32);
- /*
- * FIXME: Use the REPLY_MACRO_DETAILS5_END once endian handler is registered
- * and available.
- */
- REPLY_MACRO_DETAILS5 (
- VL_API_SW_INTERFACE_TX_PLACEMENT_DETAILS, n, rp, context, ({
- rmp->sw_if_index = clib_host_to_net_u32 (hw_if->sw_if_index);
- rmp->queue_id = clib_host_to_net_u32 (q[0]->queue_id);
- rmp->shared = q[0]->shared_queue;
- rmp->array_size = clib_host_to_net_u32 (n_bits);
-
- v = clib_bitmap_first_set (bitmap);
- for (u32 i = 0; i < n_bits; i++)
- {
- rmp->threads[i] = clib_host_to_net_u32 (v);
- v = clib_bitmap_next_set (bitmap, v + 1);
- }
- }));
+ REPLY_MACRO_DETAILS5_END (VL_API_SW_INTERFACE_TX_PLACEMENT_DETAILS, n, rp,
+ context, ({
+ rmp->sw_if_index = hw_if->sw_if_index;
+ rmp->queue_id = q[0]->queue_id;
+ rmp->shared = q[0]->shared_queue;
+ rmp->array_size = n_bits;
+
+ v = clib_bitmap_first_set (bitmap);
+ for (u32 i = 0; i < n_bits; i++)
+ {
+ rmp->threads[i] = v;
+ v = clib_bitmap_next_set (bitmap, v + 1);
+ }
+ }));
}
static void
@@ -1480,12 +1470,10 @@ vl_api_create_subif_t_handler (vl_api_create_subif_t * mp)
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_CREATE_SUBIF_REPLY,
({
rmp->sw_if_index = ntohl(sub_sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -1527,12 +1515,10 @@ vl_api_create_loopback_t_handler (vl_api_create_loopback_t * mp)
mac_address_decode (mp->mac_address, &mac);
rv = vnet_create_loopback_interface (&sw_if_index, (u8 *) & mac, 0, 0);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_CREATE_LOOPBACK_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void vl_api_create_loopback_instance_t_handler
@@ -1549,12 +1535,10 @@ static void vl_api_create_loopback_instance_t_handler
rv = vnet_create_loopback_interface (&sw_if_index, (u8 *) & mac,
is_specified, user_instance);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_CREATE_LOOPBACK_INSTANCE_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -1608,6 +1592,92 @@ static void
REPLY_MACRO (VL_API_SW_INTERFACE_ADDRESS_REPLACE_END_REPLY);
}
+static void
+vl_api_pcap_set_filter_function_t_handler (
+ vl_api_pcap_set_filter_function_t *mp)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_pcap_t *pp = &vnm->pcap;
+ vl_api_pcap_set_filter_function_reply_t *rmp;
+ unformat_input_t input = { 0 };
+ vlib_is_packet_traced_fn_t *f;
+ char *filter_name;
+ int rv = 0;
+ filter_name = vl_api_from_api_to_new_c_string (&mp->filter_function_name);
+ unformat_init_cstring (&input, filter_name);
+ if (unformat (&input, "%U", unformat_vlib_trace_filter_function, &f) == 0)
+ {
+ rv = -1;
+ goto done;
+ }
+
+ pp->current_filter_function = f;
+
+done:
+ unformat_free (&input);
+ vec_free (filter_name);
+ REPLY_MACRO (VL_API_PCAP_SET_FILTER_FUNCTION_REPLY);
+}
+
+static void
+vl_api_pcap_trace_on_t_handler (vl_api_pcap_trace_on_t *mp)
+{
+ vl_api_pcap_trace_on_reply_t *rmp;
+ unformat_input_t filename, drop_err_name;
+ vnet_pcap_dispatch_trace_args_t capture_args;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ unformat_init_cstring (&filename, (char *) mp->filename);
+ if (!unformat_user (&filename, unformat_vlib_tmpfile,
+ &capture_args.filename))
+ {
+ rv = VNET_API_ERROR_ILLEGAL_NAME;
+ goto out;
+ }
+
+ capture_args.rx_enable = mp->capture_rx;
+ capture_args.tx_enable = mp->capture_tx;
+ capture_args.preallocate_data = mp->preallocate_data;
+ capture_args.free_data = mp->free_data;
+ capture_args.drop_enable = mp->capture_drop;
+ capture_args.status = 0;
+ capture_args.packets_to_capture = ntohl (mp->max_packets);
+ capture_args.sw_if_index = ntohl (mp->sw_if_index);
+ capture_args.filter = mp->filter;
+ capture_args.max_bytes_per_pkt = ntohl (mp->max_bytes_per_packet);
+ capture_args.drop_err = ~0;
+
+ unformat_init_cstring (&drop_err_name, (char *) mp->error);
+ unformat_user (&drop_err_name, unformat_vlib_error, vlib_get_main (),
+ &capture_args.drop_err);
+
+ rv = vnet_pcap_dispatch_trace_configure (&capture_args);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+out:
+ unformat_free (&filename);
+ unformat_free (&drop_err_name);
+
+ REPLY_MACRO (VL_API_PCAP_TRACE_ON_REPLY);
+}
+
+static void
+vl_api_pcap_trace_off_t_handler (vl_api_pcap_trace_off_t *mp)
+{
+ vl_api_pcap_trace_off_reply_t *rmp;
+ vnet_pcap_dispatch_trace_args_t capture_args;
+ int rv = 0;
+
+ clib_memset (&capture_args, 0, sizeof (capture_args));
+
+ rv = vnet_pcap_dispatch_trace_configure (&capture_args);
+
+ REPLY_MACRO (VL_API_PCAP_TRACE_OFF_REPLY);
+}
+
/*
* vpe_api_hookup
* Add vpe's API message handlers to the table.
diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c
index 3515c395e53..c56eb9777cf 100644
--- a/src/vnet/interface_cli.c
+++ b/src/vnet/interface_cli.c
@@ -54,6 +54,9 @@
#include <vnet/interface/rx_queue_funcs.h>
#include <vnet/interface/tx_queue_funcs.h>
#include <vnet/hash/hash.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/dev_funcs.h>
+
static int
compare_interface_names (void *a1, void *a2)
{
@@ -146,14 +149,12 @@ skip_unformat:
vlib_cli_output (vm, "%U\n", format_vnet_hw_interface, vnm,
hi, verbose);
- /* *INDENT-OFF* */
clib_bitmap_foreach (hw_idx, hi->bond_info)
{
shi = vnet_get_hw_interface(vnm, hw_idx);
vlib_cli_output (vm, "%U\n",
format_vnet_hw_interface, vnm, shi, verbose);
}
- /* *INDENT-ON* */
}
}
}
@@ -247,14 +248,12 @@ clear_hw_interfaces (vlib_main_t * vm,
* cpu socket 0
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_hw_interfaces_command, static) = {
.path = "show hardware-interfaces",
.short_help = "show hardware-interfaces [brief|verbose|detail] [bond] "
"[<interface> [<interface> [..]]] [<sw_idx> [<sw_idx> [..]]]",
.function = show_hw_interfaces,
};
-/* *INDENT-ON* */
/*?
@@ -268,14 +267,12 @@ VLIB_CLI_COMMAND (show_hw_interfaces_command, static) = {
* name and software index (where 2 is the software index):
* @cliexcmd{clear hardware-interfaces GigabitEthernet7/0/0 2}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_hw_interface_counters_command, static) = {
.path = "clear hardware-interfaces",
.short_help = "clear hardware-interfaces "
"[<interface> [<interface> [..]]] [<sw_idx> [<sw_idx> [..]]]",
.function = clear_hw_interfaces,
};
-/* *INDENT-ON* */
static int
sw_interface_name_compare (void *a1, void *a2)
@@ -417,14 +414,12 @@ show_sw_interfaces (vlib_main_t * vm,
sorted_sis =
vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
vec_set_len (sorted_sis, 0);
- /* *INDENT-OFF* */
pool_foreach (si, im->sw_interfaces)
{
int visible = vnet_swif_is_api_visible (si);
if (visible)
vec_add1 (sorted_sis, si[0]);
}
- /* *INDENT-ON* */
/* Sort by name. */
vec_sort_with_function (sorted_sis, sw_interface_name_compare);
}
@@ -466,7 +461,6 @@ show_sw_interfaces (vlib_main_t * vm,
/* Display any L2 info */
vlib_cli_output (vm, "%U", format_l2_input, si->sw_if_index);
- /* *INDENT-OFF* */
/* Display any IP4 addressing info */
foreach_ip_interface_address (lm4, ia, si->sw_if_index,
1 /* honor unnumbered */,
@@ -481,9 +475,7 @@ show_sw_interfaces (vlib_main_t * vm,
vlib_cli_output (vm, " L3 %U/%d",
format_ip4_address, r4, ia->address_length);
}));
- /* *INDENT-ON* */
- /* *INDENT-OFF* */
/* Display any IP6 addressing info */
foreach_ip_interface_address (lm6, ia, si->sw_if_index,
1 /* honor unnumbered */,
@@ -498,7 +490,6 @@ show_sw_interfaces (vlib_main_t * vm,
vlib_cli_output (vm, " L3 %U/%d",
format_ip6_address, r6, ia->address_length);
}));
- /* *INDENT-ON* */
}
}
else
@@ -514,29 +505,24 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sw_interfaces_command, static) = {
.path = "show interface",
- .short_help = "show interface [address|addr|features|feat|vtr] [<interface> [<interface> [..]]] [verbose]",
+ .short_help = "show interface [address|addr|features|feat|vtr|tag] "
+ "[<interface> [<interface> [..]]] [verbose]",
.function = show_sw_interfaces,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/* Root of all interface commands. */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vnet_cli_interface_command, static) = {
.path = "interface",
.short_help = "Interface commands",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vnet_cli_set_interface_command, static) = {
.path = "set interface",
.short_help = "Interface commands",
};
-/* *INDENT-ON* */
static clib_error_t *
clear_interface_counters (vlib_main_t * vm,
@@ -577,13 +563,11 @@ clear_interface_counters (vlib_main_t * vm,
* Example of how to clear the statistics for all interfaces:
* @cliexcmd{clear interfaces}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_interface_counters_command, static) = {
.path = "clear interfaces",
.short_help = "clear interfaces",
.function = clear_interface_counters,
};
-/* *INDENT-ON* */
/**
* Parse subinterface names.
@@ -908,7 +892,6 @@ done:
* @cliexcmd{set interface GigabitEthernet2/0/0.7 up}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_sub_interfaces_command, static) = {
.path = "create sub-interfaces",
.short_help = "create sub-interfaces <interface> "
@@ -917,7 +900,6 @@ VLIB_CLI_COMMAND (create_sub_interfaces_command, static) = {
"{<subId> dot1q|dot1ad <vlanId>|any [inner-dot1q <vlanId>|any] [exact-match]}",
.function = create_sub_interfaces,
};
-/* *INDENT-ON* */
static clib_error_t *
set_state (vlib_main_t * vm,
@@ -966,13 +948,11 @@ done:
'<em>down</em>':
* @cliexcmd{set interface state GigabitEthernet2/0/0 down}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_state_command, static) = {
.path = "set interface state",
.short_help = "set interface state <interface> [up|down|punt|enable]",
.function = set_state,
};
-/* *INDENT-ON* */
static clib_error_t *
set_unnumbered (vlib_main_t * vm,
@@ -1022,13 +1002,11 @@ set_unnumbered (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_unnumbered_command, static) = {
.path = "set interface unnumbered",
.short_help = "set interface unnumbered [<interface> use <interface> | del <interface>]",
.function = set_unnumbered,
};
-/* *INDENT-ON* */
@@ -1065,13 +1043,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_hw_class_command, static) = {
.path = "set interface hw-class",
.short_help = "Set interface hardware class",
.function = set_hw_class,
};
-/* *INDENT-ON* */
static clib_error_t *
vnet_interface_cli_init (vlib_main_t * vm)
@@ -1115,13 +1091,11 @@ renumber_interface_command_fn (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (renumber_interface_command, static) = {
.path = "renumber interface",
.short_help = "renumber interface <interface> <new-dev-instance>",
.function = renumber_interface_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
promiscuous_cmd (vlib_main_t * vm,
@@ -1151,13 +1125,11 @@ promiscuous_cmd (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_promiscuous_cmd, static) = {
.path = "set interface promiscuous",
.short_help = "set interface promiscuous [on|off] <interface>",
.function = promiscuous_cmd,
};
-/* *INDENT-ON* */
static clib_error_t *
mtu_cmd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
@@ -1208,13 +1180,11 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_mtu_cmd, static) = {
.path = "set interface mtu",
.short_help = "set interface mtu [packet|ip4|ip6|mpls] <value> <interface>",
.function = mtu_cmd,
};
-/* *INDENT-ON* */
static clib_error_t *
show_interface_sec_mac_addr_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -1238,14 +1208,12 @@ show_interface_sec_mac_addr_fn (vlib_main_t * vm, unformat_input_t * input,
sorted_sis =
vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
vec_set_len (sorted_sis, 0);
- /* *INDENT-OFF* */
pool_foreach (si, im->sw_interfaces)
{
int visible = vnet_swif_is_api_visible (si);
if (visible)
vec_add1 (sorted_sis, si[0]);
}
- /* *INDENT-ON* */
/* Sort by name. */
vec_sort_with_function (sorted_sis, sw_interface_name_compare);
}
@@ -1286,13 +1254,11 @@ show_interface_sec_mac_addr_fn (vlib_main_t * vm, unformat_input_t * input,
* @cliexstart{show interface secondary-mac-address}
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_interface_sec_mac_addr, static) = {
.path = "show interface secondary-mac-address",
.short_help = "show interface secondary-mac-address [<interface>]",
.function = show_interface_sec_mac_addr_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
interface_add_del_mac_address (vlib_main_t * vm, unformat_input_t * input,
@@ -1360,13 +1326,11 @@ done:
* @cliexcmd{set interface secondary-mac-address GigabitEthernet0/8/0 aa:bb:cc:dd:ee:01 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (interface_add_del_mac_address_cmd, static) = {
.path = "set interface secondary-mac-address",
.short_help = "set interface secondary-mac-address <interface> <mac-address> [(add|del)]",
.function = interface_add_del_mac_address,
};
-/* *INDENT-ON* */
static clib_error_t *
set_interface_mac_address (vlib_main_t * vm, unformat_input_t * input,
@@ -1410,13 +1374,11 @@ done:
* @cliexcmd{set interface mac address pg0 aa:bb:cc:dd:ee:04}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_mac_address_cmd, static) = {
.path = "set interface mac address",
.short_help = "set interface mac address <interface> <mac-address>",
.function = set_interface_mac_address,
};
-/* *INDENT-ON* */
static clib_error_t *
set_tag (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
@@ -1435,13 +1397,11 @@ set_tag (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_tag_command, static) = {
.path = "set interface tag",
.short_help = "set interface tag <interface> <tag>",
.function = set_tag,
};
-/* *INDENT-ON* */
static clib_error_t *
clear_tag (vlib_main_t * vm, unformat_input_t * input,
@@ -1459,13 +1419,11 @@ clear_tag (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_tag_command, static) = {
.path = "clear interface tag",
.short_help = "clear interface tag <interface>",
.function = clear_tag,
};
-/* *INDENT-ON* */
static clib_error_t *
set_ip_directed_broadcast (vlib_main_t * vm,
@@ -1499,13 +1457,11 @@ set_ip_directed_broadcast (vlib_main_t * vm,
* subnet broadcast address will be sent L2 broadcast on the interface,
* otherwise it is dropped.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip_directed_broadcast_command, static) = {
.path = "set interface ip directed-broadcast",
.short_help = "set interface enable <interface> <enable|disable>",
.function = set_ip_directed_broadcast,
};
-/* *INDENT-ON* */
clib_error_t *
set_hw_interface_change_rx_mode (vnet_main_t * vnm, u32 hw_if_index,
@@ -1515,6 +1471,33 @@ set_hw_interface_change_rx_mode (vnet_main_t * vnm, u32 hw_if_index,
clib_error_t *error = 0;
vnet_hw_interface_t *hw;
u32 *queue_indices = 0;
+ vnet_dev_port_t *port;
+
+ port = vnet_dev_get_port_from_hw_if_index (hw_if_index);
+
+ if (port)
+ {
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_rv_t rv;
+
+ vnet_dev_port_cfg_change_req_t req = {
+ .type = mode == VNET_HW_IF_RX_MODE_POLLING ?
+ VNET_DEV_PORT_CFG_RXQ_INTR_MODE_DISABLE :
+ VNET_DEV_PORT_CFG_RXQ_INTR_MODE_ENABLE,
+ .queue_id = queue_id_valid ? queue_id : 0,
+ .all_queues = queue_id_valid ? 0 : 1,
+ };
+
+ if ((rv = vnet_dev_port_cfg_change_req_validate (vm, port, &req)))
+ return vnet_dev_port_err (
+ vm, port, rv, "rx queue interupt mode enable/disable not supported");
+
+ if ((rv = vnet_dev_process_port_cfg_change_req (vm, port, &req)))
+ return vnet_dev_port_err (
+ vm, port, rv,
+ "device failed to enable/disable queue interrupt mode");
+ return 0;
+ }
hw = vnet_get_hw_interface (vnm, hw_if_index);
@@ -1634,13 +1617,11 @@ set_interface_rx_mode (vlib_main_t * vm, unformat_input_t * input,
* VirtualEthernet0/0/13 queue 3 (polling)
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_set_if_rx_mode,static) = {
.path = "set interface rx-mode",
.short_help = "set interface rx-mode <interface> [queue <n>] [polling | interrupt | adaptive]",
.function = set_interface_rx_mode,
};
-/* *INDENT-ON* */
static clib_error_t *
show_interface_rx_placement_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -1706,13 +1687,11 @@ show_interface_rx_placement_fn (vlib_main_t * vm, unformat_input_t * input,
* VirtualEthernet0/0/13 queue 3 (polling)
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_interface_rx_placement, static) = {
.path = "show interface rx-placement",
.short_help = "show interface rx-placement",
.function = show_interface_rx_placement_fn,
};
-/* *INDENT-ON* */
clib_error_t *
set_hw_interface_rx_placement (u32 hw_if_index, u32 queue_id,
u32 thread_index, u8 is_main)
@@ -1837,7 +1816,6 @@ set_interface_rx_placement (vlib_main_t *vm, unformat_input_t *input,
* VirtualEthernet0/0/13 queue 3 (polling)
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_set_if_rx_placement,static) = {
.path = "set interface rx-placement",
.short_help = "set interface rx-placement <interface> [queue <n>] "
@@ -1845,7 +1823,6 @@ VLIB_CLI_COMMAND (cmd_set_if_rx_placement,static) = {
.function = set_interface_rx_placement,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
int
set_hw_interface_tx_queue (u32 hw_if_index, u32 queue_id, uword *bitmap)
@@ -2030,13 +2007,11 @@ done:
* @cliexstart{set interface rss queues VirtualFunctionEthernet18/1/0 list 0,2-5,7}
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_set_interface_rss_queues,static) = {
.path = "set interface rss queues",
.short_help = "set interface rss queues <interface> <list <queue-list>>",
.function = set_interface_rss_queues_fn,
};
-/* *INDENT-ON* */
static u8 *
format_vnet_pcap (u8 * s, va_list * args)
@@ -2384,13 +2359,13 @@ pcap_trace_command_fn (vlib_main_t * vm,
* packet capture are preserved, so '<em>any</em>' can be used to reset
* the interface setting.
*
- * - <b>filter</b> - Use the pcap rx / tx / drop trace filter, which
+ * - <b>filter</b> - Use the pcap trace rx / tx / drop filter, which
* must be configured. Use <b>classify filter pcap...</b> to configure the
* filter. The filter will only be executed if the per-interface or
* any-interface tests fail.
*
* - <b>error <node>.<error></b> - filter packets based on a specific error.
- * For example: error {ip4-udp-lookup}.{No listener for dst port}
+ * For example: error {ip4-udp-lookup}.{no_listener}
*
* - <b>file <name></b> - Used to specify the output filename. The file will
* be placed in the '<em>/tmp</em>' directory, so only the filename is
@@ -2426,7 +2401,6 @@ pcap_trace_command_fn (vlib_main_t * vm,
* saved to /tmp/vppTest.pcap...
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (pcap_tx_trace_command, static) = {
.path = "pcap trace",
@@ -2436,7 +2410,72 @@ VLIB_CLI_COMMAND (pcap_tx_trace_command, static) = {
" [preallocate-data][free-data]",
.function = pcap_trace_command_fn,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+set_pcap_filter_function (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_pcap_t *pp = &vnet_get_main ()->pcap;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vlib_is_packet_traced_fn_t *res = 0;
+ clib_error_t *error = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != (uword) UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_vlib_trace_filter_function,
+ &res))
+ ;
+ else
+ {
+ error = clib_error_create (
+ "expected valid trace filter function, got `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+ pp->current_filter_function = res;
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (set_pcap_filter_function_cli, static) = {
+ .path = "set pcap filter function",
+ .short_help = "set pcap filter function <func_name>",
+ .function = set_pcap_filter_function,
+};
+
+static clib_error_t *
+show_pcap_filter_function (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_pcap_t *pp = &vnet_get_main ()->pcap;
+ vlib_trace_filter_main_t *tfm = &vlib_trace_filter_main;
+ vlib_is_packet_traced_fn_t *current_trace_filter_fn =
+ pp->current_filter_function;
+ vlib_trace_filter_function_registration_t *reg =
+ tfm->trace_filter_registration;
+
+ while (reg)
+ {
+ vlib_cli_output (vm, "%sname:%s description: %s priority: %u",
+ reg->function == current_trace_filter_fn ? "(*) " : "",
+ reg->name, reg->description, reg->priority);
+ reg = reg->next;
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_pcap_filter_function_cli, static) = {
+ .path = "show pcap filter function",
+ .short_help = "show pcap filter function",
+ .function = show_pcap_filter_function,
+};
static clib_error_t *
set_interface_name (vlib_main_t *vm, unformat_input_t *input,
diff --git a/src/vnet/interface_format.c b/src/vnet/interface_format.c
index 0c051dd4757..0eff8c4597c 100644
--- a/src/vnet/interface_format.c
+++ b/src/vnet/interface_format.c
@@ -143,11 +143,9 @@ format_vnet_hw_interface_rss_queues (u8 * s, va_list * args)
if (bitmap)
{
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, bitmap) {
s = format (s, "%u ", i);
}
- /* *INDENT-ON* */
}
return s;
@@ -290,7 +288,7 @@ format_vnet_sw_if_index_name (u8 * s, va_list * args)
if (NULL == si)
{
- return format (s, "DELETED");
+ return format (s, "DELETED (%u)", sw_if_index);
}
return format (s, "%U", format_vnet_sw_interface_name, vnm, si);
}
@@ -305,7 +303,7 @@ format_vnet_hw_if_index_name (u8 * s, va_list * args)
hi = vnet_get_hw_interface (vnm, hw_if_index);
if (hi == 0)
- return format (s, "DELETED");
+ return format (s, "DELETED (%u)", hw_if_index);
return format (s, "%v", hi->name);
}
diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h
index 02d80996a15..511df4920e4 100644
--- a/src/vnet/interface_funcs.h
+++ b/src/vnet/interface_funcs.h
@@ -483,12 +483,14 @@ unformat_function_t unformat_vnet_sw_interface_flags;
format_function_t format_vtr;
/* Node runtime for interface output function. */
+struct vnet_dev_tx_queue;
typedef struct
{
u32 hw_if_index;
u32 sw_if_index;
u32 dev_instance;
- u32 is_deleted;
+ u8 is_deleted;
+ struct vnet_dev_tx_queue *tx_queue;
} vnet_interface_output_runtime_t;
/* Interface output function. */
diff --git a/src/vnet/interface_output.c b/src/vnet/interface_output.c
index a19bbb867c3..47844dcd68a 100644
--- a/src/vnet/interface_output.c
+++ b/src/vnet/interface_output.c
@@ -85,9 +85,8 @@ format_vnet_interface_output_trace (u8 * s, va_list * va)
else
{
si = vnet_get_sw_interface (vnm, t->sw_if_index);
- s =
- format (s, "%U ", format_vnet_sw_interface_name, vnm, si,
- t->flags);
+ s = format (s, "%U flags 0x%08x", format_vnet_sw_interface_name, vnm,
+ si, t->flags);
}
s =
format (s, "\n%U%U", format_white_space, indent,
@@ -1222,7 +1221,6 @@ VLIB_NODE_FN (interface_punt) (vlib_main_t * vm,
return interface_drop_punt (vm, node, frame, VNET_ERROR_DISPOSITION_PUNT);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (interface_drop) = {
.name = "error-drop",
.vector_size = sizeof (u32),
@@ -1233,9 +1231,7 @@ VLIB_REGISTER_NODE (interface_drop) = {
[0] = "drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (interface_punt) = {
.name = "error-punt",
.vector_size = sizeof (u32),
@@ -1246,7 +1242,6 @@ VLIB_REGISTER_NODE (interface_punt) = {
[0] = "punt",
},
};
-/* *INDENT-ON* */
VLIB_REGISTER_NODE (vnet_per_buffer_interface_output_node) = {
.name = "interface-output",
diff --git a/src/vnet/interface_stats.c b/src/vnet/interface_stats.c
index 3afde0ea54f..ff1a2af9130 100644
--- a/src/vnet/interface_stats.c
+++ b/src/vnet/interface_stats.c
@@ -170,7 +170,6 @@ VLIB_NODE_FN (stats_collect_tx_node) (vlib_main_t * vm,
return stats_collect_inline (vm, node, frame, VLIB_TX);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (stats_collect_rx_node) = {
.vector_size = sizeof (u32),
.format_trace = format_stats_collect_trace,
@@ -201,7 +200,6 @@ VNET_FEATURE_INIT (stats_collect_tx_node, static) = {
.runs_before = VNET_FEATURES ("interface-output-arc-end"),
};
-/* *INDENT-ON* */
static clib_error_t *
stats_collect_init (vlib_main_t * vm)
diff --git a/src/vnet/interface_test.c b/src/vnet/interface_test.c
index c3ddcd74cc4..2d0c0ee81d1 100644
--- a/src/vnet/interface_test.c
+++ b/src/vnet/interface_test.c
@@ -1283,6 +1283,30 @@ api_sw_interface_set_interface_name (vat_main_t *vam)
return -1;
}
+static int
+api_pcap_set_filter_function (vat_main_t *vam)
+{
+ vl_api_pcap_set_filter_function_t *mp;
+ int ret;
+
+ M (PCAP_SET_FILTER_FUNCTION, mp);
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_pcap_trace_on (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_pcap_trace_off (vat_main_t *vam)
+{
+ return -1;
+}
+
#include <vnet/interface.api_test.c>
/*
diff --git a/src/vnet/ip-neighbor/ip4_neighbor.c b/src/vnet/ip-neighbor/ip4_neighbor.c
index 5a6e8dd154c..61b9e768fe5 100644
--- a/src/vnet/ip-neighbor/ip4_neighbor.c
+++ b/src/vnet/ip-neighbor/ip4_neighbor.c
@@ -187,12 +187,16 @@ ip4_arp_inline (vlib_main_t * vm,
/* resolve the packet's destination */
ip4_header_t *ip0 = vlib_buffer_get_current (p0);
resolve0 = ip0->dst_address;
- src0 = adj0->sub_type.glean.rx_pfx.fp_addr.ip4;
}
else
+ /* resolve the incomplete adj */
+ resolve0 = adj0->sub_type.nbr.next_hop.ip4;
+
+ if (is_glean && adj0->sub_type.glean.rx_pfx.fp_len)
+ /* the glean is for a connected, local prefix */
+ src0 = adj0->sub_type.glean.rx_pfx.fp_addr.ip4;
+ else
{
- /* resolve the incomplete adj */
- resolve0 = adj0->sub_type.nbr.next_hop.ip4;
/* Src IP address in ARP header. */
if (!fib_sas4_get (sw_if_index0, &resolve0, &src0) &&
!ip4_sas_by_sw_if_index (sw_if_index0, &resolve0, &src0))
@@ -270,7 +274,6 @@ VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return (ip4_arp_inline (vm, node, frame, 1));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_arp_node) =
{
.name = "ip4-arp",
@@ -296,7 +299,6 @@ VLIB_REGISTER_NODE (ip4_glean_node) =
[IP4_ARP_NEXT_DROP] = "ip4-drop",
},
};
-/* *INDENT-ON* */
#define foreach_notrace_ip4_arp_error \
_(THROTTLED) \
@@ -328,7 +330,7 @@ ip4_neighbor_main_loop_enter (vlib_main_t * vm)
vlib_thread_main_t *tm = &vlib_thread_main;
u32 n_vlib_mains = tm->n_vlib_mains;
- throttle_init (&arp_throttle, n_vlib_mains, 1e-3);
+ throttle_init (&arp_throttle, n_vlib_mains, THROTTLE_BITS, 1e-3);
return (NULL);
}
diff --git a/src/vnet/ip-neighbor/ip6_neighbor.c b/src/vnet/ip-neighbor/ip6_neighbor.c
index 576ae570c0f..ca8aed3d4ca 100644
--- a/src/vnet/ip-neighbor/ip6_neighbor.c
+++ b/src/vnet/ip-neighbor/ip6_neighbor.c
@@ -217,13 +217,14 @@ ip6_discover_neighbor_inline (vlib_main_t * vm,
* Choose source address based on destination lookup
* adjacency.
*/
- if (!fib_sas6_get (sw_if_index0, &ip0->dst_address, &src) ||
- !ip6_sas_by_sw_if_index (sw_if_index0, &ip0->dst_address, &src))
+ const ip6_address_t *ll = ip6_get_link_local_address (sw_if_index0);
+ if (!ll)
{
/* There is no address on the interface */
p0->error = node->errors[IP6_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS];
continue;
}
+ ip6_address_copy (&src, ll);
b0 = ip6_neighbor_probe (vm, vnm, sw_if_index0, thread_index, &src,
&ip0->dst_address);
@@ -263,7 +264,6 @@ ip6_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
return (ip6_discover_neighbor_inline (vm, node, frame, 1));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_glean_node) =
{
.function = ip6_glean,
@@ -294,7 +294,6 @@ VLIB_REGISTER_NODE (ip6_discover_neighbor_node) =
[IP6_NBR_NEXT_REPLY_TX] = "ip6-rewrite-mcast",
},
};
-/* *INDENT-ON* */
/* Template used to generate IP6 neighbor solicitation packets. */
vlib_packet_template_t ip6_neighbor_packet_template;
@@ -338,7 +337,7 @@ ip6_nd_main_loop_enter (vlib_main_t * vm)
{
vlib_thread_main_t *tm = &vlib_thread_main;
- throttle_init (&nd_throttle, tm->n_vlib_mains, 1e-3);
+ throttle_init (&nd_throttle, tm->n_vlib_mains, THROTTLE_BITS, 1e-3);
return 0;
}
diff --git a/src/vnet/ip-neighbor/ip_neighbor.api b/src/vnet/ip-neighbor/ip_neighbor.api
index a04fcbc569e..24cddd42fab 100644
--- a/src/vnet/ip-neighbor/ip_neighbor.api
+++ b/src/vnet/ip-neighbor/ip_neighbor.api
@@ -20,7 +20,7 @@
called through a shared memory interface.
*/
-option version = "1.0.0";
+option version = "1.0.1";
import "vnet/ip/ip_types.api";
import "vnet/ethernet/ethernet_types.api";
@@ -126,6 +126,40 @@ autoreply define ip_neighbor_config
bool recycle;
};
+/** \brief Get neighbor database configuration per AF
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param af - Address family (v4/v6)
+*/
+define ip_neighbor_config_get
+{
+ option in_progress;
+ u32 client_index;
+ u32 context;
+ vl_api_address_family_t af;
+};
+
+/** \brief Neighbor database configuration reply
+ @param context - sender context, to match reply w/ request
+ @param retval - error (0 is "no error")
+ @param af - Address family (v4/v6)
+ @param max_number - The maximum number of neighbours that will be created
+ @param max_age - The maximum age (in seconds) before an inactive neighbour
+ is flushed
+ @param recycle - If max_number of neighbours is reached and new ones need
+ to be created, should the oldest neighbour be 'recycled'
+*/
+define ip_neighbor_config_get_reply
+{
+ option in_progress;
+ u32 context;
+ i32 retval;
+ vl_api_address_family_t af;
+ u32 max_number;
+ u32 max_age;
+ bool recycle;
+};
+
/** \brief IP neighbour replace begin
The use-case is that, for some unspecified reason, the control plane
diff --git a/src/vnet/ip-neighbor/ip_neighbor.c b/src/vnet/ip-neighbor/ip_neighbor.c
index b33ca8a3538..d340037a15d 100644
--- a/src/vnet/ip-neighbor/ip_neighbor.c
+++ b/src/vnet/ip-neighbor/ip_neighbor.c
@@ -130,7 +130,6 @@ typedef struct ip_neighbor_db_t_
static vlib_log_class_t ipn_logger;
/* DBs of neighbours one per AF */
-/* *INDENT-OFF* */
static ip_neighbor_db_t ip_neighbor_db[N_AF] = {
[AF_IP4] = {
.ipndb_limit = 50000,
@@ -145,7 +144,6 @@ static ip_neighbor_db_t ip_neighbor_db[N_AF] = {
.ipndb_recycle = false,
}
};
-/* *INDENT-ON* */
#define IP_NEIGHBOR_DBG(...) \
vlib_log_debug (ipn_logger, __VA_ARGS__);
@@ -797,7 +795,7 @@ ip_neighbor_cmd (vlib_main_t * vm,
vnet_main_t *vnm = vnet_get_main ();
ip_neighbor_flags_t flags;
u32 sw_if_index = ~0;
- int is_add = 1;
+ int is_add = 1, is_flush = 0;
int count = 1;
flags = IP_NEIGHBOR_FLAG_DYNAMIC;
@@ -811,6 +809,8 @@ ip_neighbor_cmd (vlib_main_t * vm,
;
else if (unformat (input, "delete") || unformat (input, "del"))
is_add = 0;
+ else if (unformat (input, "flush"))
+ is_flush = 1;
else if (unformat (input, "static"))
{
flags |= IP_NEIGHBOR_FLAG_STATIC;
@@ -824,6 +824,13 @@ ip_neighbor_cmd (vlib_main_t * vm,
break;
}
+ if (is_flush)
+ {
+ ip_neighbor_del_all (AF_IP4, sw_if_index);
+ ip_neighbor_del_all (AF_IP6, sw_if_index);
+ return NULL;
+ }
+
if (sw_if_index == ~0 ||
ip_address_is_zero (&ip) || mac_address_is_zero (&mac))
return clib_error_return (0,
@@ -846,11 +853,10 @@ ip_neighbor_cmd (vlib_main_t * vm,
return NULL;
}
-/* *INDENT-OFF* */
/*?
* Add or delete IPv4 ARP cache entries.
*
- * @note 'set ip neighbor' options (e.g. delete, static, 'fib-id <id>',
+ * @note 'set ip neighbor' options (e.g. delete, static,
* 'count <number>', 'interface ip4_addr mac_addr') can be added in
* any order and combination.
*
@@ -859,35 +865,39 @@ ip_neighbor_cmd (vlib_main_t * vm,
* Add or delete IPv4 ARP cache entries as follows. MAC Address can be in
* either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format.
* @cliexcmd{set ip neighbor GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
- * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3 de:ad:be:ef:ba:be}
+ * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3
+ * de:ad:be:ef:ba:be}
*
- * To add or delete an IPv4 ARP cache entry to or from a specific fib
+ * To add or delete an IPv4 ARP cache entry
* table:
- * @cliexcmd{set ip neighbor fib-id 1 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
- * @cliexcmd{set ip neighbor fib-id 1 delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @cliexcmd{set ip neighbor GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3
+ * dead.beef.babe}
*
* Add or delete IPv4 static ARP cache entries as follows:
- * @cliexcmd{set ip neighbor static GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
- * @cliexcmd{set ip neighbor static delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @cliexcmd{set ip neighbor static GigabitEthernet2/0/0 6.0.0.3
+ * dead.beef.babe}
+ * @cliexcmd{set ip neighbor static delete GigabitEthernet2/0/0 6.0.0.3
+ * dead.beef.babe}
*
* For testing / debugging purposes, the 'set ip neighbor' command can add or
* delete multiple entries. Supply the 'count N' parameter:
- * @cliexcmd{set ip neighbor count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @cliexcmd{set ip neighbor count 10 GigabitEthernet2/0/0 6.0.0.3
+ * dead.beef.babe}
* @endparblock
?*/
VLIB_CLI_COMMAND (ip_neighbor_command, static) = {
.path = "set ip neighbor",
- .short_help =
- "set ip neighbor [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
+ .short_help = "set ip neighbor [del] <intfc> <ip-address> <mac-address> "
+ "[static] [no-fib-entry] [count <count>]",
.function = ip_neighbor_cmd,
};
VLIB_CLI_COMMAND (ip_neighbor_command2, static) = {
.path = "ip neighbor",
- .short_help =
- "ip neighbor [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
+ .short_help = "ip neighbor [del] [flush] <intfc> <ip-address> <mac-address> "
+ "[static] [no-fib-entry] [count <count>]",
.function = ip_neighbor_cmd,
};
-/* *INDENT-ON* */
static int
ip_neighbor_sort (void *a1, void *a2)
@@ -913,7 +923,6 @@ ip_neighbor_entries (u32 sw_if_index, ip_address_family_t af)
index_t *ipnis = NULL;
ip_neighbor_t *ipn;
- /* *INDENT-OFF* */
pool_foreach (ipn, ip_neighbor_pool)
{
if ((sw_if_index == ~0 ||
@@ -923,7 +932,6 @@ ip_neighbor_entries (u32 sw_if_index, ip_address_family_t af)
vec_add1 (ipnis, ip_neighbor_get_index(ipn));
}
- /* *INDENT-ON* */
if (ipnis)
vec_sort_with_function (ipnis, ip_neighbor_sort);
@@ -943,7 +951,6 @@ ip_neighbor_show_sorted_i (vlib_main_t * vm,
vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP",
"Flags", "Ethernet", "Interface");
- /* *INDENT-OFF*/
/* the list is time sorted, newest first, so start from the back
* and work forwards. Stop when we get to one that is alive */
clib_llist_foreach_reverse(ip_neighbor_elt_pool,
@@ -951,7 +958,6 @@ ip_neighbor_show_sorted_i (vlib_main_t * vm,
({
vlib_cli_output (vm, "%U", format_ip_neighbor, elt->ipne_index);
}));
- /* *INDENT-ON*/
return (NULL);
}
@@ -1033,7 +1039,6 @@ ip4_neighbor_show_sorted (vlib_main_t * vm,
* Fib_index 0 6.0.0.1 - 6.0.0.11
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_neighbors_cmd_node, static) = {
.path = "show ip neighbors",
.function = ip_neighbor_show,
@@ -1074,7 +1079,6 @@ VLIB_CLI_COMMAND (show_ip6_neighbor_sorted_cmd_node, static) = {
.function = ip6_neighbor_show_sorted,
.short_help = "show ip6 neighbor-sorted",
};
-/* *INDENT-ON* */
static ip_neighbor_vft_t ip_nbr_vfts[N_AF];
@@ -1124,13 +1128,11 @@ ip_neighbor_walk (ip_address_family_t af,
vec_foreach (hash, ip_neighbor_db[af].ipndb_hash)
{
- /* *INDENT-OFF* */
hash_foreach (key, ipni, *hash,
({
if (WALK_STOP == cb (ipni, ctx))
break;
}));
- /* *INDENT-ON* */
}
}
else
@@ -1141,13 +1143,11 @@ ip_neighbor_walk (ip_address_family_t af,
return;
hash = ip_neighbor_db[af].ipndb_hash[sw_if_index];
- /* *INDENT-OFF* */
hash_foreach (key, ipni, hash,
({
if (WALK_STOP == cb (ipni, ctx))
break;
}));
- /* *INDENT-ON* */
}
}
@@ -1226,14 +1226,12 @@ ip_neighbor_populate (ip_address_family_t af, u32 sw_if_index)
format_vnet_sw_if_index_name, vnet_get_main (),
sw_if_index, format_ip_address_family, af);
- /* *INDENT-OFF* */
pool_foreach (ipn, ip_neighbor_pool)
{
if (ip_neighbor_get_af(ipn) == af &&
ipn->ipn_key->ipnk_sw_if_index == sw_if_index)
vec_add1 (ipnis, ipn - ip_neighbor_pool);
}
- /* *INDENT-ON* */
vec_foreach (ipni, ipnis)
{
@@ -1259,7 +1257,6 @@ ip_neighbor_flush (ip_address_family_t af, u32 sw_if_index)
format_vnet_sw_if_index_name, vnet_get_main (),
sw_if_index, format_ip_address_family, af);
- /* *INDENT-OFF* */
pool_foreach (ipn, ip_neighbor_pool)
{
if (ip_neighbor_get_af(ipn) == af &&
@@ -1267,7 +1264,6 @@ ip_neighbor_flush (ip_address_family_t af, u32 sw_if_index)
ip_neighbor_is_dynamic (ipn))
vec_add1 (ipnis, ipn - ip_neighbor_pool);
}
- /* *INDENT-ON* */
vec_foreach (ipni, ipnis) ip_neighbor_destroy (ip_neighbor_get (*ipni));
vec_free (ipnis);
@@ -1447,7 +1443,6 @@ ip_neighbor_add_del_interface_address_v4 (ip4_main_t * im,
if (is_del)
{
- /* *INDENT-OFF* */
ip_neighbor_walk_covered_ctx_t ctx = {
.addr = {
.ip.ip4 = *address,
@@ -1455,7 +1450,6 @@ ip_neighbor_add_del_interface_address_v4 (ip4_main_t * im,
},
.length = address_length,
};
- /* *INDENT-ON* */
index_t *ipni;
ip_neighbor_walk (AF_IP4, sw_if_index, ip_neighbor_walk_covered, &ctx);
@@ -1489,7 +1483,6 @@ ip_neighbor_add_del_interface_address_v6 (ip6_main_t * im,
if (is_del)
{
- /* *INDENT-OFF* */
ip_neighbor_walk_covered_ctx_t ctx = {
.addr = {
.ip.ip6 = *address,
@@ -1497,7 +1490,6 @@ ip_neighbor_add_del_interface_address_v6 (ip6_main_t * im,
},
.length = address_length,
};
- /* *INDENT-ON* */
index_t *ipni;
ip_neighbor_walk (AF_IP6, sw_if_index, ip_neighbor_walk_covered, &ctx);
@@ -1593,8 +1585,8 @@ ip_neighbour_age_out (index_t ipni, f64 now, f64 * wait)
}
else
{
- ip_neighbor_probe_dst (ip_neighbor_get_sw_if_index (ipn), af,
- vlib_get_thread_index (),
+ ip_neighbor_probe_dst (ip_neighbor_get_sw_if_index (ipn),
+ vlib_get_thread_index (), af,
&ip_addr_46 (&ipn->ipn_key->ipnk_ip));
ipn->ipn_n_probes++;
@@ -1653,7 +1645,6 @@ ip_neighbor_age_loop (vlib_main_t * vm,
head = pool_elt_at_index (ip_neighbor_elt_pool,
ip_neighbor_list_head[af]);
- /* *INDENT-OFF*/
/* the list is time sorted, newest first, so start from the back
* and work forwards. Stop when we get to one that is alive */
restart:
@@ -1678,7 +1669,6 @@ ip_neighbor_age_loop (vlib_main_t * vm,
timeout = clib_min (wait, timeout);
}));
- /* *INDENT-ON* */
break;
}
case IP_NEIGHBOR_AGE_PROCESS_WAKEUP:
@@ -1725,7 +1715,6 @@ ip6_neighbor_age_process (vlib_main_t * vm,
return (ip_neighbor_age_loop (vm, rt, f, AF_IP6));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_neighbor_age_process_node,static) = {
.function = ip4_neighbor_age_process,
.type = VLIB_NODE_TYPE_PROCESS,
@@ -1736,7 +1725,6 @@ VLIB_REGISTER_NODE (ip6_neighbor_age_process_node,static) = {
.type = VLIB_NODE_TYPE_PROCESS,
.name = "ip6-neighbor-age-process",
};
-/* *INDENT-ON* */
int
ip_neighbor_config (ip_address_family_t af, u32 limit, u32 age, bool recycle)
@@ -1754,13 +1742,23 @@ ip_neighbor_config (ip_address_family_t af, u32 limit, u32 age, bool recycle)
return (0);
}
+int
+ip_neighbor_get_config (ip_address_family_t af, u32 *limit, u32 *age,
+ bool *recycle)
+{
+ *limit = ip_neighbor_db[af].ipndb_limit;
+ *age = ip_neighbor_db[af].ipndb_age;
+ *recycle = ip_neighbor_db[af].ipndb_recycle;
+
+ return (0);
+}
+
static clib_error_t *
ip_neighbor_config_show (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
ip_address_family_t af;
- /* *INDENT-OFF* */
FOR_EACH_IP_ADDRESS_FAMILY(af) {
vlib_cli_output (vm, "%U:", format_ip_address_family, af);
vlib_cli_output (vm, " limit:%d, age:%d, recycle:%d",
@@ -1769,7 +1767,6 @@ ip_neighbor_config_show (vlib_main_t * vm,
ip_neighbor_db[af].ipndb_recycle);
}
- /* *INDENT-ON* */
return (NULL);
}
@@ -1861,7 +1858,6 @@ ip_neighbor_stats_show (vlib_main_t *vm, unformat_input_t *input,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_neighbor_cfg_cmd_node, static) = {
.path = "show ip neighbor-config",
.function = ip_neighbor_config_show,
@@ -1878,7 +1874,6 @@ VLIB_CLI_COMMAND (show_ip_neighbor_stats_cmd_node, static) = {
.function = ip_neighbor_stats_show,
.short_help = "show ip neighbor-stats [interface]",
};
-/* *INDENT-ON* */
static clib_error_t *
ip_neighbor_init (vlib_main_t * vm)
@@ -1918,12 +1913,10 @@ ip_neighbor_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip_neighbor_init) =
{
.runs_after = VLIB_INITS("ip_main_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip-neighbor/ip_neighbor.h b/src/vnet/ip-neighbor/ip_neighbor.h
index 8c07df86ba8..cc888ba2054 100644
--- a/src/vnet/ip-neighbor/ip_neighbor.h
+++ b/src/vnet/ip-neighbor/ip_neighbor.h
@@ -36,6 +36,8 @@ extern int ip_neighbor_del (const ip_address_t * ip, u32 sw_if_index);
extern int ip_neighbor_config (ip_address_family_t af,
u32 limit, u32 age, bool recycle);
+extern int ip_neighbor_get_config (ip_address_family_t af, u32 *limit,
+ u32 *age, bool *recycle);
extern void ip_neighbor_del_all (ip_address_family_t af, u32 sw_if_index);
diff --git a/src/vnet/ip-neighbor/ip_neighbor_api.c b/src/vnet/ip-neighbor/ip_neighbor_api.c
index 81af86211de..2297546f111 100644
--- a/src/vnet/ip-neighbor/ip_neighbor_api.c
+++ b/src/vnet/ip-neighbor/ip_neighbor_api.c
@@ -234,12 +234,10 @@ vl_api_ip_neighbor_add_del_t_handler (vl_api_ip_neighbor_add_del_t * mp,
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IP_NEIGHBOR_ADD_DEL_REPLY,
({
rmp->stats_index = htonl (stats_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -314,6 +312,32 @@ vl_api_ip_neighbor_config_t_handler (vl_api_ip_neighbor_config_t * mp)
}
static void
+vl_api_ip_neighbor_config_get_t_handler (vl_api_ip_neighbor_config_get_t *mp)
+{
+ vl_api_ip_neighbor_config_get_reply_t *rmp;
+ int rv;
+ ip_address_family_t af = AF_IP4;
+ u32 max_number = ~0;
+ u32 max_age = ~0;
+ bool recycle = false;
+
+ rv = ip_address_family_decode (mp->af, &af);
+
+ if (!rv)
+ rv = ip_neighbor_get_config (af, &max_number, &max_age, &recycle);
+
+ // clang-format off
+ REPLY_MACRO2 (VL_API_IP_NEIGHBOR_CONFIG_GET_REPLY,
+ ({
+ rmp->af = ip_address_family_encode (af);
+ rmp->max_number = htonl (max_number);
+ rmp->max_age = htonl (max_age);
+ rmp->recycle = recycle;
+ }));
+ // clang-format on
+}
+
+static void
vl_api_ip_neighbor_replace_begin_t_handler (vl_api_ip_neighbor_replace_begin_t
* mp)
{
diff --git a/src/vnet/ip-neighbor/ip_neighbor_watch.c b/src/vnet/ip-neighbor/ip_neighbor_watch.c
index 72908f4e613..74f450114e1 100644
--- a/src/vnet/ip-neighbor/ip_neighbor_watch.c
+++ b/src/vnet/ip-neighbor/ip_neighbor_watch.c
@@ -66,13 +66,11 @@ ip_neighbor_event_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip_neighbor_event_process_node) = {
.function = ip_neighbor_event_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "ip-neighbor-event",
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -84,7 +82,6 @@ want_ip_neighbor_events_reaper (u32 client_index)
i32 pos;
/* walk the entire IP neighbour DB and removes the client's registrations */
- /* *INDENT-OFF* */
mhash_foreach(key, v, &ipnw_db.ipnwdb_hash,
({
watchers = (ip_neighbor_watcher_t*) *v;
@@ -97,7 +94,6 @@ want_ip_neighbor_events_reaper (u32 client_index)
if (vec_len(watchers) == 0)
vec_add1 (empty_keys, *key);
}));
- /* *INDENT-OFF* */
vec_foreach (key, empty_keys)
mhash_unset (&ipnw_db.ipnwdb_hash, key, NULL);
@@ -236,7 +232,6 @@ ip_neighbor_watchers_show (vlib_main_t * vm,
ip_neighbor_key_t *key;
uword *v;
- /* *INDENT-OFF* */
mhash_foreach(key, v, &ipnw_db.ipnwdb_hash,
({
watchers = (ip_neighbor_watcher_t*) *v;
@@ -247,17 +242,14 @@ ip_neighbor_watchers_show (vlib_main_t * vm,
vec_foreach (watcher, watchers)
vlib_cli_output (vm, " %U", format_ip_neighbor_watcher, watcher);
}));
- /* *INDENT-ON* */
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_neighbor_watchers_cmd_node, static) = {
.path = "show ip neighbor-watcher",
.function = ip_neighbor_watchers_show,
.short_help = "show ip neighbors-watcher",
};
-/* *INDENT-ON* */
static clib_error_t *
ip_neighbor_watch_init (vlib_main_t * vm)
@@ -267,12 +259,10 @@ ip_neighbor_watch_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip_neighbor_watch_init) =
{
.runs_after = VLIB_INITS("ip_neighbor_init"),
};
-/* *INDENT-ON* */
/*
diff --git a/src/vnet/ip/icmp4.c b/src/vnet/ip/icmp4.c
index 318081b9c9f..fa4a0e12276 100644
--- a/src/vnet/ip/icmp4.c
+++ b/src/vnet/ip/icmp4.c
@@ -204,7 +204,6 @@ ip4_icmp_input (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_icmp_input_node) = {
.function = ip4_icmp_input,
.name = "ip4-icmp-input",
@@ -221,7 +220,6 @@ VLIB_REGISTER_NODE (ip4_icmp_input_node) = {
[ICMP_INPUT_NEXT_ERROR] = "ip4-punt",
},
};
-/* *INDENT-ON* */
typedef enum
{
@@ -318,13 +316,14 @@ ip4_icmp_error (vlib_main_t * vm,
sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
- vlib_buffer_copy_trace_flag (vm, p0, pi0);
+ vlib_buffer_copy_trace_flag (vm, org_p0, pi0);
/* Add IP header and ICMPv4 header including a 4 byte data field */
vlib_buffer_advance (p0,
-sizeof (ip4_header_t) -
sizeof (icmp46_header_t) - 4);
+ p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
p0->current_length =
p0->current_length > 576 ? 576 : p0->current_length;
out_ip0 = vlib_buffer_get_current (p0);
@@ -342,7 +341,7 @@ ip4_icmp_error (vlib_main_t * vm,
/* Prefer a source address from "offending interface" */
if (!ip4_sas_by_sw_if_index (sw_if_index0, &out_ip0->dst_address,
&out_ip0->src_address))
- { /* interface has no IP6 address - should not happen */
+ { /* interface has no IP4 address - should not happen */
next0 = IP4_ICMP_ERROR_NEXT_DROP;
error0 = ICMP4_ERROR_DROP;
}
@@ -387,7 +386,6 @@ ip4_icmp_error (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_icmp_error_node) = {
.function = ip4_icmp_error,
.name = "ip4-icmp-error",
@@ -404,7 +402,6 @@ VLIB_REGISTER_NODE (ip4_icmp_error_node) = {
.format_trace = format_icmp_input_trace,
};
-/* *INDENT-ON* */
static uword
@@ -590,7 +587,7 @@ icmp4_init (vlib_main_t * vm)
vlib_thread_main_t *tm = &vlib_thread_main;
u32 n_vlib_mains = tm->n_vlib_mains;
- throttle_init (&icmp_throttle, n_vlib_mains, 1e-3);
+ throttle_init (&icmp_throttle, n_vlib_mains, THROTTLE_BITS, 1e-5);
return 0;
}
diff --git a/src/vnet/ip/icmp46_packet.h b/src/vnet/ip/icmp46_packet.h
index 0545046fe60..08e73f6cd7d 100644
--- a/src/vnet/ip/icmp46_packet.h
+++ b/src/vnet/ip/icmp46_packet.h
@@ -187,7 +187,6 @@ typedef enum
#undef _
} icmp6_code_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
u8 type;
@@ -195,7 +194,6 @@ typedef CLIB_PACKED (struct
/* IP checksum of icmp header plus data which follows. */
u16 checksum;
}) icmp46_header_t;
-/* *INDENT-ON* */
/* ip6 neighbor discovery */
#define foreach_icmp6_neighbor_discovery_option \
@@ -238,7 +236,6 @@ typedef enum icmp6_neighbor_discovery_option_type
#undef _
} icmp6_neighbor_discovery_option_type_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
/* Option type. */
@@ -357,6 +354,5 @@ typedef CLIB_PACKED (struct
icmp6_neighbor_discovery_ethernet_link_layer_address_option_t
link_layer_option;
}) icmp6_neighbor_solicitation_header_t;
-/* *INDENT-ON* */
#endif /* included_vnet_icmp46_packet_h */
diff --git a/src/vnet/ip/icmp6.c b/src/vnet/ip/icmp6.c
index 4cabc0e083f..b095f679cc8 100644
--- a/src/vnet/ip/icmp6.c
+++ b/src/vnet/ip/icmp6.c
@@ -235,7 +235,6 @@ ip6_icmp_input (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_icmp_input_node) = {
.function = ip6_icmp_input,
.name = "ip6-icmp-input",
@@ -252,7 +251,6 @@ VLIB_REGISTER_NODE (ip6_icmp_input_node) = {
[ICMP_INPUT_NEXT_PUNT] = "ip6-punt",
},
};
-/* *INDENT-ON* */
typedef enum
{
@@ -359,14 +357,13 @@ ip6_icmp_error (vlib_main_t * vm,
sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
- vlib_buffer_copy_trace_flag (vm, p0, pi0);
+ vlib_buffer_copy_trace_flag (vm, org_p0, pi0);
/* Add IP header and ICMPv6 header including a 4 byte data field */
vlib_buffer_advance (p0,
-(sizeof (ip6_header_t) +
sizeof (icmp46_header_t) + 4));
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0;
p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
p0->current_length =
p0->current_length > 1280 ? 1280 : p0->current_length;
@@ -427,7 +424,6 @@ ip6_icmp_error (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_icmp_error_node) = {
.function = ip6_icmp_error,
.name = "ip6-icmp-error",
@@ -444,7 +440,6 @@ VLIB_REGISTER_NODE (ip6_icmp_error_node) = {
.format_trace = format_icmp6_input_trace,
};
-/* *INDENT-ON* */
static uword
@@ -644,7 +639,7 @@ icmp6_init (vlib_main_t * vm)
vlib_thread_main_t *tm = &vlib_thread_main;
u32 n_vlib_mains = tm->n_vlib_mains;
- throttle_init (&icmp_throttle, n_vlib_mains, 1e-3);
+ throttle_init (&icmp_throttle, n_vlib_mains, THROTTLE_BITS, 1e-3);
return (NULL);
}
diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api
index 23e094b48a0..967f56cf917 100644
--- a/src/vnet/ip/ip.api
+++ b/src/vnet/ip/ip.api
@@ -366,6 +366,41 @@ autoreply define set_ip_flow_hash_v2
vl_api_ip_flow_hash_config_t flow_hash_config;
};
+/**
+ @brief flow hash settings for an IP table
+ @param src - include src in flow hash
+ @param dst - include dst in flow hash
+ @param sport - include sport in flow hash
+ @param dport - include dport in flow hash
+ @param proto - include proto in flow hash
+ @param reverse - include reverse in flow hash
+ @param symmetric - include symmetry in flow hash
+ @param flowlabel - include flowlabel in flow hash
+ @param gtpv1teid - include gtpv1teid in flow hash
+*/
+enumflag ip_flow_hash_config_v2
+{
+ IP_API_V2_FLOW_HASH_SRC_IP = 0x01,
+ IP_API_V2_FLOW_HASH_DST_IP = 0x02,
+ IP_API_V2_FLOW_HASH_SRC_PORT = 0x04,
+ IP_API_V2_FLOW_HASH_DST_PORT = 0x08,
+ IP_API_V2_FLOW_HASH_PROTO = 0x10,
+ IP_API_V2_FLOW_HASH_REVERSE = 0x20,
+ IP_API_V2_FLOW_HASH_SYMETRIC = 0x40,
+ IP_API_V2_FLOW_HASH_FLOW_LABEL = 0x80,
+ IP_API_V2_FLOW_HASH_GTPV1_TEID = 0x100,
+};
+
+autoreply define set_ip_flow_hash_v3
+{
+ u32 client_index;
+ u32 context;
+ u32 table_id;
+ vl_api_address_family_t af;
+ vl_api_ip_flow_hash_config_v2_t flow_hash_config;
+ option status="in_progress";
+};
+
/** \brief Set the ip flow hash router ID
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -587,6 +622,7 @@ typedef punt_redirect
autoreply define ip_punt_redirect
{
option deprecated;
+
u32 client_index;
u32 context;
vl_api_punt_redirect_t punt;
@@ -595,6 +631,8 @@ autoreply define ip_punt_redirect
define ip_punt_redirect_dump
{
+ option deprecated;
+
u32 client_index;
u32 context;
vl_api_interface_index_t sw_if_index;
@@ -603,6 +641,8 @@ define ip_punt_redirect_dump
define ip_punt_redirect_details
{
+ option deprecated;
+
u32 context;
vl_api_punt_redirect_t punt;
};
@@ -1020,6 +1060,12 @@ counters ip4 {
units "packets";
description "ip4 ttl <= 1";
};
+ hdr_too_short {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 IHL < 5";
+ };
/* Errors signalled by ip4-rewrite. */
mtu_exceeded {
diff --git a/src/vnet/ip/ip.c b/src/vnet/ip/ip.c
index 0a602b43ac7..586f7dfbc85 100644
--- a/src/vnet/ip/ip.c
+++ b/src/vnet/ip/ip.c
@@ -118,7 +118,6 @@ ip_set (ip46_address_t * dst, void *src, u8 is_ip4)
sizeof (ip6_address_t));
}
-/* *INDENT-OFF* */
static const char *ip_arc_names[N_IP_FEATURE_LOCATIONS][N_AF][N_SAFI] = {
[IP_FEATURE_INPUT] = {
[AF_IP4] = {
@@ -171,7 +170,6 @@ static const char *ip_arc_names[N_IP_FEATURE_LOCATIONS][N_AF][N_SAFI] = {
},
},
};
-/* *INDENT-ON* */
void
ip_feature_enable_disable (ip_address_family_t af,
@@ -203,7 +201,8 @@ ip_feature_enable_disable (ip_address_family_t af,
}
int
-ip_flow_hash_set (ip_address_family_t af, u32 table_id, u32 flow_hash_config)
+ip_flow_hash_set (ip_address_family_t af, u32 table_id,
+ flow_hash_config_t flow_hash_config)
{
fib_protocol_t fproto;
u32 fib_index;
diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h
index e969594ec00..45d07c2e0f6 100644
--- a/src/vnet/ip/ip4.h
+++ b/src/vnet/ip/ip4.h
@@ -211,7 +211,6 @@ ip4_interface_address_matching_destination (ip4_main_t * im,
ip_interface_address_t *ia;
ip4_address_t *result = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm, ia, sw_if_index,
1 /* honor unnumbered */,
({
@@ -222,7 +221,6 @@ ip4_interface_address_matching_destination (ip4_main_t * im,
break;
}
}));
- /* *INDENT-ON* */
if (result_ia)
*result_ia = result ? ia : 0;
return result;
diff --git a/src/vnet/ip/ip46_address.h b/src/vnet/ip/ip46_address.h
index f726178ee63..90f766464f6 100644
--- a/src/vnet/ip/ip46_address.h
+++ b/src/vnet/ip/ip46_address.h
@@ -34,7 +34,6 @@ typedef enum
extern u8 *format_ip46_type (u8 * s, va_list * args);
-/* *INDENT-OFF* */
typedef CLIB_PACKED (union ip46_address_t_ {
struct {
u32 pad[3];
@@ -44,7 +43,6 @@ typedef CLIB_PACKED (union ip46_address_t_ {
u8 as_u8[16];
u64 as_u64[2];
}) ip46_address_t;
-/* *INDENT-ON* */
format_function_t format_ip46_address;
diff --git a/src/vnet/ip/ip46_cli.c b/src/vnet/ip/ip46_cli.c
index f58be898d9b..e3da27914bd 100644
--- a/src/vnet/ip/ip46_cli.c
+++ b/src/vnet/ip/ip46_cli.c
@@ -71,12 +71,10 @@ ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2)
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_command, static) = {
.path = "set interface ip",
.short_help = "IP4/IP6 commands",
};
-/* *INDENT-ON* */
void
ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index)
@@ -90,7 +88,6 @@ ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index)
ip_interface_address_t *ia;
int i;
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im4->lookup_main, ia, sw_if_index,
0 /* honor unnumbered */,
({
@@ -99,9 +96,7 @@ ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index)
vec_add1 (ip4_addrs, x[0]);
vec_add1 (ip4_masks, ia->address_length);
}));
- /* *INDENT-ON* */
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im6->lookup_main, ia, sw_if_index,
0 /* honor unnumbered */,
({
@@ -110,7 +105,6 @@ ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index)
vec_add1 (ip6_addrs, x[0]);
vec_add1 (ip6_masks, ia->address_length);
}));
- /* *INDENT-ON* */
for (i = 0; i < vec_len (ip4_addrs); i++)
ip4_add_del_interface_address (vm, sw_if_index, &ip4_addrs[i],
@@ -212,13 +206,11 @@ done:
* @cliexcmd{set interface ip address del GigabitEthernet2/0/0 all}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_address_command, static) = {
.path = "set interface ip address",
.function = add_del_ip_address,
.short_help = "set interface ip address [del] <interface> <ip-addr>/<mask> | [all]",
};
-/* *INDENT-ON* */
static clib_error_t *
set_reassembly_command_fn (vlib_main_t * vm,
@@ -294,13 +286,11 @@ set_reassembly_command_fn (vlib_main_t * vm,
return NULL;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_reassembly_command, static) = {
.path = "set interface reassembly",
.short_help = "set interface reassembly <interface-name> [on|off|ip4|ip6]",
.function = set_reassembly_command_fn,
};
-/* *INDENT-ON* */
/* Dummy init function to get us linked in. */
static clib_error_t *
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index 826fa573e9c..ff74b52eb18 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -103,7 +103,6 @@ VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_lookup_node) =
{
.name = "ip4-lookup",
@@ -112,7 +111,6 @@ VLIB_REGISTER_NODE (ip4_lookup_node) =
.n_next_nodes = IP_LOOKUP_N_NEXT,
.next_nodes = IP4_LOOKUP_NEXT_NODES,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -268,7 +266,6 @@ VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_load_balance_node) =
{
.name = "ip4-load-balance",
@@ -276,7 +273,6 @@ VLIB_REGISTER_NODE (ip4_load_balance_node) =
.sibling_of = "ip4-lookup",
.format_trace = format_ip4_lookup_trace,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
/* get first interface address */
@@ -288,7 +284,6 @@ ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
ip_interface_address_t *ia = 0;
ip4_address_t *result = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address
(lm, ia, sw_if_index,
1 /* honor unnumbered */ ,
@@ -298,7 +293,6 @@ ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
result = a;
break;
}));
- /* *INDENT-OFF* */
if (result_ia)
*result_ia = result ? ia : 0;
return result;
@@ -671,7 +665,6 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm,
* subnets on interfaces. Easy fix - disallow overlapping subnets, like
* most routers do.
*/
- /* *INDENT-OFF* */
if (!is_del)
{
/* When adding an address check that it does not conflict
@@ -732,7 +725,6 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm,
}
}
}
- /* *INDENT-ON* */
if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
@@ -853,7 +845,6 @@ ip4_directed_broadcast (u32 sw_if_index, u8 enable)
* when directed broadcast is enabled, the subnet braodcast route will forward
* packets using an adjacency with a broadcast MAC. otherwise it drops
*/
- /* *INDENT-OFF* */
foreach_ip_interface_address(&im->lookup_main, ia,
sw_if_index, 0,
({
@@ -877,7 +868,6 @@ ip4_directed_broadcast (u32 sw_if_index, u8 enable)
&pfx, sw_if_index);
}
}));
- /* *INDENT-ON* */
}
#endif
@@ -897,7 +887,6 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
0 /* honor unnumbered */,
({
@@ -911,7 +900,6 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
im, fib_index,
a, ia->address_length);
}));
- /* *INDENT-ON* */
return 0;
}
@@ -919,7 +907,6 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
/* Built-in ip4 unicast rx feature path definition */
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
.arc_name = "ip4-unicast",
@@ -1058,7 +1045,6 @@ VNET_FEATURE_INIT (ip4_interface_output, static) =
.node_name = "interface-output",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
static clib_error_t *
ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
@@ -1083,13 +1069,11 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
vlib_main_t *vm = vlib_get_main ();
vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
({
address = ip_interface_address_get_address (lm4, ia);
ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
}));
- /* *INDENT-ON* */
ip4_mfib_interface_enable_disable (sw_if_index, 0);
if (0 != im4->fib_index_by_sw_if_index[sw_if_index])
@@ -1206,9 +1190,11 @@ format_ip4_forward_next_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
u32 indent = format_get_indent (s);
- s = format (s, "%U%U",
- format_white_space, indent,
- format_ip4_header, t->packet_data, sizeof (t->packet_data));
+
+ s = format (s, "%Ufib:%d adj:%d flow:0x%08x", format_white_space, indent,
+ t->fib_index, t->dpo_index, t->flow_hash);
+ s = format (s, "\n%U%U", format_white_space, indent, format_ip4_header,
+ t->packet_data, sizeof (t->packet_data));
return s;
}
#endif
@@ -1397,13 +1383,11 @@ ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
}
#endif
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_local) = {
.arc_name = "ip4-local",
.start_nodes = VNET_FEATURES ("ip4-local", "ip4-receive"),
.last_in_arc = "ip4-local-end-of-arc",
};
-/* *INDENT-ON* */
static inline void
ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
@@ -1479,10 +1463,10 @@ ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
|| ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
{
- if (is_tcp_udp[0])
+ if (is_tcp_udp[0] && !ip4_local_csum_is_offloaded (b[0]))
ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
&good_tcp_udp[0]);
- if (is_tcp_udp[1])
+ if (is_tcp_udp[1] && !ip4_local_csum_is_offloaded (b[1]))
ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
&good_tcp_udp[1]);
}
@@ -1989,14 +1973,12 @@ show_ip_local_command_fn (vlib_main_t * vm,
* 47
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_local, static) =
{
.path = "show ip local",
.function = show_ip_local_command_fn,
.short_help = "show ip local",
};
-/* *INDENT-ON* */
typedef enum
{
@@ -2243,9 +2225,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
adj0->ia_cfg_index);
next[0] = next_index;
- if (is_midchain)
- vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
- 0 /* is_ip6 */ );
}
else
{
@@ -2268,9 +2247,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
&next_index, b[1],
adj1->ia_cfg_index);
next[1] = next_index;
- if (is_midchain)
- vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
- 0 /* is_ip6 */ );
}
else
{
@@ -2420,9 +2396,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (is_midchain)
{
- vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
- 0 /* is_ip6 */ );
-
/* Guess we are only writing on ipv4 header. */
vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
}
@@ -2526,10 +2499,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (is_midchain)
{
- /* this acts on the packet that is about to be encapped */
- vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
- 0 /* is_ip6 */ );
-
/* Guess we are only writing on ipv4 header. */
vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
}
@@ -2656,7 +2625,6 @@ VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_rewrite_node) = {
.name = "ip4-rewrite",
.vector_size = sizeof (u32),
@@ -2701,7 +2669,6 @@ VLIB_REGISTER_NODE (ip4_midchain_node) = {
.format_trace = format_ip4_rewrite_trace,
.sibling_of = "ip4-rewrite",
};
-/* *INDENT-ON */
static clib_error_t *
set_ip_flow_hash_command_fn (vlib_main_t * vm,
@@ -2833,15 +2800,12 @@ set_ip_flow_hash_command_fn (vlib_main_t * vm,
* [0] [@0]: dpo-drop ip6
* @cliexend
?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
-{
+VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
.path = "set ip flow-hash",
- .short_help =
- "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
+ .short_help = "set ip flow-hash table <table-id> [src] [dst] [sport] "
+ "[dport] [proto] [reverse] [gtpv1teid]",
.function = set_ip_flow_hash_command_fn,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
int
@@ -2958,7 +2922,6 @@ set_ip_classify_command_fn (vlib_main_t * vm,
* Example of how to assign a classification table to an interface:
* @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip_classify_command, static) =
{
.path = "set ip classify",
@@ -2966,7 +2929,6 @@ VLIB_CLI_COMMAND (set_ip_classify_command, static) =
"set ip classify intfc <interface> table-index <classify-idx>",
.function = set_ip_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip4_inlines.h b/src/vnet/ip/ip4_inlines.h
index ca7327fbcdc..b4fcebc9896 100644
--- a/src/vnet/ip/ip4_inlines.h
+++ b/src/vnet/ip/ip4_inlines.h
@@ -43,6 +43,7 @@
#include <vnet/ip/ip_flow_hash.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/tcp/tcp_packet.h>
+#include <vnet/udp/udp_packet.h>
#define IP_DF 0x4000 /* don't fragment */
@@ -53,9 +54,11 @@ ip4_compute_flow_hash (const ip4_header_t * ip,
flow_hash_config_t flow_hash_config)
{
tcp_header_t *tcp = (void *) (ip + 1);
+ udp_header_t *udp = (void *) (ip + 1);
+ gtpv1u_header_t *gtpu = (void *) (udp + 1);
u32 a, b, c, t1, t2;
- uword is_tcp_udp = (ip->protocol == IP_PROTOCOL_TCP
- || ip->protocol == IP_PROTOCOL_UDP);
+ uword is_udp = ip->protocol == IP_PROTOCOL_UDP;
+ uword is_tcp_udp = (ip->protocol == IP_PROTOCOL_TCP || is_udp);
t1 = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR)
? ip->src_address.data_u32 : 0;
@@ -90,6 +93,13 @@ ip4_compute_flow_hash (const ip4_header_t * ip,
b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0;
c = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ?
(t1 << 16) | t2 : (t2 << 16) | t1;
+ if (PREDICT_TRUE (is_udp) &&
+ PREDICT_FALSE ((flow_hash_config & IP_FLOW_HASH_GTPV1_TEID) &&
+ udp->dst_port == GTPV1_PORT_BE))
+ {
+ t1 = gtpu->teid;
+ c ^= t1;
+ }
a ^= ip_flow_hash_router_id;
hash_v3_mix32 (a, b, c);
diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c
index 436e52ff12c..106d17da3cb 100644
--- a/src/vnet/ip/ip4_input.c
+++ b/src/vnet/ip/ip4_input.c
@@ -374,7 +374,6 @@ VLIB_NODE_FN (ip4_input_no_checksum_node) (vlib_main_t * vm,
return ip4_input_inline (vm, node, frame, /* verify_checksum */ 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_input_node) = {
.name = "ip4-input",
.vector_size = sizeof (u32),
@@ -405,7 +404,6 @@ VLIB_REGISTER_NODE (ip4_input_no_checksum_node) = {
.format_buffer = format_ip4_header,
.format_trace = format_ip4_input_trace,
};
-/* *INDENT-ON* */
static clib_error_t *
ip4_init (vlib_main_t * vm)
diff --git a/src/vnet/ip/ip4_input.h b/src/vnet/ip/ip4_input.h
index 57aef0bf77a..d2ed13fa35f 100644
--- a/src/vnet/ip/ip4_input.h
+++ b/src/vnet/ip/ip4_input.h
@@ -60,15 +60,17 @@ check_ver_opt_csum (ip4_header_t * ip, u8 * error, int verify_checksum)
{
if (PREDICT_FALSE (ip->ip_version_and_header_length != 0x45))
{
- if ((ip->ip_version_and_header_length & 0xf) != 5)
+ if ((ip->ip_version_and_header_length & 0xf0) != 0x40)
+ *error = IP4_ERROR_VERSION;
+ else if ((ip->ip_version_and_header_length & 0x0f) < 5)
+ *error = IP4_ERROR_HDR_TOO_SHORT;
+ else
{
*error = IP4_ERROR_OPTIONS;
if (verify_checksum &&
clib_ip_csum ((u8 *) ip, ip4_header_bytes (ip)) != 0)
*error = IP4_ERROR_BAD_CHECKSUM;
}
- else
- *error = IP4_ERROR_VERSION;
}
else if (PREDICT_FALSE (verify_checksum &&
clib_ip_csum ((u8 *) ip, sizeof (ip4_header_t)) !=
diff --git a/src/vnet/ip/ip4_options.c b/src/vnet/ip/ip4_options.c
index 6ef6b6030cc..bbe311ffb20 100644
--- a/src/vnet/ip/ip4_options.c
+++ b/src/vnet/ip/ip4_options.c
@@ -127,7 +127,6 @@ format_ip4_options_trace (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_options_node) = {
.name = "ip4-options",
.vector_size = sizeof (u32),
@@ -140,7 +139,6 @@ VLIB_REGISTER_NODE (ip4_options_node) = {
.format_buffer = format_ip4_header,
.format_trace = format_ip4_options_trace,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h
index 2673558e19e..269049194e6 100644
--- a/src/vnet/ip/ip4_packet.h
+++ b/src/vnet/ip/ip4_packet.h
@@ -129,19 +129,15 @@ typedef union
/* For checksumming we'll want to access IP header in word sized chunks. */
/* For 64 bit machines. */
- /* *INDENT-OFF* */
CLIB_PACKED (struct {
u64 checksum_data_64[2];
u32 checksum_data_64_32[1];
});
- /* *INDENT-ON* */
/* For 32 bit machines. */
- /* *INDENT-OFF* */
CLIB_PACKED (struct {
u32 checksum_data_32[5];
});
- /* *INDENT-ON* */
} ip4_header_t;
/* Value of ip_version_and_header_length for packets w/o options. */
@@ -200,9 +196,7 @@ ip4_next_header (ip4_header_t * i)
/* Turn off array bounds check due to ip4_header_t
option field operations. */
-/* *INDENT-OFF* */
WARN_OFF(array-bounds)
-/* *INDENT-ON* */
static_always_inline u16
ip4_header_checksum_inline (ip4_header_t * i, int with_checksum)
@@ -305,9 +299,7 @@ ip4_header_checksum_inline (ip4_header_t * i, int with_checksum)
return ~((u16) sum);
}
-/* *INDENT-OFF* */
WARN_ON(array-bounds)
-/* *INDENT-ON* */
always_inline u16
ip4_header_checksum (ip4_header_t * i)
diff --git a/src/vnet/ip/ip4_punt_drop.c b/src/vnet/ip/ip4_punt_drop.c
index f2985a244aa..b8cc3304437 100644
--- a/src/vnet/ip/ip4_punt_drop.c
+++ b/src/vnet/ip/ip4_punt_drop.c
@@ -18,7 +18,6 @@
#include <vnet/policer/policer.h>
#include <vnet/policer/police_inlines.h>
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_punt) =
{
.arc_name = "ip4-punt",
@@ -30,7 +29,6 @@ VNET_FEATURE_ARC_INIT (ip4_drop) =
.arc_name = "ip4-drop",
.start_nodes = VNET_FEATURES ("ip4-drop", "ip4-not-enabled"),
};
-/* *INDENT-ON* */
extern ip_punt_policer_t ip4_punt_policer_cfg;
@@ -89,7 +87,6 @@ VLIB_NODE_FN (ip4_punt_policer_node) (vlib_main_t * vm,
ip4_punt_policer_cfg.policer_index));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_punt_policer_node) = {
.name = "ip4-punt-policer",
.vector_size = sizeof (u32),
@@ -109,7 +106,6 @@ VNET_FEATURE_INIT (ip4_punt_policer_node) = {
.node_name = "ip4-punt-policer",
.runs_before = VNET_FEATURES("ip4-punt-redirect"),
};
-/* *INDENT-ON* */
#define foreach_ip4_punt_redirect_error \
@@ -138,7 +134,6 @@ VLIB_NODE_FN (ip4_punt_redirect_node) (vlib_main_t * vm,
FIB_PROTOCOL_IP4));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_punt_redirect_node) = {
.name = "ip4-punt-redirect",
.vector_size = sizeof (u32),
@@ -160,7 +155,6 @@ VNET_FEATURE_INIT (ip4_punt_redirect_node, static) = {
.node_name = "ip4-punt-redirect",
.runs_before = VNET_FEATURES("error-punt"),
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_drop_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
@@ -194,7 +188,6 @@ ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vnet_feat_arc_ip4_punt.feature_arc_index);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_drop_node) =
{
.name = "ip4-drop",
@@ -237,7 +230,6 @@ VNET_FEATURE_INIT (ip4_drop_end_of_arc, static) = {
.node_name = "error-drop",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON */
#ifndef CLIB_MARCH_VARIANT
void
@@ -301,14 +293,12 @@ done:
* @cliexpar
* @cliexcmd{set ip punt policer <INDEX>}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip4_punt_policer_command, static) =
{
.path = "ip punt policer",
.function = ip4_punt_police_cmd,
.short_help = "ip punt policer [add|del] <index>",
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
@@ -404,14 +394,12 @@ done:
* @cliexpar
* @cliexcmd{set ip punt policer}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip4_punt_redirect_command, static) =
{
.path = "ip punt redirect",
.function = ip4_punt_redirect_cmd,
.short_help = "ip punt redirect [add|del] rx [<interface>|all] via [<nh>] <tx_interface>",
};
-/* *INDENT-ON* */
static clib_error_t *
ip4_punt_redirect_show_cmd (vlib_main_t * vm,
@@ -428,7 +416,6 @@ ip4_punt_redirect_show_cmd (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{set ip punt redierect}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip4_punt_redirect_command, static) =
{
.path = "show ip punt redirect",
@@ -436,7 +423,6 @@ VLIB_CLI_COMMAND (show_ip4_punt_redirect_command, static) =
.short_help = "show ip punt redirect",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip4_source_and_port_range_check.c b/src/vnet/ip/ip4_source_and_port_range_check.c
index 2edbeeddf10..27b2d549ea7 100644
--- a/src/vnet/ip/ip4_source_and_port_range_check.c
+++ b/src/vnet/ip/ip4_source_and_port_range_check.c
@@ -563,7 +563,6 @@ ip4_source_and_port_range_check_tx (vlib_main_t * vm,
if this changes can easily make new function
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_source_port_and_range_check_rx) = {
.function = ip4_source_and_port_range_check_rx,
.name = "ip4-source-and-port-range-check-rx",
@@ -580,9 +579,7 @@ VLIB_REGISTER_NODE (ip4_source_port_and_range_check_rx) = {
.format_buffer = format_ip4_header,
.format_trace = format_ip4_source_and_port_range_check_trace,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_source_port_and_range_check_tx) = {
.function = ip4_source_and_port_range_check_tx,
.name = "ip4-source-and-port-range-check-tx",
@@ -599,7 +596,6 @@ VLIB_REGISTER_NODE (ip4_source_port_and_range_check_tx) = {
.format_buffer = format_ip4_header,
.format_trace = format_ip4_source_and_port_range_check_trace,
};
-/* *INDENT-ON* */
int
set_ip_source_and_port_range_check (vlib_main_t * vm,
@@ -797,13 +793,11 @@ set_ip_source_and_port_range_check_fn (vlib_main_t * vm,
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_source_and_port_range_check_command, static) = {
.path = "set interface ip source-and-port-range-check",
.function = set_ip_source_and_port_range_check_fn,
.short_help = "set interface ip source-and-port-range-check <interface> [tcp-out-vrf <table-id>] [udp-out-vrf <table-id>] [tcp-in-vrf <table-id>] [udp-in-vrf <table-id>] [del]",
};
-/* *INDENT-ON* */
static u8 *
format_ppr_dpo (u8 * s, va_list * args)
@@ -1264,14 +1258,12 @@ ip_source_and_port_range_check_command_fn (vlib_main_t * vm,
* Example of how to delete an IPv4 subnet and range of ports from an IPv4 FIB table:
* @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 range 23 - 100 del}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip_source_and_port_range_check_command, static) = {
.path = "set ip source-and-port-range-check",
.function = ip_source_and_port_range_check_command_fn,
.short_help =
"set ip source-and-port-range-check vrf <table-id> <ip-addr>/<mask> {port nn | range <nn> - <nn>} [del]",
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -1390,14 +1382,12 @@ show_source_and_port_range_check_fn (vlib_main_t * vm,
* 172.16.2.2 port 250 FAIL
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_source_and_port_range_check, static) = {
.path = "show ip source-and-port-range-check",
.function = show_source_and_port_range_check_fn,
.short_help =
"show ip source-and-port-range-check vrf <table-id> <ip-addr> [port <n>]",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip4_to_ip6.h b/src/vnet/ip/ip4_to_ip6.h
index a6d87f1f962..57c2b6ff78b 100644
--- a/src/vnet/ip/ip4_to_ip6.h
+++ b/src/vnet/ip/ip4_to_ip6.h
@@ -28,14 +28,12 @@
typedef int (*ip4_to_ip6_set_fn_t) (vlib_buffer_t * b, ip4_header_t * ip4,
ip6_header_t * ip6, void *ctx);
-/* *INDENT-OFF* */
static u8 icmp_to_icmp6_updater_pointer_table[] =
{ 0, 1, 4, 4, ~0,
~0, ~0, ~0, 7, 6,
~0, ~0, 8, 8, 8,
8, 24, 24, 24, 24
};
-/* *INDENT-ON* */
#define frag_id_4to6(id) (id)
diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h
index f33780f1a98..56eec523d5b 100644
--- a/src/vnet/ip/ip6.h
+++ b/src/vnet/ip/ip6.h
@@ -238,7 +238,6 @@ ip6_interface_address_matching_destination (ip6_main_t * im,
ip_interface_address_t *ia;
ip6_address_t *result = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm, ia, sw_if_index,
1 /* honor unnumbered */,
({
@@ -249,7 +248,6 @@ ip6_interface_address_matching_destination (ip6_main_t * im,
break;
}
}));
- /* *INDENT-ON* */
if (result_ia)
*result_ia = result ? ia : 0;
return result;
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index 84ea5a068aa..48fb633fd32 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -71,7 +71,6 @@ ip6_add_interface_prefix_routes (ip6_main_t * im,
ip_lookup_main_t *lm = &im->lookup_main;
ip_interface_prefix_t *if_prefix;
- /* *INDENT-OFF* */
ip_interface_prefix_key_t key = {
.prefix = {
.fp_len = address_length,
@@ -85,7 +84,6 @@ ip6_add_interface_prefix_routes (ip6_main_t * im,
},
.sw_if_index = sw_if_index,
};
- /* *INDENT-ON* */
/* If prefix already set on interface, just increment ref count & return */
if_prefix = ip_get_interface_prefix (lm, &key);
@@ -178,7 +176,6 @@ ip6_del_interface_prefix_routes (ip6_main_t * im,
ip_lookup_main_t *lm = &im->lookup_main;
ip_interface_prefix_t *if_prefix;
- /* *INDENT-OFF* */
ip_interface_prefix_key_t key = {
.prefix = {
.fp_len = address_length,
@@ -192,13 +189,12 @@ ip6_del_interface_prefix_routes (ip6_main_t * im,
},
.sw_if_index = sw_if_index,
};
- /* *INDENT-ON* */
if_prefix = ip_get_interface_prefix (lm, &key);
if (!if_prefix)
{
clib_warning ("Prefix not found while deleting %U",
- format_ip4_address_and_length, address, address_length);
+ format_ip6_address_and_length, address, address_length);
return;
}
@@ -283,7 +279,6 @@ ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
ip_interface_address_t *ia = 0;
ip6_address_t *result = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm, ia, sw_if_index,
1 /* honor unnumbered */,
({
@@ -291,7 +286,6 @@ ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
result = a;
break;
}));
- /* *INDENT-ON* */
return result;
}
@@ -359,7 +353,6 @@ ip6_add_del_interface_address (vlib_main_t * vm,
vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
vec_add1 (addr_fib, ip6_af);
- /* *INDENT-OFF* */
if (!is_del)
{
/* When adding an address check that it does not conflict
@@ -417,7 +410,6 @@ ip6_add_del_interface_address (vlib_main_t * vm,
}
}
}
- /* *INDENT-ON* */
if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
@@ -537,7 +529,6 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
0 /* honor unnumbered */,
({
@@ -550,7 +541,6 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
ip6_del_interface_routes (sw_if_index, im, fib_index,
a, ia->address_length);
}));
- /* *INDENT-ON* */
return 0;
}
@@ -558,7 +548,6 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
/* Built-in ip6 unicast rx feature path definition */
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip6_unicast, static) =
{
.arc_name = "ip6-unicast",
@@ -683,7 +672,6 @@ VNET_FEATURE_INIT (ip6_interface_output, static) = {
.node_name = "interface-output",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
@@ -709,13 +697,11 @@ ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
vlib_main_t *vm = vlib_get_main ();
vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
({
address = ip_interface_address_get_address (lm6, ia);
ip6_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
}));
- /* *INDENT-ON* */
ip6_mfib_interface_enable_disable (sw_if_index, 0);
if (0 != im6->fib_index_by_sw_if_index[sw_if_index])
@@ -748,7 +734,6 @@ VLIB_NODE_FN (ip6_lookup_node) (vlib_main_t * vm,
static u8 *format_ip6_lookup_trace (u8 * s, va_list * args);
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_lookup_node) =
{
.name = "ip6-lookup",
@@ -757,7 +742,6 @@ VLIB_REGISTER_NODE (ip6_lookup_node) =
.n_next_nodes = IP6_LOOKUP_N_NEXT,
.next_nodes = IP6_LOOKUP_NEXT_NODES,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -935,7 +919,6 @@ VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_load_balance_node) =
{
.name = "ip6-load-balance",
@@ -943,7 +926,6 @@ VLIB_REGISTER_NODE (ip6_load_balance_node) =
.sibling_of = "ip6-lookup",
.format_trace = format_ip6_lookup_trace,
};
-/* *INDENT-ON* */
typedef struct
{
@@ -966,8 +948,7 @@ format_ip6_forward_next_trace (u8 * s, va_list * args)
ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
u32 indent = format_get_indent (s);
- s = format (s, "%Ufib:%d adj:%d flow:%d",
- format_white_space, indent,
+ s = format (s, "%Ufib:%d adj:%d flow:0x%08x", format_white_space, indent,
t->fib_index, t->adj_index, t->flow_hash);
s = format (s, "\n%U%U",
format_white_space, indent,
@@ -1234,12 +1215,10 @@ ip6_next_proto_is_tcp_udp (vlib_buffer_t * p0, ip6_header_t * ip0,
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip6_local) = {
.arc_name = "ip6-local",
.start_nodes = VNET_FEATURES ("ip6-local", "ip6-receive"),
};
-/* *INDENT-ON* */
static_always_inline u8
ip6_tcp_udp_icmp_bad_length (vlib_main_t * vm, vlib_buffer_t * p0)
@@ -1991,13 +1970,6 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
if (is_midchain)
{
- /* before we paint on the next header, update the L4
- * checksums if required, since there's no offload on a tunnel */
- vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ ,
- 1 /* is_ip6 */ );
- vnet_calc_checksums_inline (vm, p1, 0 /* is_ip4 */ ,
- 1 /* is_ip6 */ );
-
/* Guess we are only writing on ipv6 header. */
vnet_rewrite_two_headers (adj0[0], adj1[0],
ip0, ip1, sizeof (ip6_header_t));
@@ -2091,9 +2063,6 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
if (is_midchain)
{
- vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ ,
- 1 /* is_ip6 */ );
-
/* Guess we are only writing on ip6 header. */
vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip6_header_t));
}
@@ -2243,7 +2212,6 @@ VLIB_NODE_FN (ip6_mcast_midchain_node) (vlib_main_t * vm,
return ip6_rewrite_inline (vm, node, frame, 0, 1, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_midchain_node) = {
.name = "ip6-midchain",
.vector_size = sizeof (u32),
@@ -2256,8 +2224,6 @@ VLIB_REGISTER_NODE (ip6_rewrite_node) =
.name = "ip6-rewrite",
.vector_size = sizeof (u32),
.format_trace = format_ip6_rewrite_trace,
- .n_errors = IP6_N_ERROR,
- .error_counters = ip6_error_counters,
.n_next_nodes = IP6_REWRITE_N_NEXT,
.next_nodes =
{
@@ -2292,7 +2258,6 @@ VLIB_REGISTER_NODE (ip6_mcast_midchain_node) =
.sibling_of = "ip6-rewrite",
};
-/* *INDENT-ON* */
/*
* Hop-by-Hop handling
@@ -2306,7 +2271,6 @@ _(PROCESSED, "pkts with ip6 hop-by-hop options") \
_(FORMAT, "incorrectly formatted hop-by-hop options") \
_(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")
-/* *INDENT-OFF* */
typedef enum
{
#define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
@@ -2314,7 +2278,6 @@ typedef enum
#undef _
IP6_HOP_BY_HOP_N_ERROR,
} ip6_hop_by_hop_error_t;
-/* *INDENT-ON* */
/*
* Primary h-b-h handler trace support
@@ -2741,7 +2704,6 @@ VLIB_NODE_FN (ip6_hop_by_hop_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
{
.name = "ip6-hop-by-hop",
@@ -2753,7 +2715,6 @@ VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
.error_strings = ip6_hop_by_hop_error_strings,
.n_next_nodes = 0,
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_hop_by_hop_init (vlib_main_t * vm)
@@ -3005,14 +2966,12 @@ set_ip6_flow_hash_command_fn (vlib_main_t * vm,
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) = {
.path = "set ip6 flow-hash",
.short_help = "set ip6 flow-hash table <table-id> [src] [dst] [sport] "
"[dport] [proto] [reverse] [flowlabel]",
.function = set_ip6_flow_hash_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_ip6_local_command_fn (vlib_main_t * vm,
@@ -3053,14 +3012,12 @@ show_ip6_local_command_fn (vlib_main_t * vm,
* 115
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip6_local, static) =
{
.path = "show ip6 local",
.function = show_ip6_local_command_fn,
.short_help = "show ip6 local",
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
int
@@ -3172,7 +3129,6 @@ set_ip6_classify_command_fn (vlib_main_t * vm,
* Example of how to assign a classification table to an interface:
* @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
{
.path = "set ip6 classify",
@@ -3180,7 +3136,6 @@ VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
"set ip6 classify intfc <interface> table-index <classify-idx>",
.function = set_ip6_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip6_hop_by_hop.c b/src/vnet/ip/ip6_hop_by_hop.c
index e66084c2c4d..412741abcf8 100644
--- a/src/vnet/ip/ip6_hop_by_hop.c
+++ b/src/vnet/ip/ip6_hop_by_hop.c
@@ -438,8 +438,7 @@ VLIB_NODE_FN (ip6_add_hop_by_hop_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = /* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) =
{
.name = "ip6-add-hop-by-hop",
.vector_size = sizeof (u32),
@@ -455,7 +454,6 @@ VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = /* *INDENT-OFF* */
#undef _
},
};
-/* *INDENT-ON* */
/* The main h-b-h tracer was already invoked, no need to do much here */
typedef struct
@@ -778,7 +776,6 @@ VLIB_NODE_FN (ip6_pop_hop_by_hop_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) =
{
.name = "ip6-pop-hop-by-hop",
@@ -791,7 +788,6 @@ VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) =
/* See ip/lookup.h */
.n_next_nodes = 0,
};
-/* *INDENT-ON* */
typedef struct
{
@@ -1006,7 +1002,6 @@ VLIB_NODE_FN (ip6_local_hop_by_hop_node) (vlib_main_t * vm,
}
#ifndef CLIB_MARCH_VARIANT
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_local_hop_by_hop_node) =
{
.name = "ip6-local-hop-by-hop",
@@ -1025,7 +1020,6 @@ VLIB_REGISTER_NODE (ip6_local_hop_by_hop_node) =
[IP6_LOCAL_HOP_BY_HOP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
clib_error_t *
show_ip6_hbh_command_fn (vlib_main_t * vm,
@@ -1059,13 +1053,11 @@ show_ip6_hbh_command_fn (vlib_main_t * vm,
* Display ip6 local hop-by-hop next protocol handler nodes
* @cliexcmd{show ip6 hbh}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip6_hbh, static) = {
.path = "show ip6 hbh",
.short_help = "show ip6 hbh",
.function = show_ip6_hbh_command_fn,
};
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
@@ -1105,12 +1097,10 @@ ip6_hop_by_hop_ioam_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_init) =
{
.runs_after = VLIB_INITS("ip_main_init", "ip6_lookup_init"),
};
-/* *INDENT-ON* */
void
ip6_local_hop_by_hop_register_protocol (u32 protocol, u32 node_index)
@@ -1264,13 +1254,11 @@ clear_ioam_rewrite_command_fn (vlib_main_t * vm,
* Example of how to clear iOAM features:
* @cliexcmd{clear ioam rewrite}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_clear_ioam_rewrite_cmd, static) = {
.path = "clear ioam rewrite",
.short_help = "clear ioam rewrite",
.function = clear_ioam_rewrite_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
ip6_ioam_enable (int has_trace_option, int has_pot_option,
@@ -1371,13 +1359,11 @@ ip6_set_ioam_rewrite_command_fn (vlib_main_t * vm,
* Example of how to enable trace and pot with ppc set to encap:
* @cliexcmd{set ioam rewrite trace pot ppc encap}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_set_ioam_rewrite_cmd, static) = {
.path = "set ioam rewrite",
.short_help = "set ioam [trace] [pot] [seqno] [analyse]",
.function = ip6_set_ioam_rewrite_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm,
@@ -1455,13 +1441,11 @@ ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm,
* EDGE TO EDGE - PPC OPTION - 1 (Encap)
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_show_ioam_run_cmd, static) = {
.path = "show ioam summary",
.short_help = "show ioam summary",
.function = ip6_show_ioam_summary_cmd_fn,
};
-/* *INDENT-ON* */
void
vnet_register_ioam_end_of_path_callback (void *cb)
diff --git a/src/vnet/ip/ip6_inlines.h b/src/vnet/ip/ip6_inlines.h
index 9c2be60b267..9bd475224eb 100644
--- a/src/vnet/ip/ip6_inlines.h
+++ b/src/vnet/ip/ip6_inlines.h
@@ -49,29 +49,40 @@ always_inline u32
ip6_compute_flow_hash (const ip6_header_t * ip,
flow_hash_config_t flow_hash_config)
{
- tcp_header_t *tcp;
+ const tcp_header_t *tcp;
+ const udp_header_t *udp = (void *) (ip + 1);
+ const gtpv1u_header_t *gtpu = (void *) (udp + 1);
u64 a, b, c;
u64 t1, t2;
+ u32 t3;
uword is_tcp_udp = 0;
u8 protocol = ip->protocol;
+ uword is_udp = protocol == IP_PROTOCOL_UDP;
- if (PREDICT_TRUE
- ((ip->protocol == IP_PROTOCOL_TCP)
- || (ip->protocol == IP_PROTOCOL_UDP)))
+ if (PREDICT_TRUE ((protocol == IP_PROTOCOL_TCP) || is_udp))
{
is_tcp_udp = 1;
tcp = (void *) (ip + 1);
}
- else if (ip->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ else
{
- ip6_hop_by_hop_header_t *hbh = (ip6_hop_by_hop_header_t *) (ip + 1);
- if ((hbh->protocol == IP_PROTOCOL_TCP) ||
- (hbh->protocol == IP_PROTOCOL_UDP))
+ const void *cur = ip + 1;
+ if (protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ {
+ const ip6_hop_by_hop_header_t *hbh = cur;
+ protocol = hbh->protocol;
+ cur += (hbh->length + 1) * 8;
+ }
+ if (protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ {
+ const ip6_fragment_ext_header_t *frag = cur;
+ protocol = frag->protocol;
+ }
+ else if (protocol == IP_PROTOCOL_TCP || protocol == IP_PROTOCOL_UDP)
{
is_tcp_udp = 1;
- tcp = (tcp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3));
+ tcp = cur;
}
- protocol = hbh->protocol;
}
t1 = (ip->src_address.as_u64[0] ^ ip->src_address.as_u64[1]);
@@ -113,7 +124,13 @@ ip6_compute_flow_hash (const ip6_header_t * ip,
((flow_hash_config & IP_FLOW_HASH_FL) ? ip6_flow_label_network_order (ip) :
0);
c ^= t1;
-
+ if (PREDICT_TRUE (is_udp) &&
+ PREDICT_FALSE ((flow_hash_config & IP_FLOW_HASH_GTPV1_TEID) &&
+ udp->dst_port == GTPV1_PORT_BE))
+ {
+ t3 = gtpu->teid;
+ a ^= t3;
+ }
hash_mix64 (a, b, c);
return (u32) c;
}
diff --git a/src/vnet/ip/ip6_input.c b/src/vnet/ip/ip6_input.c
index 8d89890f999..64c9d76ebaa 100644
--- a/src/vnet/ip/ip6_input.c
+++ b/src/vnet/ip/ip6_input.c
@@ -219,7 +219,6 @@ VLIB_NODE_FN (ip6_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_input_node) = {
.name = "ip6-input",
.vector_size = sizeof (u32),
@@ -238,7 +237,6 @@ VLIB_REGISTER_NODE (ip6_input_node) = {
.format_buffer = format_ip6_header,
.format_trace = format_ip6_input_trace,
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_init (vlib_main_t * vm)
diff --git a/src/vnet/ip/ip6_link.c b/src/vnet/ip/ip6_link.c
index afa9d8e3ea9..c2a7ccacbc1 100644
--- a/src/vnet/ip/ip6_link.c
+++ b/src/vnet/ip/ip6_link.c
@@ -242,12 +242,10 @@ ip6_link_delegate_flush (ip6_link_t * il)
{
ip6_link_delegate_t *ild;
- /* *INDENT-OFF* */
FOREACH_IP6_LINK_DELEGATE (ild, il,
({
il_delegate_vfts[ild->ild_type].ildv_disable(ild->ild_index);
}));
- /* *INDENT-ON* */
vec_free (il->il_delegates);
il->il_delegates = NULL;
@@ -357,14 +355,12 @@ ip6_link_set_local_address (u32 sw_if_index, const ip6_address_t * address)
ip6_address_copy (&ilp.ilp_addr, address);
ip6_ll_table_entry_update (&ilp, FIB_ROUTE_PATH_LOCAL);
- /* *INDENT-OFF* */
FOREACH_IP6_LINK_DELEGATE (ild, il,
({
if (NULL != il_delegate_vfts[ild->ild_type].ildv_ll_change)
il_delegate_vfts[ild->ild_type].ildv_ll_change(ild->ild_index,
&il->il_ll_addr);
}));
- /* *INDENT-ON* */
return (0);
}
@@ -465,7 +461,6 @@ ip6_link_add_del_address (ip6_main_t * im,
if (NULL == il)
return;
- /* *INDENT-OFF* */
FOREACH_IP6_LINK_DELEGATE (ild, il,
({
if (is_delete)
@@ -481,7 +476,6 @@ ip6_link_add_del_address (ip6_main_t * im,
address, address_length);
}
}));
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -555,14 +549,12 @@ test_ip6_link_command_fn (vlib_main_t * vm,
* Original MAC address: 16:d9:e0:91:79:86
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_link_command, static) =
{
.path = "test ip6 link",
.function = test_ip6_link_command_fn,
.short_help = "test ip6 link <mac-address>",
};
-/* *INDENT-ON* */
static u8 *
ip6_print_addrs (u8 * s, u32 * addrs)
@@ -594,11 +586,10 @@ format_ip6_link (u8 * s, va_list * arg)
if (!ip6_link_is_enabled_i (il))
return (s);
- s = format (s, "%U is admin %s\n",
- format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface (vnm, il->il_sw_if_index),
- (vnet_sw_interface_is_admin_up (vnm, il->il_sw_if_index) ?
- "up" : "down"));
+ s = format (
+ s, "%U is admin %s\n", format_vnet_sw_if_index_name, vnm,
+ il->il_sw_if_index,
+ (vnet_sw_interface_is_admin_up (vnm, il->il_sw_if_index) ? "up" : "down"));
u32 ai;
u32 *link_scope = 0, *global_scope = 0;
@@ -660,13 +651,11 @@ format_ip6_link (u8 * s, va_list * arg)
s = format (s, "%U%U\n",
format_white_space, 4, format_ip6_address, &il->il_ll_addr);
- /* *INDENT-OFF* */
FOREACH_IP6_LINK_DELEGATE(ild, il,
({
s = format (s, "%U", il_delegate_vfts[ild->ild_type].ildv_format,
ild->ild_index, 2);
}));
- /* *INDENT-ON* */
return (s);
}
@@ -739,14 +728,12 @@ ip6_link_show (vlib_main_t * vm,
* show ip6 interface: IPv6 not enabled on interface
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_link_show_command, static) =
{
.path = "show ip6 interface",
.function = ip6_link_show,
.short_help = "show ip6 interface <interface>",
};
-/* *INDENT-ON* */
static clib_error_t *
enable_ip6_interface_cmd (vlib_main_t * vm,
@@ -779,14 +766,12 @@ enable_ip6_interface_cmd (vlib_main_t * vm,
* Example of how enable IPv6 on a given interface:
* @cliexcmd{enable ip6 interface GigabitEthernet2/0/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (enable_ip6_interface_command, static) =
{
.path = "enable ip6 interface",
.function = enable_ip6_interface_cmd,
.short_help = "enable ip6 interface <interface>",
};
-/* *INDENT-ON* */
static clib_error_t *
disable_ip6_interface_cmd (vlib_main_t * vm,
@@ -819,14 +804,12 @@ disable_ip6_interface_cmd (vlib_main_t * vm,
* Example of how disable IPv6 on a given interface:
* @cliexcmd{disable ip6 interface GigabitEthernet2/0/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (disable_ip6_interface_command, static) =
{
.path = "disable ip6 interface",
.function = disable_ip6_interface_cmd,
.short_help = "disable ip6 interface <interface>",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip6_ll_table.c b/src/vnet/ip/ip6_ll_table.c
index b3f42daf26c..f9172f6c50c 100644
--- a/src/vnet/ip/ip6_ll_table.c
+++ b/src/vnet/ip/ip6_ll_table.c
@@ -52,9 +52,8 @@ ip6_ll_fib_create (u32 sw_if_index)
vnet_main_t *vnm = vnet_get_main ();
u8 *desc;
- desc = format (NULL, "IP6-link-local:%U",
- format_vnet_sw_interface_name,
- vnm, vnet_get_sw_interface (vnm, sw_if_index));
+ desc = format (NULL, "IP6-link-local:%U", format_vnet_sw_if_index_name, vnm,
+ sw_if_index);
ip6_ll_table.ilt_fibs[sw_if_index] =
ip6_fib_table_create_and_lock (FIB_SOURCE_IP6_ND,
@@ -64,7 +63,6 @@ ip6_ll_fib_create (u32 sw_if_index)
* leave the default route as a drop, but fix fe::/10 to be a glean
* via the interface.
*/
- /* *INDENT-OFF* */
fib_prefix_t pfx = {
.fp_proto = FIB_PROTOCOL_IP6,
.fp_len = 10,
@@ -90,7 +88,6 @@ ip6_ll_fib_create (u32 sw_if_index)
1,
NULL,
FIB_ROUTE_PATH_FLAG_NONE);
- /* *INDENT-ON* */
}
static void
@@ -111,8 +108,13 @@ ip6_ll_table_entry_update (const ip6_ll_prefix_t * ilp,
.frp_flags = flags,
.frp_sw_if_index = ilp->ilp_sw_if_index,
.frp_proto = DPO_PROTO_IP6,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
};
- fib_prefix_t fp;
+ fib_prefix_t fp = { 0 };
+
+ if (flags & FIB_ROUTE_PATH_LOCAL)
+ rpath.frp_addr.ip6 = ilp->ilp_addr;
vec_validate_init_empty (ip6_ll_table.ilt_fibs, ilp->ilp_sw_if_index, ~0);
@@ -345,13 +347,11 @@ ip6_ll_show_fib (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_show_fib_command, static) = {
.path = "show ip6-ll",
.short_help = "show ip6-ll [summary] [interface] [<ip6-addr>[/<width>]] [detail]",
.function = ip6_ll_show_fib,
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_ll_sw_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add)
diff --git a/src/vnet/ip/ip6_ll_types.c b/src/vnet/ip/ip6_ll_types.c
index a7ac164b05a..b074b6e991c 100644
--- a/src/vnet/ip/ip6_ll_types.c
+++ b/src/vnet/ip/ip6_ll_types.c
@@ -23,10 +23,8 @@ format_ip6_ll_prefix (u8 * s, va_list * args)
ip6_ll_prefix_t *ilp = va_arg (*args, ip6_ll_prefix_t *);
vnet_main_t *vnm = vnet_get_main ();
- s = format (s, "(%U, %U)",
- format_ip6_address, &ilp->ilp_addr,
- format_vnet_sw_interface_name,
- vnm, vnet_get_sw_interface (vnm, ilp->ilp_sw_if_index));
+ s = format (s, "(%U, %U)", format_ip6_address, &ilp->ilp_addr,
+ format_vnet_sw_if_index_name, vnm, ilp->ilp_sw_if_index);
return (s);
}
diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h
index e71604ce7d3..c506792ddcf 100644
--- a/src/vnet/ip/ip6_packet.h
+++ b/src/vnet/ip/ip6_packet.h
@@ -441,6 +441,13 @@ typedef CLIB_PACKED (struct {
}) ip6_router_alert_option_t;
typedef CLIB_PACKED (struct {
+ u8 protocol;
+ u8 reserved;
+ u16 fragoff;
+ u32 id;
+}) ip6_fragment_ext_header_t;
+
+typedef CLIB_PACKED (struct {
u8 next_hdr;
/* Length of this header plus option data in 8 byte units. */
u8 n_data_u64s;
diff --git a/src/vnet/ip/ip6_punt_drop.c b/src/vnet/ip/ip6_punt_drop.c
index 32a2ab760ff..78ca9521f53 100644
--- a/src/vnet/ip/ip6_punt_drop.c
+++ b/src/vnet/ip/ip6_punt_drop.c
@@ -18,7 +18,6 @@
#include <vnet/policer/policer.h>
#include <vnet/policer/police_inlines.h>
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip6_punt) =
{
.arc_name = "ip6-punt",
@@ -30,7 +29,6 @@ VNET_FEATURE_ARC_INIT (ip6_drop) =
.arc_name = "ip6-drop",
.start_nodes = VNET_FEATURES ("ip6-drop", "ip6-not-enabled"),
};
-/* *INDENT-ON* */
extern ip_punt_policer_t ip6_punt_policer_cfg;
@@ -77,7 +75,6 @@ VLIB_NODE_FN (ip6_punt_policer_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_punt_policer_node) = {
.name = "ip6-punt-policer",
@@ -99,7 +96,6 @@ VNET_FEATURE_INIT (ip6_punt_policer_node, static) = {
.node_name = "ip6-punt-policer",
.runs_before = VNET_FEATURES("ip6-punt-redirect")
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_drop_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
@@ -134,7 +130,6 @@ VLIB_NODE_FN (ip6_punt_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_feat_arc_ip6_punt.feature_arc_index);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_drop_node) =
{
.name = "ip6-drop",
@@ -175,7 +170,6 @@ VNET_FEATURE_INIT (ip6_drop_end_of_arc, static) = {
.node_name = "error-drop",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON */
#ifndef CLIB_MARCH_VARIANT
void
@@ -239,7 +233,6 @@ done:
* @cliexpar
* @cliexcmd{set ip punt policer <INDEX>}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_punt_policer_command, static) =
{
.path = "ip6 punt policer",
@@ -247,7 +240,6 @@ VLIB_CLI_COMMAND (ip6_punt_policer_command, static) =
.short_help = "ip6 punt policer [add|del] <index>",
};
-/* *INDENT-ON* */
#define foreach_ip6_punt_redirect_error \
_(DROP, "ip6 punt redirect drop")
@@ -275,7 +267,6 @@ VLIB_NODE_FN (ip6_punt_redirect_node) (vlib_main_t * vm,
FIB_PROTOCOL_IP6));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_punt_redirect_node) = {
.name = "ip6-punt-redirect",
.vector_size = sizeof (u32),
@@ -297,7 +288,6 @@ VNET_FEATURE_INIT (ip6_punt_redirect_node, static) = {
.node_name = "ip6-punt-redirect",
.runs_before = VNET_FEATURES("error-punt")
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
@@ -393,14 +383,12 @@ done:
* @cliexpar
* @cliexcmd{set ip punt policer <INDEX>}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_punt_redirect_command, static) =
{
.path = "ip6 punt redirect",
.function = ip6_punt_redirect_cmd,
.short_help = "ip6 punt redirect [add|del] rx [<interface>|all] via [<nh>] <tx_interface>",
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
@@ -421,7 +409,6 @@ ip6_punt_redirect_show_cmd (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{set ip punt policer <INDEX>}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip6_punt_redirect_command, static) =
{
.path = "show ip6 punt redirect",
@@ -429,7 +416,6 @@ VLIB_CLI_COMMAND (show_ip6_punt_redirect_command, static) =
.short_help = "show ip6 punt redirect",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip6_to_ip4.h b/src/vnet/ip/ip6_to_ip4.h
index b1b5bdb2d11..29d5718d4da 100644
--- a/src/vnet/ip/ip6_to_ip4.h
+++ b/src/vnet/ip/ip6_to_ip4.h
@@ -31,7 +31,6 @@ typedef int (*ip6_to_ip4_tcp_udp_set_fn_t) (vlib_buffer_t * b,
ip6_header_t * ip6,
ip4_header_t * ip4, void *ctx);
-/* *INDENT-OFF* */
static u8 icmp6_to_icmp_updater_pointer_table[] =
{ 0, 1, ~0, ~0,
2, 2, 9, 8,
@@ -44,7 +43,6 @@ static u8 icmp6_to_icmp_updater_pointer_table[] =
24, 24, 24, 24,
24, 24, 24, 24
};
-/* *INDENT-ON* */
#define frag_id_6to4(id) ((id) ^ ((id) >> 16))
diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c
index e03b0103391..644b4988abc 100644
--- a/src/vnet/ip/ip_api.c
+++ b/src/vnet/ip/ip_api.c
@@ -106,7 +106,6 @@ vl_api_ip_table_dump_t_handler (vl_api_ip_table_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (fib_table, ip4_main.fibs)
{
send_ip_table_details(am, reg, mp->context, fib_table);
@@ -118,7 +117,6 @@ vl_api_ip_table_dump_t_handler (vl_api_ip_table_dump_t * mp)
continue;
send_ip_table_details(am, reg, mp->context, fib_table);
}
- /* *INDENT-ON* */
}
typedef struct vl_api_ip_fib_dump_walk_ctx_t_
@@ -326,7 +324,6 @@ vl_api_ip_mtable_dump_t_handler (vl_api_ip_mtable_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (mfib_table, ip4_main.mfibs)
{
send_ip_mtable_details (reg, mp->context, mfib_table);
@@ -335,7 +332,6 @@ vl_api_ip_mtable_dump_t_handler (vl_api_ip_mtable_dump_t * mp)
{
send_ip_mtable_details (reg, mp->context, mfib_table);
}
- /* *INDENT-ON* */
}
typedef struct vl_api_ip_mfib_dump_ctx_t_
@@ -782,12 +778,10 @@ vl_api_ip_route_add_del_t_handler (vl_api_ip_route_add_del_t * mp)
rv = ip_route_add_del_t_handler (mp, &stats_index);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IP_ROUTE_ADD_DEL_REPLY,
({
rmp->stats_index = htonl (stats_index);
}))
- /* *INDENT-ON* */
}
void
@@ -839,7 +833,6 @@ vl_api_ip_route_lookup_t_handler (vl_api_ip_route_lookup_t * mp)
}
}
- /* *INDENT-OFF* */
REPLY_MACRO3_ZERO(VL_API_IP_ROUTE_LOOKUP_REPLY,
npaths * sizeof (*fp),
({
@@ -859,7 +852,6 @@ vl_api_ip_route_lookup_t_handler (vl_api_ip_route_lookup_t * mp)
}
}
}));
- /* *INDENT-ON* */
vec_free (rpaths);
}
@@ -1049,12 +1041,10 @@ vl_api_ip_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp)
rv = api_mroute_add_del_t_handler (mp, &stats_index);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IP_MROUTE_ADD_DEL_REPLY,
({
rmp->stats_index = htonl (stats_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -1117,7 +1107,6 @@ vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp)
if (mp->is_ipv6)
{
- /* *INDENT-OFF* */
/* Do not send subnet details of the IP-interface for
* unnumbered interfaces. otherwise listening clients
* will be confused that the subnet is applied on more
@@ -1131,11 +1120,9 @@ vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp)
};
send_ip_address_details(am, reg, &pfx, sw_if_index, mp->context);
}));
- /* *INDENT-ON* */
}
else
{
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
({
fib_prefix_t pfx = {
@@ -1146,7 +1133,6 @@ vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp)
send_ip_address_details(am, reg, &pfx, sw_if_index, mp->context);
}));
- /* *INDENT-ON* */
}
BAD_SW_IF_INDEX_LABEL;
@@ -1203,7 +1189,6 @@ vl_api_ip_unnumbered_dump_t_handler (vl_api_ip_unnumbered_dump_t * mp)
}
else
{
- /* *INDENT-OFF* */
pool_foreach (si, im->sw_interfaces)
{
if ((si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED))
@@ -1214,7 +1199,6 @@ vl_api_ip_unnumbered_dump_t_handler (vl_api_ip_unnumbered_dump_t * mp)
mp->context);
}
}
- /* *INDENT-ON* */
}
BAD_SW_IF_INDEX_LABEL;
@@ -1238,12 +1222,10 @@ vl_api_ip_dump_t_handler (vl_api_ip_dump_t * mp)
/* Gather interfaces. */
sorted_sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
vec_set_len (sorted_sis, 0);
- /* *INDENT-OFF* */
pool_foreach (si, im->sw_interfaces)
{
vec_add1 (sorted_sis, si[0]);
}
- /* *INDENT-ON* */
vec_foreach (si, sorted_sis)
{
@@ -1298,6 +1280,22 @@ vl_api_set_ip_flow_hash_v2_t_handler (vl_api_set_ip_flow_hash_v2_t *mp)
}
static void
+vl_api_set_ip_flow_hash_v3_t_handler (vl_api_set_ip_flow_hash_v3_t *mp)
+{
+ vl_api_set_ip_flow_hash_v3_reply_t *rmp;
+ ip_address_family_t af;
+ int rv;
+
+ rv = ip_address_family_decode (mp->af, &af);
+
+ if (!rv)
+ rv = ip_flow_hash_set (af, htonl (mp->table_id),
+ htonl (mp->flow_hash_config));
+
+ REPLY_MACRO (VL_API_SET_IP_FLOW_HASH_V3_REPLY);
+}
+
+static void
vl_api_set_ip_flow_hash_router_id_t_handler (
vl_api_set_ip_flow_hash_router_id_t *mp)
{
@@ -1707,7 +1705,6 @@ vl_api_ip_table_flush_t_handler (vl_api_ip_table_flush_t * mp)
vnet_sw_interface_t *si;
/* Shut down interfaces in this FIB / clean out intfc routes */
- /* *INDENT-OFF* */
pool_foreach (si, im->sw_interfaces)
{
if (fib_index == fib_table_get_index_for_sw_if_index (fproto,
@@ -1718,7 +1715,6 @@ vl_api_ip_table_flush_t_handler (vl_api_ip_table_flush_t * mp)
vnet_sw_interface_set_flags (vnm, si->sw_if_index, flags);
}
}
- /* *INDENT-ON* */
fib_table_flush (fib_index, fproto, FIB_SOURCE_API);
mfib_table_flush (mfib_table_find (fproto, ntohl (mp->table.table_id)),
@@ -2117,17 +2113,21 @@ ip_api_hookup (vlib_main_t * vm)
api_main_t *am = vlibapi_get_main ();
/*
- * Mark the route add/del API as MP safe
+ * Set up the (msg_name, crc, message-id) table
*/
- vl_api_set_msg_thread_safe (am, VL_API_IP_ROUTE_ADD_DEL, 1);
- vl_api_set_msg_thread_safe (am, VL_API_IP_ROUTE_ADD_DEL_REPLY, 1);
- vl_api_set_msg_thread_safe (am, VL_API_IP_ROUTE_ADD_DEL_V2, 1);
- vl_api_set_msg_thread_safe (am, VL_API_IP_ROUTE_ADD_DEL_V2_REPLY, 1);
+ REPLY_MSG_ID_BASE = setup_message_id_table ();
/*
- * Set up the (msg_name, crc, message-id) table
+ * Mark the route add/del API as MP safe
*/
- REPLY_MSG_ID_BASE = setup_message_id_table ();
+ vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL,
+ 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_REPLY, 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_V2, 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_V2_REPLY, 1);
return 0;
}
diff --git a/src/vnet/ip/ip_checksum.c b/src/vnet/ip/ip_checksum.c
index 1ac7248ea05..4fbf1fb74fa 100644
--- a/src/vnet/ip/ip_checksum.c
+++ b/src/vnet/ip/ip_checksum.c
@@ -165,14 +165,12 @@ test_ip_checksum_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_checksum, static) =
{
.path = "test ip checksum",
.short_help = "test ip checksum",
.function = test_ip_checksum_fn,
};
-/* *INDENT-ON* */
#endif /* CLIB_DEBUG */
diff --git a/src/vnet/ip/ip_container_proxy.c b/src/vnet/ip/ip_container_proxy.c
index 18d07ba6082..1618704e804 100644
--- a/src/vnet/ip/ip_container_proxy.c
+++ b/src/vnet/ip/ip_container_proxy.c
@@ -138,7 +138,6 @@ ip_container_proxy_walk (ip_container_proxy_cb_t cb, void *ctx)
};
u32 fib_index;
- /* *INDENT-OFF* */
pool_foreach_index (fib_index, ip4_main.fibs)
{
fib_table_walk (fib_index, FIB_PROTOCOL_IP4,
@@ -149,7 +148,6 @@ ip_container_proxy_walk (ip_container_proxy_cb_t cb, void *ctx)
fib_table_walk (fib_index, FIB_PROTOCOL_IP6,
ip_container_proxy_fib_table_walk, &wctx);
}
- /* *INDENT-ON* */
}
clib_error_t *
@@ -216,14 +214,12 @@ ip_container_cmd (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip_container_command_node, static) = {
.path = "ip container",
.function = ip_container_cmd,
.short_help = "ip container <address> <interface>",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
clib_error_t *
show_ip_container_cmd_fn (vlib_main_t * vm, unformat_input_t * main_input,
@@ -275,14 +271,12 @@ show_ip_container_cmd_fn (vlib_main_t * vm, unformat_input_t * main_input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_container_command, static) = {
.path = "show ip container",
.function = show_ip_container_cmd_fn,
.short_help = "show ip container <address> <interface>",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip_flow_hash.h b/src/vnet/ip/ip_flow_hash.h
index bd37ef7307b..30dfcd70a1b 100644
--- a/src/vnet/ip/ip_flow_hash.h
+++ b/src/vnet/ip/ip_flow_hash.h
@@ -38,7 +38,17 @@
_ (proto, 4, IP_FLOW_HASH_PROTO) \
_ (reverse, 5, IP_FLOW_HASH_REVERSE_SRC_DST) \
_ (symmetric, 6, IP_FLOW_HASH_SYMMETRIC) \
- _ (flowlabel, 7, IP_FLOW_HASH_FL)
+ _ (flowlabel, 7, IP_FLOW_HASH_FL) \
+ _ (gtpv1teid, 8, IP_FLOW_HASH_GTPV1_TEID)
+
+typedef struct
+{
+ u8 ver_flags;
+ u8 type;
+ u16 length;
+ u32 teid;
+} __attribute__ ((packed)) gtpv1u_header_t;
+#define GTPV1_PORT_BE 0x6808
/**
* A flow hash configuration is a mask of the flow hash options
diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c
index 5e8d3682eaa..934e40a5d18 100644
--- a/src/vnet/ip/ip_frag.c
+++ b/src/vnet/ip/ip_frag.c
@@ -500,7 +500,6 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
return IP_FRAG_ERROR_NONE;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_frag_node) = {
.function = ip4_frag,
.name = IP4_FRAG_NODE_NAME,
@@ -519,9 +518,7 @@ VLIB_REGISTER_NODE (ip4_frag_node) = {
[IP_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
[IP_FRAG_NEXT_DROP] = "ip4-drop" },
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_frag_node) = {
.function = ip6_frag,
.name = IP6_FRAG_NODE_NAME,
@@ -540,7 +537,6 @@ VLIB_REGISTER_NODE (ip6_frag_node) = {
[IP_FRAG_NEXT_ICMP_ERROR] = "error-drop",
[IP_FRAG_NEXT_DROP] = "ip6-drop" },
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip_in_out_acl.c b/src/vnet/ip/ip_in_out_acl.c
index d8d6d768e93..eb3c94a188a 100644
--- a/src/vnet/ip/ip_in_out_acl.c
+++ b/src/vnet/ip/ip_in_out_acl.c
@@ -327,8 +327,9 @@ ip_in_out_acl_inline_trace (
{
while (1)
{
- if (PREDICT_TRUE (t[0]->next_table_index != ~0))
- t[0] = pool_elt_at_index (tables, t[0]->next_table_index);
+ table_index[0] = t[0]->next_table_index;
+ if (PREDICT_TRUE (table_index[0] != ~0))
+ t[0] = pool_elt_at_index (tables, table_index[0]);
else
{
_next[0] = (t[0]->miss_next_index < n_next_nodes) ?
@@ -434,8 +435,9 @@ ip_in_out_acl_inline_trace (
{
while (1)
{
- if (PREDICT_TRUE (t[1]->next_table_index != ~0))
- t[1] = pool_elt_at_index (tables, t[1]->next_table_index);
+ table_index[1] = t[1]->next_table_index;
+ if (PREDICT_TRUE (table_index[1] != ~0))
+ t[1] = pool_elt_at_index (tables, table_index[1]);
else
{
_next[1] = (t[1]->miss_next_index < n_next_nodes) ?
@@ -636,8 +638,9 @@ ip_in_out_acl_inline_trace (
{
while (1)
{
- if (PREDICT_TRUE (t0->next_table_index != ~0))
- t0 = pool_elt_at_index (tables, t0->next_table_index);
+ table_index0 = t0->next_table_index;
+ if (PREDICT_TRUE (table_index0 != ~0))
+ t0 = pool_elt_at_index (tables, table_index0);
else
{
next0 = (t0->miss_next_index < n_next_nodes) ?
@@ -813,7 +816,6 @@ VLIB_NODE_FN (ip4_outacl_node)
VLIB_TX, 1 /* is_output */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_inacl_node) = {
.name = "ip4-inacl",
.vector_size = sizeof (u32),
@@ -852,7 +854,6 @@ VLIB_REGISTER_NODE (ip4_outacl_node) = {
[ACL_NEXT_INDEX_DENY] = "ip4-drop",
},
};
-/* *INDENT-ON* */
VNET_FEATURE_INIT (ip4_punt_acl_feature) = {
.arc_name = "ip4-punt",
@@ -888,7 +889,6 @@ VLIB_NODE_FN (ip6_outacl_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
VLIB_TX, 1 /* is_output */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_inacl_node) = {
.name = "ip6-inacl",
.vector_size = sizeof (u32),
@@ -927,7 +927,6 @@ VLIB_REGISTER_NODE (ip6_outacl_node) = {
[ACL_NEXT_INDEX_DENY] = "ip6-drop",
},
};
-/* *INDENT-ON* */
VNET_FEATURE_INIT (ip6_punt_acl_feature) = {
.arc_name = "ip6-punt",
diff --git a/src/vnet/ip/ip_init.c b/src/vnet/ip/ip_init.c
index 8894a878881..c2490f196ef 100644
--- a/src/vnet/ip/ip_init.c
+++ b/src/vnet/ip/ip_init.c
@@ -104,7 +104,6 @@ do { \
return error;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip_main_init) = {
.init_order = VLIB_INITS ("vnet_main_init", "ip4_init", "ip6_init",
"icmp4_init", "icmp6_init", "ip6_hop_by_hop_init",
@@ -112,7 +111,6 @@ VLIB_INIT_FUNCTION (ip_main_init) = {
"in_out_acl_init", "policer_classify_init",
"flow_classify_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip_interface.c b/src/vnet/ip/ip_interface.c
index d5ee7fd9b2b..ca1938f651a 100644
--- a/src/vnet/ip/ip_interface.c
+++ b/src/vnet/ip/ip_interface.c
@@ -145,27 +145,23 @@ ip_interface_has_address (u32 sw_if_index, ip46_address_t * ip, u8 is_ip4)
{
ip_lookup_main_t *lm4 = &ip4_main.lookup_main;
ip4_address_t *ip4;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* unnumbered */ ,
({
ip4 = ip_interface_address_get_address (lm4, ia);
if (ip4_address_compare (ip4, &ip->ip4) == 0)
return 1;
}));
- /* *INDENT-ON* */
}
else
{
ip_lookup_main_t *lm6 = &ip6_main.lookup_main;
ip6_address_t *ip6;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* unnumbered */ ,
({
ip6 = ip_interface_address_get_address (lm6, ia);
if (ip6_address_compare (ip6, &ip->ip6) == 0)
return 1;
}));
- /* *INDENT-ON* */
}
return 0;
}
@@ -179,16 +175,13 @@ ip_interface_get_first_ip (u32 sw_if_index, u8 is_ip4)
if (is_ip4)
{
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* unnumbered */ ,
({
return ip_interface_address_get_address (lm4, ia);
}));
- /* *INDENT-ON* */
}
else
{
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* unnumbered */ ,
({
ip6_address_t *rv;
@@ -197,7 +190,6 @@ ip_interface_get_first_ip (u32 sw_if_index, u8 is_ip4)
if (!ip6_address_is_link_local_unicast (rv))
return rv;
}));
- /* *INDENT-ON* */
}
return 0;
@@ -211,7 +203,6 @@ ip_interface_address_mark_one_interface (vnet_main_t *vnm,
ip_lookup_main_t *lm6 = &ip6_main.lookup_main;
ip_interface_address_t *ia = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, si->sw_if_index, 1 /* unnumbered */ ,
({
ia->flags |= IP_INTERFACE_ADDRESS_FLAG_STALE;
@@ -220,7 +211,6 @@ ip_interface_address_mark_one_interface (vnet_main_t *vnm,
({
ia->flags |= IP_INTERFACE_ADDRESS_FLAG_STALE;
}));
- /* *INDENT-ON* */
return (WALK_CONTINUE);
}
@@ -246,7 +236,6 @@ ip_interface_address_sweep_one_interface (vnet_main_t * vnm,
u32 *ip4_masks = 0;
int i;
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im4->lookup_main, ia, si->sw_if_index, 1,
({
if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
@@ -268,7 +257,6 @@ ip_interface_address_sweep_one_interface (vnet_main_t * vnm,
vec_add1 (ip6_masks, ia->address_length);
}
}));
- /* *INDENT-ON* */
for (i = 0; i < vec_len (ip4_addrs); i++)
ip4_add_del_interface_address (vm, si->sw_if_index, &ip4_addrs[i],
diff --git a/src/vnet/ip/ip_interface.h b/src/vnet/ip/ip_interface.h
index f0474c1bf9a..f0034ed0314 100644
--- a/src/vnet/ip/ip_interface.h
+++ b/src/vnet/ip/ip_interface.h
@@ -56,7 +56,6 @@ ip_get_interface_prefix (ip_lookup_main_t * lm, ip_interface_prefix_key_t * k)
return p ? pool_elt_at_index (lm->if_prefix_pool, p[0]) : 0;
}
-/* *INDENT-OFF* */
#define foreach_ip_interface_address(lm,a,sw_if_index,loop,body) \
do { \
vnet_main_t *_vnm = vnet_get_main(); \
@@ -90,7 +89,6 @@ do { \
body; \
} \
} while (0)
-/* *INDENT-ON* */
#endif /* included_ip_interface_h */
diff --git a/src/vnet/ip/ip_psh_cksum.h b/src/vnet/ip/ip_psh_cksum.h
index 8723749865f..a80211561b7 100644
--- a/src/vnet/ip/ip_psh_cksum.h
+++ b/src/vnet/ip/ip_psh_cksum.h
@@ -38,8 +38,7 @@ ip4_pseudo_header_cksum (ip4_header_t *ip4)
psh.proto = ip4->protocol;
psh.l4len = clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
sizeof (ip4_header_t));
- return ~clib_net_to_host_u16 (
- clib_ip_csum ((u8 *) &psh, sizeof (ip4_psh_t)));
+ return ~(clib_ip_csum ((u8 *) &psh, sizeof (ip4_psh_t)));
}
static_always_inline u16
@@ -50,8 +49,7 @@ ip6_pseudo_header_cksum (ip6_header_t *ip6)
psh.dst = ip6->dst_address;
psh.l4len = ip6->payload_length;
psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol);
- return ~clib_net_to_host_u16 (
- clib_ip_csum ((u8 *) &psh, sizeof (ip6_psh_t)));
+ return ~(clib_ip_csum ((u8 *) &psh, sizeof (ip6_psh_t)));
}
#endif /* included_ip_psh_cksum_h */
diff --git a/src/vnet/ip/ip_punt_drop.c b/src/vnet/ip/ip_punt_drop.c
index bf01adadb10..dc113f51386 100644
--- a/src/vnet/ip/ip_punt_drop.c
+++ b/src/vnet/ip/ip_punt_drop.c
@@ -143,9 +143,8 @@ format_ip_punt_redirect (u8 * s, va_list * args)
rx = ip_punt_redirect_get (rxs[rx_sw_if_index]);
- s = format (s, " rx %U via:\n",
- format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface (vnm, rx_sw_if_index));
+ s = format (s, " rx %U via:\n", format_vnet_sw_if_index_name, vnm,
+ rx_sw_if_index);
s = format (s, " %U", format_fib_path_list, rx->pl, 2);
s = format (s, " forwarding\n", format_dpo_id, &rx->dpo, 0);
s = format (s, " %U\n", format_dpo_id, &rx->dpo, 0);
diff --git a/src/vnet/ip/ip_test.c b/src/vnet/ip/ip_test.c
index 7c994868d87..727afba67f4 100644
--- a/src/vnet/ip/ip_test.c
+++ b/src/vnet/ip/ip_test.c
@@ -1277,6 +1277,12 @@ api_set_ip_flow_hash_v2 (vat_main_t *vat)
}
static int
+api_set_ip_flow_hash_v3 (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
api_ip_mroute_add_del (vat_main_t *vam)
{
unformat_input_t *i = vam->input;
diff --git a/src/vnet/ip/ip_types.c b/src/vnet/ip/ip_types.c
index 88b3f7b9820..ec80a96f15c 100644
--- a/src/vnet/ip/ip_types.c
+++ b/src/vnet/ip/ip_types.c
@@ -41,16 +41,17 @@ uword
unformat_ip_address (unformat_input_t * input, va_list * args)
{
ip_address_t *a = va_arg (*args, ip_address_t *);
+ ip_address_t tmp, *p_tmp = &tmp;
- if (unformat_user (input, unformat_ip46_address, &ip_addr_46 (a),
- IP46_TYPE_ANY))
- {
- ip_addr_version (a) =
- ip46_address_is_ip4 (&ip_addr_46 (a)) ? AF_IP4 : AF_IP6;
- return 1;
- }
-
- return 0;
+ clib_memset (p_tmp, 0, sizeof (*p_tmp));
+ if (unformat (input, "%U", unformat_ip4_address, &ip_addr_v4 (p_tmp)))
+ ip_addr_version (p_tmp) = AF_IP4;
+ else if (unformat_user (input, unformat_ip6_address, &ip_addr_v6 (p_tmp)))
+ ip_addr_version (p_tmp) = AF_IP6;
+ else
+ return 0;
+ *a = *p_tmp;
+ return 1;
}
u8 *
diff --git a/src/vnet/ip/ip_types.h b/src/vnet/ip/ip_types.h
index e4d89ebd88d..f1b387df194 100644
--- a/src/vnet/ip/ip_types.h
+++ b/src/vnet/ip/ip_types.h
@@ -75,13 +75,11 @@ typedef enum ip_feature_location_t_
#define N_IP_FEATURE_LOCATIONS (IP_FEATURE_DROP+1)
-/* *INDENT-OFF* */
typedef struct ip_address
{
ip46_address_t ip;
ip_address_family_t version;
} __clib_packed ip_address_t;
-/* *INDENT-ON* */
#define IP_ADDRESS_V4_ALL_0S {.ip.ip4.as_u32 = 0, .version = AF_IP4}
#define IP_ADDRESS_V6_ALL_0S {.ip.ip6.as_u64 = {0, 0}, .version = AF_IP6}
@@ -112,13 +110,11 @@ extern void ip_address_from_46 (const ip46_address_t * a,
extern void ip_address_increment (ip_address_t * ip);
extern void ip_address_reset (ip_address_t * ip);
-/* *INDENT-OFF* */
typedef struct ip_prefix
{
ip_address_t addr;
u8 len;
} __clib_packed ip_prefix_t;
-/* *INDENT-ON* */
#define ip_prefix_addr(_a) (_a)->addr
#define ip_prefix_version(_a) ip_addr_version(&ip_prefix_addr(_a))
diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c
index 26bdaa635aa..c225c222a38 100644
--- a/src/vnet/ip/lookup.c
+++ b/src/vnet/ip/lookup.c
@@ -145,13 +145,13 @@ unformat_ip_flow_hash_config (unformat_input_t *input, va_list *args)
{
if (unformat (input, "%_,"))
;
-#define _(a, b) \
+#define _(a, b, c) \
else if (unformat (input, "%_" #a)) \
{ \
- *flow_hash_config |= b; \
+ *flow_hash_config |= c; \
matched_once = 1; \
}
- foreach_flow_hash_bit_v1
+ foreach_flow_hash_bit
#undef _
else
{
@@ -220,6 +220,27 @@ const ip46_address_t zero_addr = {
0, 0},
};
+bool
+fib_prefix_validate (const fib_prefix_t *prefix)
+{
+ if (FIB_PROTOCOL_IP4 == prefix->fp_proto)
+ {
+ if (prefix->fp_len > 32)
+ {
+ return false;
+ }
+ }
+
+ if (FIB_PROTOCOL_IP6 == prefix->fp_proto)
+ {
+ if (prefix->fp_len > 128)
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
static clib_error_t *
vnet_ip_route_cmd (vlib_main_t * vm,
unformat_input_t * main_input, vlib_cli_command_t * cmd)
@@ -353,6 +374,12 @@ vnet_ip_route_cmd (vlib_main_t * vm,
.fp_addr = prefixs[i].fp_addr,
};
+ if (!fib_prefix_validate (&rpfx))
+ {
+ vlib_cli_output (vm, "Invalid prefix len: %d", rpfx.fp_len);
+ continue;
+ }
+
if (is_del)
fib_table_entry_path_remove2 (fib_index,
&rpfx, FIB_SOURCE_CLI, rpaths);
@@ -530,33 +557,25 @@ vnet_show_ip6_table_cmd (vlib_main_t *vm, unformat_input_t *main_input,
return (vnet_show_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP6));
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_ip_command, static) = {
.path = "ip",
.short_help = "Internet protocol (IP) commands",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_ip6_command, static) = {
.path = "ip6",
.short_help = "Internet protocol version 6 (IPv6) commands",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_show_ip_command, static) = {
.path = "show ip",
.short_help = "Internet protocol (IP) show commands",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = {
.path = "show ip6",
.short_help = "Internet protocol version 6 (IPv6) show commands",
};
-/* *INDENT-ON* */
/*?
* This command is used to add or delete IPv4 or IPv6 routes. All
@@ -585,7 +604,6 @@ VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = {
* To add a route to a particular FIB table (VRF), use:
* @cliexcmd{ip route add 172.16.24.0/24 table 7 via GigabitEthernet2/0/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip_route_command, static) = {
.path = "ip route",
.short_help = "ip route [add|del] [count <n>] <dst-ip-addr>/<width> [table "
@@ -593,35 +611,30 @@ VLIB_CLI_COMMAND (ip_route_command, static) = {
"[next-hop-table <value>] [weight <value>] [preference "
"<value>] [udp-encap <value>] [ip4-lookup-in-table <value>] "
"[ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] "
- "[resolve-via-host] [resolve-via-connected] [rx-ip4 "
+ "[resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 "
"<interface>] [out-labels <value value value>]",
.function = vnet_ip_route_cmd,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*?
* This command is used to add or delete IPv4 Tables. All
* Tables must be explicitly added before that can be used. Creating a
* table will add both unicast and multicast FIBs
*
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip4_table_command, static) = {
.path = "ip table",
.short_help = "ip table [add|del] <table-id>",
.function = vnet_ip4_table_cmd,
};
-/* *INDENT-ON* */
-/* *INDENT-ON* */
/*?
* This command is used to add or delete IPv4 Tables. All
* Tables must be explicitly added before that can be used. Creating a
* table will add both unicast and multicast FIBs
*
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_table_command, static) = {
.path = "ip6 table",
.short_help = "ip6 table [add|del] <table-id>",
@@ -726,14 +739,12 @@ ip6_table_bind_cmd (vlib_main_t * vm,
* Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
* @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
{
.path = "set interface ip table",
.function = ip4_table_bind_cmd,
.short_help = "set interface ip table <interface> <table-id>",
};
-/* *INDENT-ON* */
/*?
* Place the indicated interface into the supplied IPv6 FIB table (also known
@@ -754,14 +765,12 @@ VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
* Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id):
* @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) =
{
.path = "set interface ip6 table",
.function = ip6_table_bind_cmd,
.short_help = "set interface ip6 table <interface> <table-id>"
};
-/* *INDENT-ON* */
clib_error_t *
vnet_ip_mroute_cmd (vlib_main_t * vm,
@@ -998,7 +1007,6 @@ done:
* @cliexcmd{ip mroute add 232.1.1.1 Signal}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip_mroute_command, static) =
{
.path = "ip mroute",
@@ -1006,7 +1014,6 @@ VLIB_CLI_COMMAND (ip_mroute_command, static) =
.function = vnet_ip_mroute_cmd,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/lookup.h b/src/vnet/ip/lookup.h
index aa998273213..8083d974df6 100644
--- a/src/vnet/ip/lookup.h
+++ b/src/vnet/ip/lookup.h
@@ -168,17 +168,16 @@ always_inline void
ip_lookup_set_buffer_fib_index (u32 * fib_index_by_sw_if_index,
vlib_buffer_t * b)
{
- /* *INDENT-OFF* */
vnet_buffer (b)->ip.fib_index =
vec_elt (fib_index_by_sw_if_index, vnet_buffer (b)->sw_if_index[VLIB_RX]);
vnet_buffer (b)->ip.fib_index =
((vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
vnet_buffer (b)->ip.fib_index :
vnet_buffer (b)->sw_if_index[VLIB_TX]);
- /* *INDENT-ON* */
}
void ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index);
+bool fib_prefix_validate (const fib_prefix_t *prefix);
#endif /* included_ip_lookup_h */
/*
diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c
index 10deb2e8849..3c46549634a 100644
--- a/src/vnet/ip/punt.c
+++ b/src/vnet/ip/punt.c
@@ -148,14 +148,31 @@ punt_socket_register_l4 (vlib_main_t * vm,
punt_main_t *pm = &punt_main;
punt_client_t *c;
- /* For now we only support UDP punt */
- if (protocol != IP_PROTOCOL_UDP)
- return clib_error_return (0,
- "only UDP protocol (%d) is supported, got %d",
- IP_PROTOCOL_UDP, protocol);
-
if (port == (u16) ~ 0)
- return clib_error_return (0, "UDP port number required");
+ return clib_error_return (0, "Port number required");
+
+ u32 node_index;
+ switch (protocol)
+ {
+ case IP_PROTOCOL_UDP:
+ node_index = (af == AF_IP4 ? udp4_punt_socket_node.index :
+ udp6_punt_socket_node.index);
+ udp_register_dst_port (vm, port, node_index, af == AF_IP4);
+ break;
+ case IP_PROTOCOL_ICMP6:
+ if (af != AF_IP6)
+ return clib_error_return (
+ 0, "only UDP or ICMP6 protocol (%d, %d) is supported, got %d",
+ IP_PROTOCOL_UDP, IP_PROTOCOL_ICMP6, protocol);
+
+ node_index = icmp6_punt_socket_node.index;
+ icmp6_register_type (vm, port, node_index);
+ break;
+ default:
+ return clib_error_return (
+ 0, "only UDP or ICMP6 protocol (%d) is supported, got %d",
+ IP_PROTOCOL_UDP, protocol);
+ }
c = punt_client_l4_get (af, port);
@@ -173,12 +190,6 @@ punt_socket_register_l4 (vlib_main_t * vm,
c->reg.punt.l4.protocol = protocol;
c->reg.punt.l4.af = af;
- u32 node_index = (af == AF_IP4 ?
- udp4_punt_socket_node.index :
- udp6_punt_socket_node.index);
-
- udp_register_dst_port (vm, port, node_index, af == AF_IP4);
-
return (NULL);
}
@@ -463,7 +474,6 @@ punt_cli (vlib_main_t * vm,
unformat_input_t line_input, *input = &line_input;
clib_error_t *error = NULL;
bool is_add = true;
- /* *INDENT-OFF* */
punt_reg_t pr = {
.punt = {
.l4 = {
@@ -475,7 +485,6 @@ punt_cli (vlib_main_t * vm,
.type = PUNT_TYPE_L4,
};
u32 port;
- /* *INDENT-ON* */
if (!unformat_user (input__, unformat_line_input, input))
return 0;
@@ -541,13 +550,11 @@ done:
* @cliexcmd{set punt udp del all}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (punt_command, static) = {
.path = "set punt",
.short_help = "set punt [IPV4|ip6|ipv6] [UDP|tcp] [del] [ALL|<port-num>]",
.function = punt_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
punt_socket_register_cmd (vlib_main_t * vm,
@@ -557,7 +564,6 @@ punt_socket_register_cmd (vlib_main_t * vm,
unformat_input_t line_input, *input = &line_input;
u8 *socket_name = 0;
clib_error_t *error = NULL;
- /* *INDENT-OFF* */
punt_reg_t pr = {
.punt = {
.l4 = {
@@ -568,7 +574,6 @@ punt_socket_register_cmd (vlib_main_t * vm,
},
.type = PUNT_TYPE_L4,
};
- /* *INDENT-ON* */
if (!unformat_user (input__, unformat_line_input, input))
return 0;
@@ -616,7 +621,6 @@ done:
* @cliexcmd{punt socket register socket punt_l4_foo.sock}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (punt_socket_register_command, static) =
{
.path = "punt socket register",
@@ -624,7 +628,6 @@ VLIB_CLI_COMMAND (punt_socket_register_command, static) =
.short_help = "punt socket register [IPV4|ipv6] [UDP|tcp] [ALL|<port-num>] socket <socket>",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
punt_socket_deregister_cmd (vlib_main_t * vm,
@@ -633,7 +636,6 @@ punt_socket_deregister_cmd (vlib_main_t * vm,
{
unformat_input_t line_input, *input = &line_input;
clib_error_t *error = NULL;
- /* *INDENT-OFF* */
punt_reg_t pr = {
.punt = {
.l4 = {
@@ -644,7 +646,6 @@ punt_socket_deregister_cmd (vlib_main_t * vm,
},
.type = PUNT_TYPE_L4,
};
- /* *INDENT-ON* */
if (!unformat_user (input__, unformat_line_input, input))
return 0;
@@ -685,7 +686,6 @@ done:
* @cliexpar
* @cliexcmd{punt socket register}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (punt_socket_deregister_command, static) =
{
.path = "punt socket deregister",
@@ -693,7 +693,6 @@ VLIB_CLI_COMMAND (punt_socket_deregister_command, static) =
.short_help = "punt socket deregister [IPV4|ipv6] [UDP|tcp] [ALL|<port-num>]",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
void
punt_client_walk (punt_type_t pt, punt_client_walk_cb_t cb, void *ctx)
@@ -706,24 +705,20 @@ punt_client_walk (punt_type_t pt, punt_client_walk_cb_t cb, void *ctx)
{
u32 pci, key;
- /* *INDENT-OFF* */
hash_foreach(key, pci, pm->db.clients_by_l4_port,
({
cb (pool_elt_at_index(pm->punt_client_pool, pci), ctx);
}));
- /* *INDENT-ON* */
break;
}
case PUNT_TYPE_IP_PROTO:
{
u32 pci, key;
- /* *INDENT-OFF* */
hash_foreach(key, pci, pm->db.clients_by_ip_proto,
({
cb (pool_elt_at_index(pm->punt_client_pool, pci), ctx);
}));
- /* *INDENT-ON* */
break;
}
case PUNT_TYPE_EXCEPTION:
@@ -821,7 +816,6 @@ done:
* @cliexpar
* @cliexcmd{show punt socket ipv4}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_punt_socket_registration_command, static) =
{
.path = "show punt socket registrations",
@@ -829,7 +823,6 @@ VLIB_CLI_COMMAND (show_punt_socket_registration_command, static) =
.short_help = "show punt socket registrations [l4|exception]",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
clib_error_t *
ip_punt_init (vlib_main_t * vm)
diff --git a/src/vnet/ip/punt.h b/src/vnet/ip/punt.h
index a2612d60f07..e8495caad61 100644
--- a/src/vnet/ip/punt.h
+++ b/src/vnet/ip/punt.h
@@ -20,7 +20,12 @@
#ifndef included_punt_h
#define included_punt_h
+#ifdef __linux__
#include <linux/un.h>
+#elif __FreeBSD__
+#include <sys/un.h>
+#define UNIX_PATH_MAX SUNPATHLEN
+#endif /* __linux__ */
#include <stdbool.h>
#include <vnet/ip/ip.h>
@@ -239,6 +244,7 @@ extern vlib_node_registration_t udp4_punt_node;
extern vlib_node_registration_t udp6_punt_node;
extern vlib_node_registration_t udp4_punt_socket_node;
extern vlib_node_registration_t udp6_punt_socket_node;
+extern vlib_node_registration_t icmp6_punt_socket_node;
extern vlib_node_registration_t ip4_proto_punt_socket_node;
extern vlib_node_registration_t ip6_proto_punt_socket_node;
extern vlib_node_registration_t punt_socket_rx_node;
diff --git a/src/vnet/ip/punt_api.c b/src/vnet/ip/punt_api.c
index bcbf939f69d..20297af2e75 100644
--- a/src/vnet/ip/punt_api.c
+++ b/src/vnet/ip/punt_api.c
@@ -224,12 +224,10 @@ vl_api_punt_socket_register_t_handler (vl_api_punt_socket_register_t * mp)
char *p = vnet_punt_get_server_pathname ();
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_PUNT_SOCKET_REGISTER_REPLY,
({
memcpy ((char *) rmp->pathname, p, sizeof (rmp->pathname));
}));
- /* *INDENT-ON* */
}
typedef struct punt_socket_send_ctx_t_
diff --git a/src/vnet/ip/punt_node.c b/src/vnet/ip/punt_node.c
index 7f9beef0ffe..6400e49c626 100644
--- a/src/vnet/ip/punt_node.c
+++ b/src/vnet/ip/punt_node.c
@@ -23,6 +23,7 @@
*/
#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
#include <vlib/vlib.h>
#include <vnet/ip/punt.h>
#include <vlib/unix/unix.h>
@@ -182,7 +183,6 @@ VLIB_NODE_FN (udp6_punt_node) (vlib_main_t * vm,
return udp46_punt_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp4_punt_node) = {
.name = "ip4-udp-punt",
/* Takes a vector of packets. */
@@ -214,7 +214,6 @@ VLIB_REGISTER_NODE (udp6_punt_node) = {
#undef _
},
};
-/* *INDENT-ON* */
typedef struct
{
@@ -243,10 +242,9 @@ format_udp_punt_trace (u8 * s, va_list * args)
}
always_inline uword
-punt_socket_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame,
- punt_type_t pt, ip_address_family_t af)
+punt_socket_inline2 (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, punt_type_t pt,
+ ip_address_family_t af, ip_protocol_t protocol)
{
u32 *buffers = vlib_frame_vector_args (frame);
u32 thread_index = vm->thread_index;
@@ -266,33 +264,42 @@ punt_socket_inline (vlib_main_t * vm,
uword l;
punt_packetdesc_t packetdesc;
punt_client_t *c;
-
+ u16 port = 0;
b = vlib_get_buffer (vm, buffers[i]);
if (PUNT_TYPE_L4 == pt)
{
- /* Reverse UDP Punt advance */
- udp_header_t *udp;
- if (AF_IP4 == af)
+ if (protocol == IP_PROTOCOL_UDP)
{
- vlib_buffer_advance (b, -(sizeof (ip4_header_t) +
- sizeof (udp_header_t)));
- ip4_header_t *ip = vlib_buffer_get_current (b);
- udp = (udp_header_t *) (ip + 1);
+ /* Reverse UDP Punt advance */
+ udp_header_t *udp;
+ if (AF_IP4 == af)
+ {
+ vlib_buffer_advance (
+ b, -(sizeof (ip4_header_t) + sizeof (udp_header_t)));
+ ip4_header_t *ip = vlib_buffer_get_current (b);
+ udp = (udp_header_t *) (ip + 1);
+ }
+ else
+ {
+ vlib_buffer_advance (
+ b, -(sizeof (ip6_header_t) + sizeof (udp_header_t)));
+ ip6_header_t *ip = vlib_buffer_get_current (b);
+ udp = (udp_header_t *) (ip + 1);
+ }
+ port = clib_net_to_host_u16 (udp->dst_port);
}
- else
+ else if (protocol == IP_PROTOCOL_ICMP6)
{
- vlib_buffer_advance (b, -(sizeof (ip6_header_t) +
- sizeof (udp_header_t)));
ip6_header_t *ip = vlib_buffer_get_current (b);
- udp = (udp_header_t *) (ip + 1);
+ icmp46_header_t *icmp = ip6_next_header (ip);
+ port = icmp->type;
}
-
/*
* Find registerered client
* If no registered client, drop packet and count
*/
- c = punt_client_l4_get (af, clib_net_to_host_u16 (udp->dst_port));
+ c = punt_client_l4_get (af, port);
}
else if (PUNT_TYPE_IP_PROTO == pt)
{
@@ -339,7 +346,7 @@ punt_socket_inline (vlib_main_t * vm,
iov->iov_len = sizeof (packetdesc);
/** VLIB buffer chain -> Unix iovec(s). */
- vlib_buffer_advance (b, -(sizeof (ethernet_header_t)));
+ vlib_buffer_advance (b, -ethernet_buffer_header_size (b));
vec_add2 (ptd->iovecs, iov, 1);
iov->iov_base = b->data + b->current_data;
iov->iov_len = l = b->current_length;
@@ -396,6 +403,14 @@ error:
return n_packets;
}
+always_inline uword
+punt_socket_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, punt_type_t pt,
+ ip_address_family_t af)
+{
+ return punt_socket_inline2 (vm, node, frame, pt, af, IP_PROTOCOL_UDP);
+}
+
static uword
udp4_punt_socket (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * from_frame)
@@ -427,6 +442,14 @@ ip6_proto_punt_socket (vlib_main_t * vm,
}
static uword
+icmp6_punt_socket (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *from_frame)
+{
+ return punt_socket_inline2 (vm, node, from_frame, PUNT_TYPE_L4, AF_IP6,
+ IP_PROTOCOL_ICMP6);
+}
+
+static uword
exception_punt_socket (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * from_frame)
{
@@ -435,7 +458,6 @@ exception_punt_socket (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp4_punt_socket_node) = {
.function = udp4_punt_socket,
.name = "ip4-udp-punt-socket",
@@ -483,7 +505,16 @@ VLIB_REGISTER_NODE (exception_punt_socket_node) = {
.n_errors = PUNT_N_ERROR,
.error_strings = punt_error_strings,
};
-/* *INDENT-ON* */
+VLIB_REGISTER_NODE (icmp6_punt_socket_node) = {
+ .function = icmp6_punt_socket,
+ .name = "ip6-icmp-punt-socket",
+ .format_trace = format_udp_punt_trace,
+ .flags = VLIB_NODE_FLAG_IS_DROP,
+ .vector_size = sizeof (u32),
+ .n_errors = PUNT_N_ERROR,
+ .error_strings = punt_error_strings,
+};
+
typedef struct
{
@@ -614,7 +645,6 @@ punt_socket_rx (vlib_main_t * vm,
return total_count;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (punt_socket_rx_node) =
{
.function = punt_socket_rx,
@@ -633,7 +663,6 @@ VLIB_REGISTER_NODE (punt_socket_rx_node) =
},
.format_trace = format_punt_trace,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/reass/ip4_full_reass.c b/src/vnet/ip/reass/ip4_full_reass.c
index 5b69234e438..bab7d479dcf 100644
--- a/src/vnet/ip/reass/ip4_full_reass.c
+++ b/src/vnet/ip/reass/ip4_full_reass.c
@@ -427,8 +427,7 @@ ip4_full_reass_free (ip4_full_reass_main_t * rm,
* with local variables would cause either buffer leak or corruption */
always_inline void
ip4_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node,
- ip4_full_reass_t *reass, u32 *n_left_to_next,
- u32 **to_next)
+ ip4_full_reass_t *reass)
{
u32 range_bi = reass->first_bi;
vlib_buffer_t *range_b;
@@ -452,40 +451,23 @@ ip4_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node,
if (~0 != reass->error_next_index &&
reass->error_next_index < node->n_next_nodes)
{
- u32 next_index;
-
- next_index = reass->error_next_index;
- u32 bi = ~0;
+ u32 n_free = vec_len (to_free);
/* record number of packets sent to custom app */
vlib_node_increment_counter (vm, node->node_index,
- IP4_ERROR_REASS_TO_CUSTOM_APP,
- vec_len (to_free));
-
- while (vec_len (to_free) > 0)
- {
- vlib_get_next_frame (vm, node, next_index, *to_next,
- (*n_left_to_next));
+ IP4_ERROR_REASS_TO_CUSTOM_APP, n_free);
- while (vec_len (to_free) > 0 && (*n_left_to_next) > 0)
- {
- bi = vec_pop (to_free);
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ for (u32 i = 0; i < n_free; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, to_free[i]);
+ if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
+ ip4_full_reass_add_trace (vm, node, reass, to_free[i],
+ RANGE_DISCARD, 0, ~0);
+ }
- if (~0 != bi)
- {
- vlib_buffer_t *b = vlib_get_buffer (vm, bi);
- if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
- {
- ip4_full_reass_add_trace (vm, node, reass, bi,
- RANGE_DISCARD, 0, ~0);
- }
- *to_next[0] = bi;
- (*to_next) += 1;
- (*n_left_to_next) -= 1;
- }
- }
- vlib_put_next_frame (vm, node, next_index, (*n_left_to_next));
- }
+ vlib_buffer_enqueue_to_single_next (vm, node, to_free,
+ reass->error_next_index, n_free);
}
else
{
@@ -564,8 +546,7 @@ always_inline ip4_full_reass_t *
ip4_full_reass_find_or_create (vlib_main_t *vm, vlib_node_runtime_t *node,
ip4_full_reass_main_t *rm,
ip4_full_reass_per_thread_t *rt,
- ip4_full_reass_kv_t *kv, u8 *do_handoff,
- u32 *n_left_to_next, u32 **to_next)
+ ip4_full_reass_kv_t *kv, u8 *do_handoff)
{
ip4_full_reass_t *reass;
f64 now;
@@ -590,7 +571,7 @@ again:
{
vlib_node_increment_counter (vm, node->node_index,
IP4_ERROR_REASS_TIMEOUT, 1);
- ip4_full_reass_drop_all (vm, node, reass, n_left_to_next, to_next);
+ ip4_full_reass_drop_all (vm, node, reass);
ip4_full_reass_free (rm, rt, reass);
reass = NULL;
}
@@ -647,7 +628,6 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_t *last_b = NULL;
u32 sub_chain_bi = reass->first_bi;
u32 total_length = 0;
- u32 buf_cnt = 0;
do
{
u32 tmp_bi = sub_chain_bi;
@@ -684,7 +664,6 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end;
while (1)
{
- ++buf_cnt;
if (trim_front)
{
if (trim_front > tmp->current_length)
@@ -1184,205 +1163,195 @@ ip4_full_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
bool is_local)
{
u32 *from = vlib_frame_vector_args (frame);
- u32 n_left_from, n_left_to_next, *to_next, next_index;
+ u32 n_left, n_next = 0, to_next[VLIB_FRAME_SIZE];
ip4_full_reass_main_t *rm = &ip4_full_reass_main;
ip4_full_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
+ u16 nexts[VLIB_FRAME_SIZE];
+
clib_spinlock_lock (&rt->lock);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
- while (n_left_from > 0)
+ n_left = frame->n_vectors;
+ while (n_left > 0)
{
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 error0 = IP4_ERROR_NONE;
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0;
- vlib_buffer_t *b0;
- u32 next0;
- u32 error0 = IP4_ERROR_NONE;
-
- bi0 = from[0];
- b0 = vlib_get_buffer (vm, bi0);
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
- ip4_header_t *ip0 = vlib_buffer_get_current (b0);
- if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
+ ip4_header_t *ip0 = vlib_buffer_get_current (b0);
+ if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
+ {
+ // this is a whole packet - no fragmentation
+ if (CUSTOM != type)
{
- // this is a whole packet - no fragmentation
- if (CUSTOM != type)
- {
- next0 = IP4_FULL_REASS_NEXT_INPUT;
- }
- else
- {
- next0 = vnet_buffer (b0)->ip.reass.next_index;
- }
- ip4_full_reass_add_trace (vm, node, NULL, bi0, PASSTHROUGH, 0,
- ~0);
- goto packet_enqueue;
+ next0 = IP4_FULL_REASS_NEXT_INPUT;
}
-
- if (is_local && !rm->is_local_reass_enabled)
+ else
{
- next0 = IP4_FULL_REASS_NEXT_DROP;
- goto packet_enqueue;
+ next0 = vnet_buffer (b0)->ip.reass.next_index;
}
+ ip4_full_reass_add_trace (vm, node, NULL, bi0, PASSTHROUGH, 0, ~0);
+ goto packet_enqueue;
+ }
- const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
- const u32 fragment_length =
- clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
- const u32 fragment_last = fragment_first + fragment_length - 1;
+ if (is_local && !rm->is_local_reass_enabled)
+ {
+ next0 = IP4_FULL_REASS_NEXT_DROP;
+ goto packet_enqueue;
+ }
- /* Keep track of received fragments */
- vlib_node_increment_counter (vm, node->node_index,
- IP4_ERROR_REASS_FRAGMENTS_RCVD, 1);
+ const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
+ const u32 fragment_length =
+ clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
+ const u32 fragment_last = fragment_first + fragment_length - 1;
- if (fragment_first > fragment_last ||
- fragment_first + fragment_length > UINT16_MAX - 20 ||
- (fragment_length < 8 && // 8 is minimum frag length per RFC 791
- ip4_get_fragment_more (ip0)))
- {
- next0 = IP4_FULL_REASS_NEXT_DROP;
- error0 = IP4_ERROR_REASS_MALFORMED_PACKET;
- goto packet_enqueue;
- }
+ /* Keep track of received fragments */
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_FRAGMENTS_RCVD, 1);
- u32 fib_index = vec_elt (ip4_main.fib_index_by_sw_if_index,
- vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+ if (fragment_first > fragment_last ||
+ fragment_first + fragment_length > UINT16_MAX - 20 ||
+ (fragment_length < 8 && // 8 is minimum frag length per RFC 791
+ ip4_get_fragment_more (ip0)))
+ {
+ next0 = IP4_FULL_REASS_NEXT_DROP;
+ error0 = IP4_ERROR_REASS_MALFORMED_PACKET;
+ goto packet_enqueue;
+ }
+
+ u32 fib_index = (vnet_buffer (b0)->sw_if_index[VLIB_TX] == (u32) ~0) ?
+ vec_elt (ip4_main.fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]) :
+ vnet_buffer (b0)->sw_if_index[VLIB_TX];
- ip4_full_reass_kv_t kv = { .k.fib_index = fib_index,
- .k.src.as_u32 = ip0->src_address.as_u32,
- .k.dst.as_u32 = ip0->dst_address.as_u32,
- .k.frag_id = ip0->fragment_id,
- .k.proto = ip0->protocol
+ ip4_full_reass_kv_t kv = { .k.fib_index = fib_index,
+ .k.src.as_u32 = ip0->src_address.as_u32,
+ .k.dst.as_u32 = ip0->dst_address.as_u32,
+ .k.frag_id = ip0->fragment_id,
+ .k.proto = ip0->protocol
- };
- u8 do_handoff = 0;
+ };
+ u8 do_handoff = 0;
- ip4_full_reass_t *reass = ip4_full_reass_find_or_create (
- vm, node, rm, rt, &kv, &do_handoff, &n_left_to_next, &to_next);
+ ip4_full_reass_t *reass =
+ ip4_full_reass_find_or_create (vm, node, rm, rt, &kv, &do_handoff);
- if (reass)
+ if (reass)
+ {
+ const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
+ if (0 == fragment_first)
{
- const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
- if (0 == fragment_first)
- {
- reass->sendout_thread_index = vm->thread_index;
- }
+ reass->sendout_thread_index = vm->thread_index;
}
+ }
- if (PREDICT_FALSE (do_handoff))
+ if (PREDICT_FALSE (do_handoff))
+ {
+ next0 = IP4_FULL_REASS_NEXT_HANDOFF;
+ vnet_buffer (b0)->ip.reass.owner_thread_index =
+ kv.v.memory_owner_thread_index;
+ }
+ else if (reass)
+ {
+ u32 handoff_thread_idx;
+ u32 counter = ~0;
+ switch (ip4_full_reass_update (vm, node, rm, rt, reass, &bi0, &next0,
+ &error0, CUSTOM == type,
+ &handoff_thread_idx))
{
+ case IP4_REASS_RC_OK:
+ /* nothing to do here */
+ break;
+ case IP4_REASS_RC_HANDOFF:
next0 = IP4_FULL_REASS_NEXT_HANDOFF;
+ b0 = vlib_get_buffer (vm, bi0);
vnet_buffer (b0)->ip.reass.owner_thread_index =
- kv.v.memory_owner_thread_index;
+ handoff_thread_idx;
+ break;
+ case IP4_REASS_RC_TOO_MANY_FRAGMENTS:
+ counter = IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
+ break;
+ case IP4_REASS_RC_NO_BUF:
+ counter = IP4_ERROR_REASS_NO_BUF;
+ break;
+ case IP4_REASS_RC_INTERNAL_ERROR:
+ counter = IP4_ERROR_REASS_INTERNAL_ERROR;
+ /* Sanitization is needed in internal error cases only, as
+ * the incoming packet is already dropped in other cases,
+ * also adding bi0 back to the reassembly list, fixes the
+ * leaking of buffers during internal errors.
+ *
+ * Also it doesnt make sense to send these buffers custom
+ * app, these fragments are with internal errors */
+ sanitize_reass_buffers_add_missing (vm, reass, &bi0);
+ reass->error_next_index = ~0;
+ break;
}
- else if (reass)
- {
- u32 handoff_thread_idx;
- u32 counter = ~0;
- switch (ip4_full_reass_update
- (vm, node, rm, rt, reass, &bi0, &next0,
- &error0, CUSTOM == type, &handoff_thread_idx))
- {
- case IP4_REASS_RC_OK:
- /* nothing to do here */
- break;
- case IP4_REASS_RC_HANDOFF:
- next0 = IP4_FULL_REASS_NEXT_HANDOFF;
- b0 = vlib_get_buffer (vm, bi0);
- vnet_buffer (b0)->ip.reass.owner_thread_index =
- handoff_thread_idx;
- break;
- case IP4_REASS_RC_TOO_MANY_FRAGMENTS:
- counter = IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
- break;
- case IP4_REASS_RC_NO_BUF:
- counter = IP4_ERROR_REASS_NO_BUF;
- break;
- case IP4_REASS_RC_INTERNAL_ERROR:
- counter = IP4_ERROR_REASS_INTERNAL_ERROR;
- /* Sanitization is needed in internal error cases only, as
- * the incoming packet is already dropped in other cases,
- * also adding bi0 back to the reassembly list, fixes the
- * leaking of buffers during internal errors.
- *
- * Also it doesnt make sense to send these buffers custom
- * app, these fragments are with internal errors */
- sanitize_reass_buffers_add_missing (vm, reass, &bi0);
- reass->error_next_index = ~0;
- break;
- }
- if (~0 != counter)
- {
- vlib_node_increment_counter (vm, node->node_index, counter,
- 1);
- ip4_full_reass_drop_all (vm, node, reass, &n_left_to_next,
- &to_next);
- ip4_full_reass_free (rm, rt, reass);
- goto next_packet;
- }
- }
- else
+ if (~0 != counter)
{
- next0 = IP4_FULL_REASS_NEXT_DROP;
- error0 = IP4_ERROR_REASS_LIMIT_REACHED;
+ vlib_node_increment_counter (vm, node->node_index, counter, 1);
+ ip4_full_reass_drop_all (vm, node, reass);
+ ip4_full_reass_free (rm, rt, reass);
+ goto next_packet;
}
+ }
+ else
+ {
+ next0 = IP4_FULL_REASS_NEXT_DROP;
+ error0 = IP4_ERROR_REASS_LIMIT_REACHED;
+ }
+ packet_enqueue:
- packet_enqueue:
-
- if (bi0 != ~0)
+ if (bi0 != ~0)
+ {
+ /* bi0 might have been updated by reass_finalize, reload */
+ b0 = vlib_get_buffer (vm, bi0);
+ if (IP4_ERROR_NONE != error0)
{
- to_next[0] = bi0;
- to_next += 1;
- n_left_to_next -= 1;
-
- /* bi0 might have been updated by reass_finalize, reload */
- b0 = vlib_get_buffer (vm, bi0);
- if (IP4_ERROR_NONE != error0)
- {
- b0->error = node->errors[error0];
- }
-
- if (next0 == IP4_FULL_REASS_NEXT_HANDOFF)
- {
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- ip4_full_reass_add_trace (
- vm, node, NULL, bi0, HANDOFF, 0,
- vnet_buffer (b0)->ip.reass.owner_thread_index);
- }
- }
- else if (FEATURE == type && IP4_ERROR_NONE == error0)
- {
- vnet_feature_next (&next0, b0);
- }
+ b0->error = node->errors[error0];
+ }
- /* Increment the counter to-custom-app also as this fragment is
- * also going to application */
- if (CUSTOM == type)
+ if (next0 == IP4_FULL_REASS_NEXT_HANDOFF)
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- vlib_node_increment_counter (
- vm, node->node_index, IP4_ERROR_REASS_TO_CUSTOM_APP, 1);
+ ip4_full_reass_add_trace (
+ vm, node, NULL, bi0, HANDOFF, 0,
+ vnet_buffer (b0)->ip.reass.owner_thread_index);
}
+ }
+ else if (FEATURE == type && IP4_ERROR_NONE == error0)
+ {
+ vnet_feature_next (&next0, b0);
+ }
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- IP4_REASS_DEBUG_BUFFER (bi0, enqueue_next);
+ /* Increment the counter to-custom-app also as this fragment is
+ * also going to application */
+ if (CUSTOM == type)
+ {
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_TO_CUSTOM_APP, 1);
}
- next_packet:
- from += 1;
- n_left_from -= 1;
+ to_next[n_next] = bi0;
+ nexts[n_next] = next0;
+ n_next++;
+ IP4_REASS_DEBUG_BUFFER (bi0, enqueue_next);
}
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ next_packet:
+ from += 1;
+ n_left -= 1;
}
clib_spinlock_unlock (&rt->lock);
+
+ vlib_buffer_enqueue_to_next (vm, node, to_next, nexts, n_next);
return frame->n_vectors;
}
@@ -1455,11 +1424,11 @@ VLIB_REGISTER_NODE (ip4_full_reass_node_feature) = {
};
VNET_FEATURE_INIT (ip4_full_reass_feature, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "ip4-full-reassembly-feature",
- .runs_before = VNET_FEATURES ("ip4-lookup",
- "ipsec4-input-feature"),
- .runs_after = 0,
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-full-reassembly-feature",
+ .runs_before = VNET_FEATURES ("ip4-lookup", "ipsec4-input-feature",
+ "ip4-sv-reassembly-feature"),
+ .runs_after = 0,
};
VLIB_NODE_FN (ip4_full_reass_node_custom) (vlib_main_t * vm,
@@ -1484,15 +1453,6 @@ VLIB_REGISTER_NODE (ip4_full_reass_node_custom) = {
},
};
-VNET_FEATURE_INIT (ip4_full_reass_custom, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "ip4-full-reassembly-feature",
- .runs_before = VNET_FEATURES ("ip4-lookup",
- "ipsec4-input-feature"),
- .runs_after = 0,
-};
-
-
#ifndef CLIB_MARCH_VARIANT
uword
ip4_full_reass_custom_register_next_node (uword node_index)
@@ -1688,7 +1648,6 @@ ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
uword thread_index = 0;
int index;
const uword nthreads = vlib_num_workers () + 1;
- u32 n_left_to_next, *to_next;
for (thread_index = 0; thread_index < nthreads; ++thread_index)
{
@@ -1734,8 +1693,7 @@ ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
vec_foreach (i, pool_indexes_to_free)
{
ip4_full_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
- ip4_full_reass_drop_all (vm, node, reass, &n_left_to_next,
- &to_next);
+ ip4_full_reass_drop_all (vm, node, reass);
ip4_full_reass_free (rm, rt, reass);
}
@@ -2101,7 +2059,7 @@ ip4_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
"ip4-full-reassembly-feature",
sw_if_index, 0, 0, 0);
}
- return -1;
+ return 0;
}
void
diff --git a/src/vnet/ip/reass/ip4_sv_reass.c b/src/vnet/ip/reass/ip4_sv_reass.c
index 4ef144e9bee..7c3c2fff217 100644
--- a/src/vnet/ip/reass/ip4_sv_reass.c
+++ b/src/vnet/ip/reass/ip4_sv_reass.c
@@ -150,6 +150,7 @@ typedef struct
/** Worker handoff */
u32 fq_index;
u32 fq_feature_index;
+ u32 fq_custom_context_index;
// reference count for enabling/disabling feature - per interface
u32 *feature_use_refcount_per_intf;
@@ -457,14 +458,19 @@ l4_layer_truncated (ip4_header_t *ip)
}
always_inline uword
-ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature,
- bool is_output_feature, bool is_custom)
+ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_feature,
+ bool is_output_feature, bool is_custom,
+ bool with_custom_context)
{
u32 *from = vlib_frame_vector_args (frame);
- u32 n_left_from, n_left_to_next, *to_next, next_index;
+ u32 n_left_from, n_left_to_next, *to_next, *to_next_aux, next_index;
ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
+ u32 *context;
+ if (with_custom_context)
+ context = vlib_frame_aux_args (frame);
+
clib_spinlock_lock (&rt->lock);
n_left_from = frame->n_vectors;
@@ -621,6 +627,8 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
next[0] = next0;
next[1] = next1;
next += 2;
+ if (with_custom_context)
+ context += 2;
}
while (n_left_from > 0)
@@ -696,6 +704,8 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
n_left_from -= 1;
next[0] = next0;
next += 1;
+ if (with_custom_context)
+ context += 1;
}
vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
@@ -709,7 +719,11 @@ slow_path:
while (n_left_from > 0)
{
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ if (with_custom_context)
+ vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next,
+ to_next_aux, n_left_to_next);
+ else
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
while (n_left_from > 0 && n_left_to_next > 0)
{
@@ -717,6 +731,7 @@ slow_path:
vlib_buffer_t *b0;
u32 next0;
u32 error0 = IP4_ERROR_NONE;
+ u8 forward_context = 0;
bi0 = from[0];
b0 = vlib_get_buffer (vm, bi0);
@@ -792,13 +807,17 @@ slow_path:
ip4_sv_reass_kv_t kv;
u8 do_handoff = 0;
- kv.k.as_u64[0] =
- (u64) vec_elt (ip4_main.fib_index_by_sw_if_index,
- vnet_buffer (b0)->sw_if_index[VLIB_RX]) |
- (u64) ip0->src_address.as_u32 << 32;
- kv.k.as_u64[1] =
- (u64) ip0->dst_address.
- as_u32 | (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48;
+ if (with_custom_context)
+ kv.k.as_u64[0] = (u64) *context | (u64) ip0->src_address.as_u32
+ << 32;
+ else
+ kv.k.as_u64[0] =
+ (u64) vec_elt (ip4_main.fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]) |
+ (u64) ip0->src_address.as_u32 << 32;
+ kv.k.as_u64[1] = (u64) ip0->dst_address.as_u32 |
+ (u64) ip0->fragment_id << 32 |
+ (u64) ip0->protocol << 48;
ip4_sv_reass_t *reass =
ip4_sv_reass_find_or_create (vm, rm, rt, &kv, &do_handoff);
@@ -808,6 +827,8 @@ slow_path:
next0 = IP4_SV_REASSEMBLY_NEXT_HANDOFF;
vnet_buffer (b0)->ip.reass.owner_thread_index =
kv.v.thread_index;
+ if (with_custom_context)
+ forward_context = 1;
goto packet_enqueue;
}
@@ -938,13 +959,26 @@ slow_path:
b0 = vlib_get_buffer (vm, bi0);
vnet_feature_next (&next0, b0);
}
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
+ if (with_custom_context && forward_context)
+ {
+ if (to_next_aux)
+ {
+ to_next_aux[0] = *context;
+ to_next_aux += 1;
+ }
+ vlib_validate_buffer_enqueue_with_aux_x1 (
+ vm, node, next_index, to_next, to_next_aux, n_left_to_next,
+ bi0, *context, next0);
+ }
+ else
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
next_packet:
from += 1;
n_left_from -= 1;
+ if (with_custom_context)
+ context += 1;
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
@@ -959,12 +993,11 @@ VLIB_NODE_FN (ip4_sv_reass_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
- false /* is_output_feature */ ,
- false /* is_custom */ );
+ return ip4_sv_reass_inline (
+ vm, node, frame, false /* is_feature */, false /* is_output_feature */,
+ false /* is_custom */, false /* with_custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_node) = {
.name = "ip4-sv-reassembly",
.vector_size = sizeof (u32),
@@ -980,18 +1013,16 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node) = {
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_sv_reass_node_feature) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ ,
- false /* is_output_feature */ ,
- false /* is_custom */ );
+ return ip4_sv_reass_inline (
+ vm, node, frame, true /* is_feature */, false /* is_output_feature */,
+ false /* is_custom */, false /* with_custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = {
.name = "ip4-sv-reassembly-feature",
.vector_size = sizeof (u32),
@@ -1006,28 +1037,24 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = {
[IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_sv_reass_feature) = {
.arc_name = "ip4-unicast",
.node_name = "ip4-sv-reassembly-feature",
.runs_before = VNET_FEATURES ("ip4-lookup"),
.runs_after = 0,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_sv_reass_node_output_feature) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ ,
- true /* is_output_feature */ ,
- false /* is_custom */ );
+ return ip4_sv_reass_inline (
+ vm, node, frame, true /* is_feature */, true /* is_output_feature */,
+ false /* is_custom */, false /* with_custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = {
.name = "ip4-sv-reassembly-output-feature",
.vector_size = sizeof (u32),
@@ -1042,18 +1069,14 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = {
[IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_sv_reass_output_feature) = {
.arc_name = "ip4-output",
.node_name = "ip4-sv-reassembly-output-feature",
.runs_before = 0,
.runs_after = 0,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = {
.name = "ip4-sv-reassembly-custom-next",
.vector_size = sizeof (u32),
@@ -1069,15 +1092,39 @@ VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = {
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_sv_reass_custom_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
- false /* is_output_feature */ ,
- true /* is_custom */ );
+ return ip4_sv_reass_inline (
+ vm, node, frame, false /* is_feature */, false /* is_output_feature */,
+ true /* is_custom */, false /* with_custom_context */);
+}
+
+VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_node) = {
+ .name = "ip4-sv-reassembly-custom-context",
+ .vector_size = sizeof (u32),
+ .aux_size = sizeof(u32),
+ .format_trace = format_ip4_sv_reass_trace,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
+ .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
+ .next_nodes =
+ {
+ [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
+ [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
+ [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reassembly-custom-context-handoff",
+
+ },
+};
+
+VLIB_NODE_FN (ip4_sv_reass_custom_context_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip4_sv_reass_inline (
+ vm, node, frame, false /* is_feature */, false /* is_output_feature */,
+ true /* is_custom */, true /* with_custom_context */);
}
#ifndef CLIB_MARCH_VARIANT
@@ -1222,6 +1269,8 @@ ip4_sv_reass_init_function (vlib_main_t * vm)
rm->fq_index = vlib_frame_queue_main_init (ip4_sv_reass_node.index, 0);
rm->fq_feature_index =
vlib_frame_queue_main_init (ip4_sv_reass_node_feature.index, 0);
+ rm->fq_custom_context_index =
+ vlib_frame_queue_main_init (ip4_sv_reass_custom_context_node.index, 0);
rm->feature_use_refcount_per_intf = NULL;
rm->output_feature_use_refcount_per_intf = NULL;
@@ -1274,7 +1323,6 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm,
clib_spinlock_lock (&rt->lock);
vec_reset_length (pool_indexes_to_free);
- /* *INDENT-OFF* */
pool_foreach_index (index, rt->pool) {
reass = pool_elt_at_index (rt->pool, index);
if (now > reass->last_heard + rm->timeout)
@@ -1282,15 +1330,12 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm,
vec_add1 (pool_indexes_to_free, index);
}
}
- /* *INDENT-ON* */
int *i;
- /* *INDENT-OFF* */
vec_foreach (i, pool_indexes_to_free)
{
ip4_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
ip4_sv_reass_free (vm, rm, rt, reass);
}
- /* *INDENT-ON* */
clib_spinlock_unlock (&rt->lock);
}
@@ -1305,7 +1350,6 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_expire_node) = {
.function = ip4_sv_reass_walk_expired,
.type = VLIB_NODE_TYPE_PROCESS,
@@ -1314,7 +1358,6 @@ VLIB_REGISTER_NODE (ip4_sv_reass_expire_node) = {
.n_errors = IP4_N_ERROR,
.error_counters = ip4_error_counters,
};
-/* *INDENT-ON* */
static u8 *
format_ip4_sv_reass_key (u8 * s, va_list * args)
@@ -1381,11 +1424,9 @@ show_ip4_reass (vlib_main_t * vm,
clib_spinlock_lock (&rt->lock);
if (details)
{
- /* *INDENT-OFF* */
pool_foreach (reass, rt->pool) {
vlib_cli_output (vm, "%U", format_ip4_sv_reass, vm, reass);
}
- /* *INDENT-ON* */
}
sum_reass_n += rt->reass_n;
clib_spinlock_unlock (&rt->lock);
@@ -1409,13 +1450,11 @@ show_ip4_reass (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip4_sv_reass_cmd, static) = {
.path = "show ip4-sv-reassembly",
.short_help = "show ip4-sv-reassembly [details]",
.function = show_ip4_reass,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
vnet_api_error_t
@@ -1466,25 +1505,30 @@ format_ip4_sv_reass_handoff_trace (u8 * s, va_list * args)
}
always_inline uword
-ip4_sv_reass_handoff_node_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature)
+ip4_sv_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_feature,
+ bool is_custom_context)
{
ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
- u32 n_enq, n_left_from, *from;
+ u32 n_enq, n_left_from, *from, *context;
u16 thread_indices[VLIB_FRAME_SIZE], *ti;
u32 fq_index;
from = vlib_frame_vector_args (frame);
+ if (is_custom_context)
+ context = vlib_frame_aux_args (frame);
+
n_left_from = frame->n_vectors;
vlib_get_buffers (vm, from, bufs, n_left_from);
b = bufs;
ti = thread_indices;
- fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index;
+ fq_index = (is_feature) ? rm->fq_feature_index :
+ (is_custom_context ? rm->fq_custom_context_index :
+ rm->fq_index);
while (n_left_from > 0)
{
@@ -1503,8 +1547,12 @@ ip4_sv_reass_handoff_node_inline (vlib_main_t * vm,
ti += 1;
b += 1;
}
- n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from,
- thread_indices, frame->n_vectors, 1);
+ if (is_custom_context)
+ n_enq = vlib_buffer_enqueue_to_thread_with_aux (
+ vm, node, fq_index, from, context, thread_indices, frame->n_vectors, 1);
+ else
+ n_enq = vlib_buffer_enqueue_to_thread (
+ vm, node, fq_index, from, thread_indices, frame->n_vectors, 1);
if (n_enq < frame->n_vectors)
vlib_node_increment_counter (vm, node->node_index,
@@ -1517,12 +1565,11 @@ VLIB_NODE_FN (ip4_sv_reass_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_handoff_node_inline (vm, node, frame,
- false /* is_feature */ );
+ return ip4_sv_reass_handoff_node_inline (
+ vm, node, frame, false /* is_feature */, false /* is_custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = {
.name = "ip4-sv-reassembly-handoff",
.vector_size = sizeof (u32),
@@ -1536,22 +1583,39 @@ VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
+VLIB_NODE_FN (ip4_sv_reass_custom_context_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip4_sv_reass_handoff_node_inline (
+ vm, node, frame, false /* is_feature */, true /* is_custom_context */);
+}
+
+VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_handoff_node) = {
+ .name = "ip4-sv-reassembly-custom-context-handoff",
+ .vector_size = sizeof (u32),
+ .aux_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings),
+ .error_strings = ip4_sv_reass_handoff_error_strings,
+ .format_trace = format_ip4_sv_reass_handoff_trace,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
-/* *INDENT-OFF* */
VLIB_NODE_FN (ip4_sv_reass_feature_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t *
node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_handoff_node_inline (vm, node, frame,
- true /* is_feature */ );
+ return ip4_sv_reass_handoff_node_inline (
+ vm, node, frame, true /* is_feature */, false /* is_custom_context */);
}
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = {
.name = "ip4-sv-reass-feature-hoff",
.vector_size = sizeof (u32),
@@ -1565,7 +1629,6 @@ VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
int
@@ -1603,6 +1666,13 @@ ip4_sv_reass_custom_register_next_node (uword node_index)
node_index);
}
+uword
+ip4_sv_reass_custom_context_register_next_node (uword node_index)
+{
+ return vlib_node_add_next (
+ vlib_get_main (), ip4_sv_reass_custom_context_node.index, node_index);
+}
+
int
ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index,
int is_enable)
diff --git a/src/vnet/ip/reass/ip4_sv_reass.h b/src/vnet/ip/reass/ip4_sv_reass.h
index e926dbeebcc..3a684eb9809 100644
--- a/src/vnet/ip/reass/ip4_sv_reass.h
+++ b/src/vnet/ip/reass/ip4_sv_reass.h
@@ -49,6 +49,7 @@ int ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index,
int is_enable);
uword ip4_sv_reass_custom_register_next_node (uword node_index);
+uword ip4_sv_reass_custom_context_register_next_node (uword node_index);
#endif /* __included_ip4_sv_reass_h__ */
diff --git a/src/vnet/ip/reass/ip6_full_reass.c b/src/vnet/ip/reass/ip6_full_reass.c
index 97815572ee2..27647985877 100644
--- a/src/vnet/ip/reass/ip6_full_reass.c
+++ b/src/vnet/ip/reass/ip6_full_reass.c
@@ -705,8 +705,6 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_t *last_b = NULL;
u32 sub_chain_bi = reass->first_bi;
u32 total_length = 0;
- u32 buf_cnt = 0;
- u32 dropped_cnt = 0;
u32 *vec_drop_compress = NULL;
ip6_full_reass_rc_t rv = IP6_FULL_REASS_RC_OK;
do
@@ -748,7 +746,6 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end;
while (1)
{
- ++buf_cnt;
if (trim_front)
{
if (trim_front > tmp->current_length)
@@ -804,7 +801,6 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
goto free_buffers_and_return;
}
vec_add1 (vec_drop_compress, tmp_bi);
- ++dropped_cnt;
}
if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT)
{
@@ -1283,15 +1279,17 @@ ip6_full_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
}
else
{
+ u32 fib_index =
+ (vnet_buffer (b0)->sw_if_index[VLIB_TX] == (u32) ~0) ?
+ vec_elt (ip6_main.fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]) :
+ vnet_buffer (b0)->sw_if_index[VLIB_TX];
kv.k.as_u64[0] = ip0->src_address.as_u64[0];
kv.k.as_u64[1] = ip0->src_address.as_u64[1];
kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
kv.k.as_u64[4] =
- ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
- vnet_buffer (b0)->sw_if_index[VLIB_RX]))
- << 32 |
- (u64) frag_hdr->identification;
+ ((u64) fib_index) << 32 | (u64) frag_hdr->identification;
/* RFC 8200: The Next Header values in the Fragment headers of
* different fragments of the same original packet may differ.
* Only the value from the Offset zero fragment packet is used
@@ -2187,7 +2185,7 @@ ip6_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
"ip6-full-reassembly-feature",
sw_if_index, 0, 0, 0);
}
- return -1;
+ return 0;
}
void
diff --git a/src/vnet/ip/reass/ip6_sv_reass.c b/src/vnet/ip/reass/ip6_sv_reass.c
index c7f64ca3338..fe2ed05555c 100644
--- a/src/vnet/ip/reass/ip6_sv_reass.c
+++ b/src/vnet/ip/reass/ip6_sv_reass.c
@@ -150,6 +150,7 @@ typedef struct
/** Worker handoff */
u32 fq_index;
u32 fq_feature_index;
+ u32 fq_custom_context_index;
// reference count for enabling/disabling feature - per interface
u32 *feature_use_refcount_per_intf;
@@ -513,14 +514,18 @@ ip6_sv_reass_verify_packet_size_lt_64k (vlib_main_t * vm,
}
always_inline uword
-ip6_sv_reassembly_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature)
+ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_feature,
+ bool custom_next, bool custom_context)
{
u32 *from = vlib_frame_vector_args (frame);
- u32 n_left_from, n_left_to_next, *to_next, next_index;
+ u32 n_left_from, n_left_to_next, *to_next, *to_next_aux, next_index;
ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
ip6_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
+ u32 *context;
+ if (custom_context)
+ context = vlib_frame_aux_args (frame);
+
clib_spinlock_lock (&rt->lock);
n_left_from = frame->n_vectors;
@@ -528,7 +533,11 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
while (n_left_from > 0)
{
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ if (custom_context)
+ vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next,
+ to_next_aux, n_left_to_next);
+ else
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
while (n_left_from > 0 && n_left_to_next > 0)
{
@@ -536,7 +545,7 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
vlib_buffer_t *b0;
u32 next0 = IP6_SV_REASSEMBLY_NEXT_DROP;
u32 error0 = IP6_ERROR_NONE;
-
+ u8 forward_context = 0;
bi0 = from[0];
b0 = vlib_get_buffer (vm, bi0);
@@ -576,7 +585,8 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
goto packet_enqueue;
}
vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
- next0 = IP6_SV_REASSEMBLY_NEXT_INPUT;
+ next0 = custom_next ? vnet_buffer (b0)->ip.reass.next_index :
+ IP6_SV_REASSEMBLY_NEXT_INPUT;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
ip6_sv_reass_add_trace (
@@ -615,10 +625,15 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
kv.k.as_u64[1] = ip0->src_address.as_u64[1];
kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
- kv.k.as_u64[4] =
- ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
- vnet_buffer (b0)->sw_if_index[VLIB_RX])) << 32 |
- (u64) frag_hdr->identification;
+ if (custom_context)
+ kv.k.as_u64[4] =
+ (u64) *context << 32 | (u64) frag_hdr->identification;
+ else
+ kv.k.as_u64[4] =
+ ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]))
+ << 32 |
+ (u64) frag_hdr->identification;
kv.k.as_u64[5] = ip0->protocol;
ip6_sv_reass_t *reass =
@@ -629,6 +644,8 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
next0 = IP6_SV_REASSEMBLY_NEXT_HANDOFF;
vnet_buffer (b0)->ip.reass.owner_thread_index =
kv.v.thread_index;
+ if (custom_context)
+ forward_context = 1;
goto packet_enqueue;
}
@@ -653,7 +670,8 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
reass->tcp_seq_number;
vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
- next0 = IP6_SV_REASSEMBLY_NEXT_INPUT;
+ next0 = custom_next ? vnet_buffer (b0)->ip.reass.next_index :
+ IP6_SV_REASSEMBLY_NEXT_INPUT;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
ip6_sv_reass_add_trace (
@@ -748,11 +766,25 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
b0 = vlib_get_buffer (vm, bi0);
vnet_feature_next (&next0, b0);
}
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
+ if (custom_context && forward_context)
+ {
+ if (to_next_aux)
+ {
+ to_next_aux[0] = *context;
+ to_next_aux += 1;
+ }
+ vlib_validate_buffer_enqueue_with_aux_x1 (
+ vm, node, next_index, to_next, to_next_aux, n_left_to_next,
+ bi0, *context, next0);
+ }
+ else
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
next_packet:
from += 1;
+ if (custom_context)
+ context += 1;
n_left_from -= 1;
}
@@ -767,10 +799,11 @@ VLIB_NODE_FN (ip6_sv_reass_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */ );
+ return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */,
+ false /* custom next */,
+ false /* custom context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reass_node) = {
.name = "ip6-sv-reassembly",
.vector_size = sizeof (u32),
@@ -786,16 +819,16 @@ VLIB_REGISTER_NODE (ip6_sv_reass_node) = {
[IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reassembly-handoff",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_sv_reass_node_feature) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_sv_reassembly_inline (vm, node, frame, true /* is_feature */ );
+ return ip6_sv_reassembly_inline (vm, node, frame, true /* is_feature */,
+ false /* custom next */,
+ false /* custom context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reass_node_feature) = {
.name = "ip6-sv-reassembly-feature",
.vector_size = sizeof (u32),
@@ -811,16 +844,38 @@ VLIB_REGISTER_NODE (ip6_sv_reass_node_feature) = {
[IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reass-feature-hoff",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip6_sv_reassembly_feature) = {
.arc_name = "ip6-unicast",
.node_name = "ip6-sv-reassembly-feature",
.runs_before = VNET_FEATURES ("ip6-lookup"),
.runs_after = 0,
};
-/* *INDENT-ON* */
+
+VLIB_NODE_FN (ip6_sv_reass_custom_context_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */,
+ true /* custom next */,
+ true /* custom context */);
+}
+
+VLIB_REGISTER_NODE (ip6_sv_reass_custom_context_node) = {
+ .name = "ip6-sv-reassembly-custom-context",
+ .vector_size = sizeof (u32),
+ .aux_size = sizeof (u32),
+ .format_trace = format_ip6_sv_reass_trace,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
+ .n_next_nodes = IP6_SV_REASSEMBLY_N_NEXT,
+ .next_nodes =
+ {
+ [IP6_SV_REASSEMBLY_NEXT_INPUT] = "ip6-input",
+ [IP6_SV_REASSEMBLY_NEXT_DROP] = "ip6-drop",
+ [IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
+ [IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reassembly-custom-context-handoff",
+ },
+};
#ifndef CLIB_MARCH_VARIANT
static u32
@@ -971,6 +1026,8 @@ ip6_sv_reass_init_function (vlib_main_t * vm)
rm->fq_index = vlib_frame_queue_main_init (ip6_sv_reass_node.index, 0);
rm->fq_feature_index =
vlib_frame_queue_main_init (ip6_sv_reass_node_feature.index, 0);
+ rm->fq_custom_context_index =
+ vlib_frame_queue_main_init (ip6_sv_reass_custom_context_node.index, 0);
rm->feature_use_refcount_per_intf = NULL;
@@ -1021,7 +1078,6 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm,
clib_spinlock_lock (&rt->lock);
vec_reset_length (pool_indexes_to_free);
- /* *INDENT-OFF* */
pool_foreach_index (index, rt->pool) {
reass = pool_elt_at_index (rt->pool, index);
if (now > reass->last_heard + rm->timeout)
@@ -1029,15 +1085,12 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm,
vec_add1 (pool_indexes_to_free, index);
}
}
- /* *INDENT-ON* */
int *i;
- /* *INDENT-OFF* */
vec_foreach (i, pool_indexes_to_free)
{
ip6_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
ip6_sv_reass_free (vm, rm, rt, reass);
}
- /* *INDENT-ON* */
clib_spinlock_unlock (&rt->lock);
}
@@ -1052,7 +1105,6 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reass_expire_node) = {
.function = ip6_sv_reass_walk_expired,
.format_trace = format_ip6_sv_reass_trace,
@@ -1062,7 +1114,6 @@ VLIB_REGISTER_NODE (ip6_sv_reass_expire_node) = {
.n_errors = IP6_N_ERROR,
.error_counters = ip6_error_counters,
};
-/* *INDENT-ON* */
static u8 *
format_ip6_sv_reass_key (u8 * s, va_list * args)
@@ -1128,11 +1179,9 @@ show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input,
clib_spinlock_lock (&rt->lock);
if (details)
{
- /* *INDENT-OFF* */
pool_foreach (reass, rt->pool) {
vlib_cli_output (vm, "%U", format_ip6_sv_reass, vm, reass);
}
- /* *INDENT-ON* */
}
sum_reass_n += rt->reass_n;
clib_spinlock_unlock (&rt->lock);
@@ -1158,13 +1207,11 @@ show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip6_sv_reassembly_cmd, static) = {
.path = "show ip6-sv-reassembly",
.short_help = "show ip6-sv-reassembly [details]",
.function = show_ip6_sv_reass,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
vnet_api_error_t
@@ -1214,25 +1261,29 @@ format_ip6_sv_reassembly_handoff_trace (u8 * s, va_list * args)
}
always_inline uword
-ip6_sv_reassembly_handoff_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature)
+ip6_sv_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_feature,
+ bool custom_context)
{
ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
- u32 n_enq, n_left_from, *from;
+ u32 n_enq, n_left_from, *from, *context;
u16 thread_indices[VLIB_FRAME_SIZE], *ti;
u32 fq_index;
from = vlib_frame_vector_args (frame);
+ if (custom_context)
+ context = vlib_frame_aux_args (frame);
n_left_from = frame->n_vectors;
vlib_get_buffers (vm, from, bufs, n_left_from);
b = bufs;
ti = thread_indices;
- fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index;
+ fq_index = (is_feature) ?
+ rm->fq_feature_index :
+ (custom_context ? rm->fq_custom_context_index : rm->fq_index);
while (n_left_from > 0)
{
@@ -1251,8 +1302,12 @@ ip6_sv_reassembly_handoff_inline (vlib_main_t * vm,
ti += 1;
b += 1;
}
- n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from,
- thread_indices, frame->n_vectors, 1);
+ if (custom_context)
+ n_enq = vlib_buffer_enqueue_to_thread_with_aux (
+ vm, node, fq_index, from, context, thread_indices, frame->n_vectors, 1);
+ else
+ n_enq = vlib_buffer_enqueue_to_thread (
+ vm, node, fq_index, from, thread_indices, frame->n_vectors, 1);
if (n_enq < frame->n_vectors)
vlib_node_increment_counter (vm, node->node_index,
@@ -1265,11 +1320,10 @@ VLIB_NODE_FN (ip6_sv_reassembly_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_sv_reassembly_handoff_inline (vm, node, frame,
- false /* is_feature */ );
+ return ip6_sv_reassembly_handoff_inline (
+ vm, node, frame, false /* is_feature */, false /* custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reassembly_handoff_node) = {
.name = "ip6-sv-reassembly-handoff",
.vector_size = sizeof (u32),
@@ -1288,11 +1342,11 @@ VLIB_REGISTER_NODE (ip6_sv_reassembly_handoff_node) = {
VLIB_NODE_FN (ip6_sv_reassembly_feature_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * frame)
{
- return ip6_sv_reassembly_handoff_inline (vm, node, frame, true /* is_feature */ );
+ return ip6_sv_reassembly_handoff_inline (
+ vm, node, frame, true /* is_feature */, false /* custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reassembly_feature_handoff_node) = {
.name = "ip6-sv-reass-feature-hoff",
.vector_size = sizeof (u32),
@@ -1306,7 +1360,28 @@ VLIB_REGISTER_NODE (ip6_sv_reassembly_feature_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
+
+VLIB_NODE_FN (ip6_sv_reassembly_custom_context_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_sv_reassembly_handoff_inline (
+ vm, node, frame, false /* is_feature */, true /* custom_context */);
+}
+
+VLIB_REGISTER_NODE (ip6_sv_reassembly_custom_context_handoff_node) = {
+ .name = "ip6-sv-reassembly-custom-context-handoff",
+ .vector_size = sizeof (u32),
+ .aux_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(ip6_sv_reassembly_handoff_error_strings),
+ .error_strings = ip6_sv_reassembly_handoff_error_strings,
+ .format_trace = format_ip6_sv_reassembly_handoff_trace,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
#ifndef CLIB_MARCH_VARIANT
int
@@ -1335,6 +1410,14 @@ ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
}
return 0;
}
+
+uword
+ip6_sv_reass_custom_context_register_next_node (uword node_index)
+{
+ return vlib_node_add_next (
+ vlib_get_main (), ip6_sv_reassembly_custom_context_handoff_node.index,
+ node_index);
+}
#endif
/*
diff --git a/src/vnet/ip/reass/ip6_sv_reass.h b/src/vnet/ip/reass/ip6_sv_reass.h
index 81ac2312bdf..7dc9df132dd 100644
--- a/src/vnet/ip/reass/ip6_sv_reass.h
+++ b/src/vnet/ip/reass/ip6_sv_reass.h
@@ -44,6 +44,7 @@ vnet_api_error_t ip6_sv_reass_enable_disable (u32 sw_if_index,
u8 enable_disable);
int ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable);
+uword ip6_sv_reass_custom_context_register_next_node (uword node_index);
#endif /* __included_ip6_sv_reass_h */
diff --git a/src/vnet/ip/vtep.h b/src/vnet/ip/vtep.h
index 8b2c7fe723f..97e74429e88 100644
--- a/src/vnet/ip/vtep.h
+++ b/src/vnet/ip/vtep.h
@@ -29,7 +29,6 @@
* processing and go directly to the tunnel protocol handler node.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED
(struct {
union {
@@ -40,7 +39,6 @@ typedef CLIB_PACKED
u64 as_u64;
};
}) vtep4_key_t;
-/* *INDENT-ON* */
/**
* @brief Tunnel endpoint key (IPv6)
@@ -51,13 +49,11 @@ typedef CLIB_PACKED
* processing and go directly to the tunnel protocol handler node.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED
(struct {
ip6_address_t addr;
u32 fib_index;
}) vtep6_key_t;
-/* *INDENT-ON* */
typedef struct
{
diff --git a/src/vnet/ip6-nd/ip6_mld.c b/src/vnet/ip6-nd/ip6_mld.c
index ea70bcc5d19..74428ec93c3 100644
--- a/src/vnet/ip6-nd/ip6_mld.c
+++ b/src/vnet/ip6-nd/ip6_mld.c
@@ -33,7 +33,6 @@
* adjacency tables and neighbor discovery logic.
*/
-/* *INDENT-OFF*/
/* multicast listener report packet format for ethernet. */
typedef CLIB_PACKED (struct
{
@@ -51,7 +50,6 @@ typedef CLIB_PACKED (struct
ip6_header_t ip;
icmp6_multicast_listener_report_header_t report_hdr;
}) icmp6_multicast_listener_report_packet_t;
-/* *INDENT-ON*/
typedef struct
{
@@ -224,12 +222,10 @@ ip6_mld_delegate_disable (index_t imdi)
imd = pool_elt_at_index (ip6_mld_pool, imdi);
/* clean MLD pools */
- /* *INDENT-OFF* */
pool_flush (m, imd->mldp_group_pool,
({
mhash_unset (&imd->address_to_mldp_index, &m->mcast_address, 0);
}));
- /* *INDENT-ON* */
pool_free (imd->mldp_group_pool);
@@ -326,7 +322,6 @@ ip6_neighbor_send_mldpv2_report (u32 sw_if_index)
rh0->icmp.checksum = 0;
- /* *INDENT-OFF* */
pool_foreach (m, imd->mldp_group_pool)
{
rr.type = m->type;
@@ -345,7 +340,6 @@ ip6_neighbor_send_mldpv2_report (u32 sw_if_index)
payload_length += sizeof( icmp6_multicast_address_record_t);
}
- /* *INDENT-ON* */
rh0->rsvd = 0;
rh0->num_addr_records = clib_host_to_net_u16 (num_addr_records);
@@ -388,7 +382,6 @@ ip6_mld_timer_event (vlib_main_t * vm,
ip6_mld_t *imd;
/* Interface ip6 radv info list */
- /* *INDENT-OFF* */
pool_foreach (imd, ip6_mld_pool)
{
if (!vnet_sw_interface_is_admin_up (vnm, imd->sw_if_index))
@@ -405,7 +398,6 @@ ip6_mld_timer_event (vlib_main_t * vm,
imd->all_routers_mcast = 1;
}
}
- /* *INDENT-ON* */
return 0;
}
@@ -433,13 +425,11 @@ ip6_mld_event_process (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_mld_event_process_node) = {
.function = ip6_mld_event_process,
.name = "ip6-mld-process",
.type = VLIB_NODE_TYPE_PROCESS,
};
-/* *INDENT-ON* */
static u8 *
format_ip6_mld (u8 * s, va_list * args)
@@ -453,7 +443,6 @@ format_ip6_mld (u8 * s, va_list * args)
s = format (s, "%UJoined group address(es):\n", format_white_space, indent);
- /* *INDENT-OFF* */
pool_foreach (m, imd->mldp_group_pool)
{
s = format (s, "%U%U\n",
@@ -461,7 +450,6 @@ format_ip6_mld (u8 * s, va_list * args)
format_ip6_address,
&m->mcast_address);
}
- /* *INDENT-ON* */
return (s);
}
@@ -526,12 +514,10 @@ ip6_mld_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip6_mld_init) =
{
.runs_after = VLIB_INITS("icmp6_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip6-nd/ip6_nd.api b/src/vnet/ip6-nd/ip6_nd.api
index 0a519c16f7f..3ddf25103c1 100644
--- a/src/vnet/ip6-nd/ip6_nd.api
+++ b/src/vnet/ip6-nd/ip6_nd.api
@@ -20,7 +20,7 @@
called through a shared memory interface.
*/
-option version = "1.0.0";
+option version = "1.1.0";
import "vnet/ip/ip_types.api";
import "vnet/interface_types.api";
@@ -106,6 +106,134 @@ autoreply define sw_interface_ip6nd_ra_prefix
u32 pref_lifetime;
};
+/** \brief IPv6 Router Advertisements prefix entry
+ @param prefix - prefix to advertise
+ @param onlink_flag - if true, the prefix can be used for on-link
+ determination
+ @param autonomous_flag - if true, the prefix can be used for stateless
+ address configuration
+ @param val_lifetime - valid lifetime in seconds (0xffffffff represents
+ infinity)
+ @param pref_lifetime - preferred lifetime in seconds (0xffffffff represents
+ infinity)
+ @param valid_lifetime_expires - number of seconds until the valid lifetime
+ expires (zero means never, a negative value
+ means it expired that many seconds ago)
+ @param pref_lifetime_expires - number of seconds until the preferred
+ lifetime expires (zero means never, a
+ negative value means it expired that many
+ seconds ago)
+ @param decrement_lifetime_flag - if true, decrement valid lifetime and
+ preferred lifetime
+ @param no_advertise - if true, the prefix will not be advertised
+*/
+typedef ip6nd_ra_prefix
+{
+ vl_api_prefix_t prefix;
+ bool onlink_flag;
+ bool autonomous_flag;
+ u32 val_lifetime;
+ u32 pref_lifetime;
+ f64 valid_lifetime_expires;
+ f64 pref_lifetime_expires;
+ bool decrement_lifetime_flag;
+ bool no_advertise;
+};
+
+/** \brief Dump IPv6 Router Advertisements details on a per-interface basis
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface index to use as a filter (0xffffffff
+ represents all interfaces)
+*/
+define sw_interface_ip6nd_ra_dump
+{
+ option in_progress;
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ option vat_help = "[(<if-name>|sw_if_index <if-idx>)]";
+};
+
+/** \brief Details on IPv6 Router Advertisements for a single interface
+ @param context - returned sender context, to match reply w/ request
+ @param sw_if_index - interface index the details belong to
+ @param cur_hop_limit - current hop limit
+ @param adv_managed_flag - if true, enable DHCP for address
+ @param adv_other_flag - if true, Enable DHCP for other information
+ @param adv_router_lifetime - lifetime associated with the default router in
+ seconds (zero indicates that the router is not
+ a default router)
+ @param adv_neighbor_reachable_time - number of milliseconds within which a
+ neighbor is assumed to be reachable
+ (zero means unspecified)
+ @param adv_retransmit_interval - number of milliseconds between
+ retransmitted Neighbor Solicitation
+ messages (zero means unspecified)
+ @param adv_link_mtu - MTU that all the nodes on a link use
+ @param send_radv - if true, send periodic Router Advertisements
+ @param cease_radv - if true, cease to send periodic Router Advertisements
+ @param send_unicast - if true, destination address of a Router
+ Advertisement message will use the source address of
+ the Router Solicitation message (when available).
+ Otherwise, multicast address will be used
+ @param adv_link_layer_address - if true, add link layer address option
+ @param max_radv_interval - maximum time in seconds allowed between sending
+ unsolicited multicast Router Advertisements
+ @param min_radv_interval - minimum time in seconds allowed between sending
+ unsolicited multicast Router Advertisements
+ @param last_radv_time - number of seconds since the last time a solicited
+ Router Advertisement message was sent (zero means
+ never)
+ @param last_multicast_time - number of seconds since the last time a
+ multicast Router Advertisements message was
+ sent (zero means never)
+ @param next_multicast_time - number of seconds within which next time a
+ multicast Router Advertisement message will be
+ sent (zero means never)
+ @param initial_adverts_count - number of initial Router Advertisement
+ messages to send
+ @param initial_adverts_interval - number of seconds between initial Router
+ Advertisement messages
+ @param initial_adverts_sent - if true, all initial Router Advertisement
+ messages were sent
+ @param n_advertisements_sent - number of Router Advertisements sent
+ @param n_solicitations_rcvd - number of Router Solicitations received
+ @param n_solicitations_dropped - number of Router Solicitations dropped
+ @param n_prefixes - number of prefix entries
+ @param prefixes - array of prefix entries
+*/
+define sw_interface_ip6nd_ra_details
+{
+ option in_progress;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u8 cur_hop_limit;
+ bool adv_managed_flag;
+ bool adv_other_flag;
+ u16 adv_router_lifetime;
+ u32 adv_neighbor_reachable_time;
+ u32 adv_retransmit_interval;
+ u32 adv_link_mtu;
+ bool send_radv;
+ bool cease_radv;
+ bool send_unicast;
+ bool adv_link_layer_address;
+ f64 max_radv_interval;
+ f64 min_radv_interval;
+ f64 last_radv_time;
+ f64 last_multicast_time;
+ f64 next_multicast_time;
+ u32 initial_adverts_count;
+ f64 initial_adverts_interval;
+ bool initial_adverts_sent;
+ u32 n_advertisements_sent;
+ u32 n_solicitations_rcvd;
+ u32 n_solicitations_dropped;
+ u32 n_prefixes;
+ vl_api_ip6nd_ra_prefix_t prefixes[n_prefixes];
+};
+
/** \brief IPv6 ND (mirror) proxy
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/vnet/ip6-nd/ip6_nd.c b/src/vnet/ip6-nd/ip6_nd.c
index 513d2bf6e87..763aca290e6 100644
--- a/src/vnet/ip6-nd/ip6_nd.c
+++ b/src/vnet/ip6-nd/ip6_nd.c
@@ -149,7 +149,6 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm,
if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 &&
!ip6_sadd_unspecified))
{
- /* *INDENT-OFF* */
ip_neighbor_learn_t learn = {
.sw_if_index = sw_if_index0,
.ip = {
@@ -159,7 +158,6 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm,
h0->target_address),
}
};
- /* *INDENT-ON* */
memcpy (&learn.mac, o0->ethernet_address, sizeof (learn.mac));
ip_neighbor_learn_dp (&learn);
}
@@ -343,7 +341,6 @@ icmp6_neighbor_advertisement (vlib_main_t * vm,
0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_icmp_neighbor_solicitation_node,static) =
{
.function = icmp6_neighbor_solicitation,
@@ -374,7 +371,6 @@ VLIB_REGISTER_NODE (ip6_icmp_neighbor_advertisement_node,static) =
[0] = "ip6-punt",
},
};
-/* *INDENT-ON* */
static u8 *
format_ip6_nd (u8 * s, va_list * args)
@@ -427,12 +423,10 @@ ip6_nd_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip6_nd_init) =
{
.runs_after = VLIB_INITS("icmp6_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip6-nd/ip6_nd_api.c b/src/vnet/ip6-nd/ip6_nd_api.c
index 6520a61f691..5555d8fea64 100644
--- a/src/vnet/ip6-nd/ip6_nd_api.c
+++ b/src/vnet/ip6-nd/ip6_nd_api.c
@@ -95,13 +95,11 @@ vl_api_ip6nd_proxy_dump_t_handler (vl_api_ip6nd_proxy_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach_index (fib_index, im6->fibs)
{
fib_table_walk (fib_index, FIB_PROTOCOL_IP6,
api_ip6nd_proxy_fib_table_walk, &ctx);
}
- /* *INDENT-ON* */
vec_sort_with_function (ctx.indices, fib_entry_cmp_for_sort);
@@ -222,6 +220,175 @@ static void
}
static void
+ip6_radv_prefix_encode (f64 now, const ip6_radv_prefix_t *in,
+ vl_api_ip6nd_ra_prefix_t *out)
+{
+ fib_prefix_t in_ip6_pfx = {
+ .fp_addr = {
+ .ip6 = in->prefix,
+ },
+ .fp_len = in->prefix_len,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ };
+
+ ip_prefix_encode (&in_ip6_pfx, &out->prefix);
+
+ out->onlink_flag = in->adv_on_link_flag;
+ out->autonomous_flag = in->adv_autonomous_flag;
+ out->val_lifetime = htonl (in->adv_valid_lifetime_in_secs);
+ out->pref_lifetime = htonl (in->adv_pref_lifetime_in_secs);
+
+ if (in->adv_valid_lifetime_in_secs != ~0)
+ {
+ out->valid_lifetime_expires =
+ clib_host_to_net_f64 (in->valid_lifetime_expires - now);
+ }
+
+ if (in->adv_pref_lifetime_in_secs != ~0)
+ {
+ out->pref_lifetime_expires =
+ clib_host_to_net_f64 (in->pref_lifetime_expires - now);
+ }
+
+ out->decrement_lifetime_flag = in->decrement_lifetime_flag;
+ out->no_advertise = (in->enabled == 0);
+}
+
+static void
+send_sw_interface_ip6nd_ra_details (vl_api_registration_t *reg, u32 context,
+ ip6_ra_t *radv_info)
+{
+ vl_api_sw_interface_ip6nd_ra_details_t *rmp = 0;
+ vl_api_ip6nd_ra_prefix_t *api_radv_pfx;
+ u32 n_prefixes = pool_elts (radv_info->adv_prefixes_pool);
+ ip6_radv_prefix_t *radv_pfx;
+ u32 msg_size = sizeof (*rmp) + n_prefixes * sizeof (*api_radv_pfx);
+ vlib_main_t *vm = vlib_get_main ();
+ f64 now = vlib_time_now (vm);
+
+ rmp = vl_msg_api_alloc (msg_size);
+ if (!rmp)
+ return;
+ clib_memset (rmp, 0, msg_size);
+ rmp->_vl_msg_id =
+ ntohs (VL_API_SW_INTERFACE_IP6ND_RA_DETAILS + REPLY_MSG_ID_BASE);
+ rmp->context = context;
+
+ rmp->sw_if_index = htonl (radv_info->sw_if_index);
+ rmp->cur_hop_limit = radv_info->curr_hop_limit;
+ rmp->adv_managed_flag = radv_info->adv_managed_flag;
+ rmp->adv_other_flag = radv_info->adv_other_flag;
+ rmp->adv_router_lifetime = htons (radv_info->adv_router_lifetime_in_sec);
+ rmp->adv_neighbor_reachable_time =
+ htonl (radv_info->adv_neighbor_reachable_time_in_msec);
+ rmp->adv_retransmit_interval = htonl (
+ radv_info->adv_time_in_msec_between_retransmitted_neighbor_solicitations);
+ rmp->adv_link_mtu = htonl (radv_info->adv_link_mtu);
+ rmp->send_radv = radv_info->send_radv;
+ rmp->cease_radv = radv_info->cease_radv;
+ rmp->send_unicast = radv_info->send_unicast;
+ rmp->adv_link_layer_address = radv_info->adv_link_layer_address;
+ rmp->max_radv_interval = clib_host_to_net_f64 (radv_info->max_radv_interval);
+ rmp->min_radv_interval = clib_host_to_net_f64 (radv_info->min_radv_interval);
+
+ if (radv_info->last_radv_time > 0.0)
+ {
+ rmp->last_radv_time =
+ clib_host_to_net_f64 (now - radv_info->last_radv_time);
+ }
+
+ if ((radv_info->next_multicast_time - radv_info->last_multicast_time) > 0.0)
+ {
+ rmp->last_multicast_time =
+ clib_host_to_net_f64 (now - radv_info->last_multicast_time);
+ rmp->next_multicast_time =
+ clib_host_to_net_f64 (radv_info->next_multicast_time - now);
+ }
+
+ rmp->initial_adverts_count = htonl (radv_info->initial_adverts_count);
+ rmp->initial_adverts_interval =
+ clib_host_to_net_f64 (radv_info->initial_adverts_interval);
+ rmp->initial_adverts_sent = (radv_info->initial_adverts_sent == 0);
+ rmp->n_advertisements_sent = htonl (radv_info->n_advertisements_sent);
+ rmp->n_solicitations_rcvd = htonl (radv_info->n_solicitations_rcvd);
+ rmp->n_solicitations_dropped = htonl (radv_info->n_solicitations_dropped);
+ rmp->n_prefixes = htonl (n_prefixes);
+
+ api_radv_pfx = rmp->prefixes;
+ pool_foreach (radv_pfx, radv_info->adv_prefixes_pool)
+ {
+ ip6_radv_prefix_encode (now, radv_pfx, api_radv_pfx);
+
+ api_radv_pfx++;
+ }
+
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+typedef struct
+{
+ u32 *sw_if_indices;
+} api_dump_ip6_ra_itf_walk_ctx_t;
+
+static walk_rc_t
+api_dump_ip6_ra_itf_walk_fn (u32 sw_if_index, void *arg)
+{
+ api_dump_ip6_ra_itf_walk_ctx_t *ctx = arg;
+
+ vec_add1 (ctx->sw_if_indices, sw_if_index);
+
+ return (WALK_CONTINUE);
+}
+
+static void
+vl_api_sw_interface_ip6nd_ra_dump_t_handler (
+ vl_api_sw_interface_ip6nd_ra_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ u32 sw_if_index;
+ ip6_ra_t *radv_info;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (sw_if_index == INDEX_INVALID)
+ {
+ /* dump all interfaces */
+
+ api_dump_ip6_ra_itf_walk_ctx_t ctx = {
+ .sw_if_indices = NULL,
+ };
+ u32 *sw_if_i;
+
+ ip6_ra_itf_walk (api_dump_ip6_ra_itf_walk_fn, &ctx);
+
+ vec_foreach (sw_if_i, ctx.sw_if_indices)
+ {
+ radv_info = ip6_ra_get_itf (*sw_if_i);
+ if (radv_info != NULL)
+ {
+ send_sw_interface_ip6nd_ra_details (reg, mp->context, radv_info);
+ }
+ }
+
+ vec_free (ctx.sw_if_indices);
+ }
+ else
+ {
+ /* dump a single interface */
+
+ radv_info = ip6_ra_get_itf (sw_if_index);
+ if (radv_info != NULL)
+ {
+ send_sw_interface_ip6nd_ra_details (reg, mp->context, radv_info);
+ }
+ }
+}
+
+static void
vl_api_ip6nd_send_router_solicitation_t_handler
(vl_api_ip6nd_send_router_solicitation_t * mp)
{
@@ -250,7 +417,6 @@ static void
static void
ip6_ra_handle_report (const ip6_ra_report_t * rap)
{
- /* *INDENT-OFF* */
vpe_client_registration_t *rp;
pool_foreach (rp, vpe_api_main.ip6_ra_events_registrations)
@@ -304,7 +470,6 @@ ip6_ra_handle_report (const ip6_ra_report_t * rap)
vl_api_send_msg (vl_reg, (u8 *) event);
}
}
- /* *INDENT-ON* */
}
static void
diff --git a/src/vnet/ip6-nd/ip6_nd_inline.h b/src/vnet/ip6-nd/ip6_nd_inline.h
index 5e8b9d6e4c0..c959c94ed1d 100644
--- a/src/vnet/ip6-nd/ip6_nd_inline.h
+++ b/src/vnet/ip6-nd/ip6_nd_inline.h
@@ -23,6 +23,7 @@
#include <vnet/ip/icmp46_packet.h>
#include <vnet/ip/ip6.h>
#include <vnet/ip-neighbor/ip_neighbor_types.h>
+#include <vnet/ip6-nd/ip6_ra.h>
typedef enum
{
@@ -71,6 +72,13 @@ icmp6_send_neighbor_advertisement (
clib_host_to_net_u32 (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED |
ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
+ /* if sending RAs is enabled, the "router" flag should be set,
+ * otherwise, neighbors may believe we have changed from a router
+ * to a host - RFC 4861 section 4.4 */
+ if (ip6_ra_adv_enabled (sw_if_index0))
+ icmp6_nsa->advertisement_flags |=
+ clib_host_to_net_u32 (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_ROUTER);
+
icmp6_nsa->icmp.checksum = 0;
icmp6_nsa->icmp.checksum =
ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6_h, &bogus_length);
diff --git a/src/vnet/ip6-nd/ip6_nd_proxy.c b/src/vnet/ip6-nd/ip6_nd_proxy.c
index 256b48581bb..f7f07cb59f6 100644
--- a/src/vnet/ip6-nd/ip6_nd_proxy.c
+++ b/src/vnet/ip6-nd/ip6_nd_proxy.c
@@ -23,7 +23,6 @@
static int
ip6_nd_proxy_add_del (u32 sw_if_index, const ip6_address_t * addr, u8 is_del)
{
- /* *INDENT-OFF* */
u32 fib_index;
fib_prefix_t pfx = {
.fp_len = 128,
@@ -35,7 +34,6 @@ ip6_nd_proxy_add_del (u32 sw_if_index, const ip6_address_t * addr, u8 is_del)
ip46_address_t nh = {
.ip6 = *addr,
};
- /* *INDENT-ON* */
fib_index = ip6_fib_table_get_index_for_sw_if_index (sw_if_index);
@@ -117,14 +115,12 @@ set_ip6_nd_proxy_cmd (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip6_nd_proxy_command, static) =
{
.path = "set ip6 nd proxy",
.short_help = "set ip6 nd proxy <interface> [del] <host-ip>",
.function = set_ip6_nd_proxy_cmd,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip6-nd/ip6_nd_test.c b/src/vnet/ip6-nd/ip6_nd_test.c
index 933029d7593..488ca591ba0 100644
--- a/src/vnet/ip6-nd/ip6_nd_test.c
+++ b/src/vnet/ip6-nd/ip6_nd_test.c
@@ -325,6 +325,63 @@ api_ip6nd_proxy_enable_disable (vat_main_t *vam)
return -1;
}
+static int
+api_sw_interface_ip6nd_ra_dump (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_ip6nd_ra_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u32 sw_if_index = ~0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %u", &sw_if_index))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_IP6ND_RA_DUMP, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+
+ /* Send it */
+ S (mp);
+
+ /* Use control ping for synchronization */
+ PING (&ip6_nd_test_main, mp_ping);
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+
+ return ret;
+}
+
+static void
+vl_api_sw_interface_ip6nd_ra_details_t_handler (
+ vl_api_sw_interface_ip6nd_ra_details_t *mp)
+{
+ vat_main_t *vam = ip6_nd_test_main.vat_main;
+ u32 sw_if_index;
+ u8 send_radv;
+
+ /* Read the message */
+ sw_if_index = ntohl (mp->sw_if_index);
+ send_radv = mp->send_radv;
+
+ /* Print it */
+ print (vam->ofp, "sw_if_index: %u, send_radv: %s", sw_if_index,
+ (send_radv ? "on" : "off"));
+}
+
#include <ip6-nd/ip6_nd.api_test.c>
/*
diff --git a/src/vnet/ip6-nd/ip6_ra.c b/src/vnet/ip6-nd/ip6_ra.c
index 53f5a41418c..ffc02e813e2 100644
--- a/src/vnet/ip6-nd/ip6_ra.c
+++ b/src/vnet/ip6-nd/ip6_ra.c
@@ -30,7 +30,6 @@
* The files contains the API and CLI code for managing IPv6 RAs
*/
-/* *INDENT-OFF* */
/* Router solicitation packet format for ethernet. */
typedef CLIB_PACKED (struct
{
@@ -51,7 +50,6 @@ typedef CLIB_PACKED (struct
icmp6_neighbor_discovery_prefix_information_option_t
prefix[0];
}) icmp6_router_advertisement_packet_t;
-/* *INDENT-ON* */
#define DEF_MAX_RADV_INTERVAL 200
#define DEF_MIN_RADV_INTERVAL .75 * DEF_MAX_RADV_INTERVAL
@@ -65,95 +63,6 @@ typedef CLIB_PACKED (struct
#define MAX_DELAY_BETWEEN_RAS 1800 /* seconds */
#define MAX_RA_DELAY_TIME .5 /* seconds */
-/* advertised prefix option */
-typedef struct
-{
- /* basic advertised information */
- ip6_address_t prefix;
- u8 prefix_len;
- int adv_on_link_flag;
- int adv_autonomous_flag;
- u32 adv_valid_lifetime_in_secs;
- u32 adv_pref_lifetime_in_secs;
-
- /* advertised values are computed from these times if decrementing */
- f64 valid_lifetime_expires;
- f64 pref_lifetime_expires;
-
- /* local information */
- int enabled;
- int deprecated_prefix_flag;
- int decrement_lifetime_flag;
-
-#define MIN_ADV_VALID_LIFETIME 7203 /* seconds */
-#define DEF_ADV_VALID_LIFETIME 2592000
-#define DEF_ADV_PREF_LIFETIME 604800
-
- /* extensions are added here, mobile, DNS etc.. */
-} ip6_radv_prefix_t;
-
-typedef struct ip6_ra_t_
-{
- /* advertised config information, zero means unspecified */
- u8 curr_hop_limit;
- int adv_managed_flag;
- int adv_other_flag;
- u16 adv_router_lifetime_in_sec;
- u32 adv_neighbor_reachable_time_in_msec;
- u32 adv_time_in_msec_between_retransmitted_neighbor_solicitations;
-
- /* mtu option */
- u32 adv_link_mtu;
-
- /* local information */
- u32 sw_if_index;
- int send_radv; /* radv on/off on this interface - set by config */
- int cease_radv; /* we are ceasing to send - set byf config */
- int send_unicast;
- int adv_link_layer_address;
- int prefix_option;
- int failed_device_check;
- int ref_count;
-
- /* prefix option */
- ip6_radv_prefix_t *adv_prefixes_pool;
-
- /* Hash table mapping address to index in interface advertised prefix pool. */
- mhash_t address_to_prefix_index;
-
- f64 max_radv_interval;
- f64 min_radv_interval;
- f64 min_delay_between_radv;
- f64 max_delay_between_radv;
- f64 max_rtr_default_lifetime;
-
- f64 last_radv_time;
- f64 last_multicast_time;
- f64 next_multicast_time;
-
-
- u32 initial_adverts_count;
- f64 initial_adverts_interval;
- u32 initial_adverts_sent;
-
- /* stats */
- u32 n_advertisements_sent;
- u32 n_solicitations_rcvd;
- u32 n_solicitations_dropped;
-
- /* router solicitations sending state */
- u8 keep_sending_rs; /* when true then next fields are valid */
- icmp6_send_router_solicitation_params_t params;
- f64 sleep_interval;
- f64 due_time;
- u32 n_left;
- f64 start_time;
- vlib_buffer_t *buffer;
-
- u32 seed;
-
-} ip6_ra_t;
-
static ip6_link_delegate_id_t ip6_ra_delegate_id;
static ip6_ra_t *ip6_ra_pool;
@@ -191,7 +100,7 @@ ip6_ra_report_unregister (ip6_ra_report_notify_t fn)
}
}
-static inline ip6_ra_t *
+ip6_ra_t *
ip6_ra_get_itf (u32 sw_if_index)
{
index_t rai;
@@ -204,6 +113,28 @@ ip6_ra_get_itf (u32 sw_if_index)
return (NULL);
}
+u8
+ip6_ra_adv_enabled (u32 sw_if_index)
+{
+ ip6_ra_t *ra;
+
+ ra = ip6_ra_get_itf (sw_if_index);
+
+ return ((ra != NULL) && (ra->send_radv != 0));
+}
+
+void
+ip6_ra_itf_walk (ip6_ra_itf_walk_fn_t fn, void *ctx)
+{
+ ip6_ra_t *radv_info;
+
+ pool_foreach (radv_info, ip6_ra_pool)
+ {
+ if (WALK_STOP == fn (radv_info->sw_if_index, ctx))
+ break;
+ }
+}
+
/* for "syslogging" - use elog for now */
#define foreach_log_level \
_ (DEBUG, "DEBUG") \
@@ -372,7 +303,6 @@ icmp6_router_solicitation (vlib_main_t * vm,
if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 &&
!is_unspecified && !is_link_local))
{
- /* *INDENT-OFF* */
ip_neighbor_learn_t learn = {
.sw_if_index = sw_if_index0,
.ip = {
@@ -380,7 +310,6 @@ icmp6_router_solicitation (vlib_main_t * vm,
.version = AF_IP6,
},
};
- /* *INDENT-ON* */
memcpy (&learn.mac, o0->ethernet_address, sizeof (learn.mac));
ip_neighbor_learn_dp (&learn);
}
@@ -527,7 +456,6 @@ icmp6_router_solicitation (vlib_main_t * vm,
/* add advertised prefix options */
ip6_radv_prefix_t *pr_info;
- /* *INDENT-OFF* */
pool_foreach (pr_info, radv_info->adv_prefixes_pool)
{
if(pr_info->enabled &&
@@ -593,7 +521,6 @@ icmp6_router_solicitation (vlib_main_t * vm,
}
}
- /* *INDENT-ON* */
/* add additional options before here */
@@ -701,7 +628,6 @@ icmp6_router_solicitation (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_icmp_router_solicitation_node,static) =
{
.function = icmp6_router_solicitation,
@@ -718,7 +644,6 @@ VLIB_REGISTER_NODE (ip6_icmp_router_solicitation_node,static) =
[ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX] = "interface-output",
},
};
-/* *INDENT-ON* */
/* validate advertised info for consistancy (see RFC-4861 section 6.2.7) - log any inconsistencies, packet will always be dropped */
static_always_inline uword
@@ -1011,7 +936,6 @@ icmp6_router_advertisement (vlib_main_t * vm,
prefix->prefix.fp_proto = FIB_PROTOCOL_IP6;
/* look for matching prefix - if we our advertising it, it better be consistant */
- /* *INDENT-OFF* */
pool_foreach (pr_info, radv_info->adv_prefixes_pool)
{
@@ -1042,7 +966,6 @@ icmp6_router_advertisement (vlib_main_t * vm,
}
break;
}
- /* *INDENT-ON* */
break;
}
default:
@@ -1076,7 +999,6 @@ icmp6_router_advertisement (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_icmp_router_advertisement_node,static) =
{
.function = icmp6_router_advertisement,
@@ -1091,7 +1013,6 @@ VLIB_REGISTER_NODE (ip6_icmp_router_advertisement_node,static) =
[0] = "ip6-drop",
},
};
-/* *INDENT-ON* */
static inline f64
random_f64_from_to (f64 from, f64 to)
@@ -1281,14 +1202,12 @@ send_rs_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
do
{
due_time = current_time + 1e9;
- /* *INDENT-OFF* */
pool_foreach (radv_info, ip6_ra_pool)
{
if (check_send_rs (vm, radv_info, current_time, &dt)
&& (dt < due_time))
due_time = dt;
}
- /* *INDENT-ON* */
current_time = vlib_time_now (vm);
}
while (due_time < current_time);
@@ -1299,13 +1218,11 @@ send_rs_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_rs_process_node) = {
.function = send_rs_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "ip6-rs-process",
};
-/* *INDENT-ON* */
void
icmp6_send_router_solicitation (vlib_main_t * vm, u32 sw_if_index, u8 stop,
@@ -1413,12 +1330,10 @@ ip6_ra_delegate_disable (index_t rai)
radv_info = pool_elt_at_index (ip6_ra_pool, rai);
/* clean up prefix and MDP pools */
- /* *INDENT-OFF* */
pool_flush(p, radv_info->adv_prefixes_pool,
({
mhash_unset (&radv_info->address_to_prefix_index, &p->prefix, 0);
}));
- /* *INDENT-ON* */
pool_free (radv_info->adv_prefixes_pool);
@@ -1440,12 +1355,10 @@ ip6_ra_update_secondary_radv_info (ip6_address_t * address, u8 prefix_len,
ip6_address_mask_from_width (&mask, prefix_len);
vec_reset_length (radv_indices);
- /* *INDENT-OFF* */
pool_foreach (radv_info, ip6_ra_pool)
{
vec_add1 (radv_indices, radv_info - ip6_ra_pool);
}
- /* *INDENT-ON* */
/*
* If we have another customer for this prefix,
@@ -1460,7 +1373,6 @@ ip6_ra_update_secondary_radv_info (ip6_address_t * address, u8 prefix_len,
if (radv_info->sw_if_index == primary_sw_if_index)
continue;
- /* *INDENT-OFF* */
pool_foreach (this_prefix, radv_info->adv_prefixes_pool)
{
if (this_prefix->prefix_len == prefix_len
@@ -1483,7 +1395,6 @@ ip6_ra_update_secondary_radv_info (ip6_address_t * address, u8 prefix_len,
clib_warning ("ip6_neighbor_ra_prefix returned %d", rv);
}
}
- /* *INDENT-ON*/
}
}
@@ -1504,7 +1415,6 @@ ip6_ra_process_timer_event (vlib_main_t * vm,
f64 now = vlib_time_now (vm);
/* Interface ip6 radv info list */
- /* *INDENT-OFF* */
pool_foreach (radv_info, ip6_ra_pool)
{
if( !vnet_sw_interface_is_admin_up (vnm, radv_info->sw_if_index))
@@ -1594,7 +1504,6 @@ ip6_ra_process_timer_event (vlib_main_t * vm,
}
}
}
- /* *INDENT-ON* */
if (f)
{
@@ -1651,14 +1560,12 @@ ip6_ra_event_process (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_ra_process_node) =
{
.function = ip6_ra_event_process,
.name = "ip6-ra-process",
.type = VLIB_NODE_TYPE_PROCESS,
};
-/* *INDENT-ON* */
static void
ip6_ra_signal_report (ip6_ra_report_t * r)
@@ -1700,6 +1607,9 @@ ip6_ra_config (vlib_main_t * vm, u32 sw_if_index,
if (!radv_info)
return (VNET_API_ERROR_IP6_NOT_ENABLED);
+ /* Start off believing that we're going to send radv's */
+ radv_info->send_radv = 1;
+
if ((max_interval != 0) && (min_interval == 0))
min_interval = .75 * max_interval;
@@ -2117,14 +2027,12 @@ format_ip6_ra (u8 * s, va_list * args)
indent += 2;
- /* *INDENT-OFF* */
pool_foreach (p, radv_info->adv_prefixes_pool)
{
s = format (s, "%Uprefix %U, length %d\n",
format_white_space, indent+2,
format_ip6_address, &p->prefix, p->prefix_len);
}
- /* *INDENT-ON* */
s = format (s, "%UMTU is %d\n",
format_white_space, indent, radv_info->adv_link_mtu);
@@ -2300,14 +2208,12 @@ format_ip6_ra (u8 * s, va_list * args)
* Example of how to delete a prefix:
* @cliexcmd{ip6 nd GigabitEthernet2/0/0 no prefix fe80::fe:28ff:fe9c:75b3/64}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_nd_command, static) =
{
.path = "ip6 nd",
.short_help = "ip6 nd <interface> ...",
.function = ip6_ra_cmd,
};
-/* *INDENT-ON* */
/**
* VFT for registering as a delegate to an IP6 link
@@ -2333,12 +2239,10 @@ ip6_ra_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip6_ra_init) =
{
.runs_after = VLIB_INITS("icmp6_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip6-nd/ip6_ra.h b/src/vnet/ip6-nd/ip6_ra.h
index d09e8c0c975..958845b0a55 100644
--- a/src/vnet/ip6-nd/ip6_ra.h
+++ b/src/vnet/ip6-nd/ip6_ra.h
@@ -21,6 +21,105 @@
#include <vnet/fib/fib_types.h>
+/* advertised prefix option */
+typedef struct
+{
+ /* basic advertised information */
+ ip6_address_t prefix;
+ u8 prefix_len;
+ int adv_on_link_flag;
+ int adv_autonomous_flag;
+ u32 adv_valid_lifetime_in_secs;
+ u32 adv_pref_lifetime_in_secs;
+
+ /* advertised values are computed from these times if decrementing */
+ f64 valid_lifetime_expires;
+ f64 pref_lifetime_expires;
+
+ /* local information */
+ int enabled;
+ int deprecated_prefix_flag;
+ int decrement_lifetime_flag;
+
+#define MIN_ADV_VALID_LIFETIME 7203 /* seconds */
+#define DEF_ADV_VALID_LIFETIME 2592000
+#define DEF_ADV_PREF_LIFETIME 604800
+
+ /* extensions are added here, mobile, DNS etc.. */
+} ip6_radv_prefix_t;
+
+typedef struct
+{
+ u32 irt;
+ u32 mrt;
+ u32 mrc;
+ u32 mrd;
+} icmp6_send_router_solicitation_params_t;
+
+typedef struct ip6_ra_t_
+{
+ /* advertised config information, zero means unspecified */
+ u8 curr_hop_limit;
+ int adv_managed_flag;
+ int adv_other_flag;
+ u16 adv_router_lifetime_in_sec;
+ u32 adv_neighbor_reachable_time_in_msec;
+ u32 adv_time_in_msec_between_retransmitted_neighbor_solicitations;
+
+ /* mtu option */
+ u32 adv_link_mtu;
+
+ /* local information */
+ u32 sw_if_index;
+ int send_radv; /* radv on/off on this interface - set by config */
+ int cease_radv; /* we are ceasing to send - set byf config */
+ int send_unicast;
+ int adv_link_layer_address;
+ int prefix_option;
+ int failed_device_check;
+ int ref_count;
+
+ /* prefix option */
+ ip6_radv_prefix_t *adv_prefixes_pool;
+
+ /* Hash table mapping address to index in interface advertised prefix pool.
+ */
+ mhash_t address_to_prefix_index;
+
+ f64 max_radv_interval;
+ f64 min_radv_interval;
+ f64 min_delay_between_radv;
+ f64 max_delay_between_radv;
+ f64 max_rtr_default_lifetime;
+
+ f64 last_radv_time;
+ f64 last_multicast_time;
+ f64 next_multicast_time;
+
+ u32 initial_adverts_count;
+ f64 initial_adverts_interval;
+ u32 initial_adverts_sent;
+
+ /* stats */
+ u32 n_advertisements_sent;
+ u32 n_solicitations_rcvd;
+ u32 n_solicitations_dropped;
+
+ /* router solicitations sending state */
+ u8 keep_sending_rs; /* when true then next fields are valid */
+ icmp6_send_router_solicitation_params_t params;
+ f64 sleep_interval;
+ f64 due_time;
+ u32 n_left;
+ f64 start_time;
+ vlib_buffer_t *buffer;
+
+ u32 seed;
+
+} ip6_ra_t;
+
+extern ip6_ra_t *ip6_ra_get_itf (u32 sw_if_index);
+
extern int ip6_ra_config (vlib_main_t * vm, u32 sw_if_index,
u8 suppress, u8 managed, u8 other,
u8 ll_option, u8 send_unicast, u8 cease,
@@ -35,13 +134,9 @@ extern int ip6_ra_prefix (vlib_main_t * vm, u32 sw_if_index,
u8 off_link, u8 no_autoconfig,
u8 no_onlink, u8 is_no);
-typedef struct
-{
- u32 irt;
- u32 mrt;
- u32 mrc;
- u32 mrd;
-} icmp6_send_router_solicitation_params_t;
+typedef walk_rc_t (*ip6_ra_itf_walk_fn_t) (u32 sw_if_index, void *ctx);
+
+extern void ip6_ra_itf_walk (ip6_ra_itf_walk_fn_t fn, void *ctx);
extern void icmp6_send_router_solicitation (vlib_main_t * vm,
u32 sw_if_index,
@@ -82,7 +177,7 @@ extern void ip6_ra_update_secondary_radv_info (ip6_address_t * address,
u32 primary_sw_if_index,
u32 valid_time,
u32 preferred_time);
-
+extern u8 ip6_ra_adv_enabled (u32 sw_if_index);
#endif /* included_ip6_neighbor_h */
/*
diff --git a/src/vnet/ip6-nd/rd_cp.c b/src/vnet/ip6-nd/rd_cp.c
index 13fd90db288..5d419286051 100644
--- a/src/vnet/ip6-nd/rd_cp.c
+++ b/src/vnet/ip6-nd/rd_cp.c
@@ -72,8 +72,6 @@ enum
RD_CP_EVENT_INTERRUPT,
};
-#define vl_api_ip6_nd_address_autoconfig_t_print vl_noop_handler
-
static void
router_solicitation_start_stop (u32 sw_if_index, u8 start)
{
@@ -262,7 +260,6 @@ ip6_ra_report_handler (const ip6_ra_report_t * r)
{
router_lifetime_in_sec = r->router_lifetime_in_sec;
u8 route_already_present = 0;
- /* *INDENT-OFF* */
pool_foreach (default_route, rm->default_route_pool)
{
if (default_route->sw_if_index != sw_if_index)
@@ -276,7 +273,6 @@ ip6_ra_report_handler (const ip6_ra_report_t * r)
goto default_route_pool_foreach_out;
}
}
- /* *INDENT-ON* */
default_route_pool_foreach_out:
if (!route_already_present)
@@ -333,7 +329,6 @@ ip6_ra_report_handler (const ip6_ra_report_t * r)
continue;
u8 address_already_present = 0;
- /* *INDENT-OFF* */
pool_foreach (slaac_address, rm->slaac_address_pool)
{
if (slaac_address->sw_if_index != sw_if_index)
@@ -349,7 +344,6 @@ ip6_ra_report_handler (const ip6_ra_report_t * r)
goto slaac_address_pool_foreach_out;
}
}
- /* *INDENT-ON* */
slaac_address_pool_foreach_out:
if (address_already_present)
@@ -414,7 +408,6 @@ rd_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
* we do not use pool_foreach() to iterate over pool elements here
* as we are removing elements inside the loop body
*/
- /* *INDENT-OFF* */
pool_foreach_index (index, rm->slaac_address_pool)
{
slaac_address = pool_elt_at_index(rm->slaac_address_pool, index);
@@ -442,7 +435,6 @@ rd_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
else
remove_default_route (vm, default_route);
}
- /* *INDENT-ON* */
current_time = vlib_time_now (vm);
}
while (due_time < current_time);
@@ -453,13 +445,11 @@ rd_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (rd_cp_process_node) = {
.function = rd_cp_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "rd-cp-process",
};
-/* *INDENT-ON* */
static void
interrupt_process (void)
@@ -514,21 +504,17 @@ rd_cp_set_address_autoconfig (u32 sw_if_index,
if (if_config->enabled && !enable)
{
- /* *INDENT-OFF* */
pool_foreach (slaac_address, rm->slaac_address_pool)
{
remove_slaac_address (vm, slaac_address);
}
- /* *INDENT-ON* */
}
if (if_config->install_default_routes && !install_default_routes)
{
- /* *INDENT-OFF* */
pool_foreach (default_route, rm->default_route_pool)
{
remove_default_route (vm, default_route);
}
- /* *INDENT-ON* */
}
if_config->enabled = enable;
@@ -588,13 +574,11 @@ ip6_nd_address_autoconfig (vlib_main_t * vm,
* @cliexcmd{ip6 nd address autoconfig GigabitEthernet2/0/0 disable}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_nd_address_autoconfig_command, static) = {
.path = "ip6 nd address autoconfig",
.short_help = "ip6 nd address autoconfig <interface> [default-route|disable]",
.function = ip6_nd_address_autoconfig,
};
-/* *INDENT-ON* */
static clib_error_t *
rd_cp_init (vlib_main_t * vm)
diff --git a/src/vnet/ipfix-export/flow_report.c b/src/vnet/ipfix-export/flow_report.c
index de4c72c437f..4eb93520ed8 100644
--- a/src/vnet/ipfix-export/flow_report.c
+++ b/src/vnet/ipfix-export/flow_report.c
@@ -579,13 +579,11 @@ flow_report_process (vlib_main_t * vm,
return 0; /* not so much */
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (flow_report_process_node) = {
.function = flow_report_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "flow-report-process",
};
-/* *INDENT-ON* */
int
vnet_flow_report_add_del (ipfix_exporter_t *exp,
@@ -862,7 +860,6 @@ set_ipfix_exporter_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ipfix_exporter_command, static) = {
.path = "set ipfix exporter",
.short_help = "set ipfix exporter "
@@ -873,7 +870,6 @@ VLIB_CLI_COMMAND (set_ipfix_exporter_command, static) = {
"[udp-checksum]",
.function = set_ipfix_exporter_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -885,13 +881,11 @@ ipfix_flush_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipfix_flush_command, static) = {
.path = "ipfix flush",
.short_help = "flush the current ipfix data [for make test]",
.function = ipfix_flush_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
flow_report_init (vlib_main_t * vm)
diff --git a/src/vnet/ipfix-export/flow_report_classify.c b/src/vnet/ipfix-export/flow_report_classify.c
index ea6ba5cab58..9e1b99f252d 100644
--- a/src/vnet/ipfix-export/flow_report_classify.c
+++ b/src/vnet/ipfix-export/flow_report_classify.c
@@ -179,7 +179,6 @@ ipfix_classify_send_flows (flow_report_main_t *frm, ipfix_exporter_t *exp,
tcpudp_header_t *tcpudp;
udp_header_t *udp;
int field_index;
- u32 records_this_buffer;
u16 new_l0, old_l0;
ip_csum_t sum0;
vlib_main_t *vm = frm->vlib_main;
@@ -251,7 +250,6 @@ ipfix_classify_send_flows (flow_report_main_t *frm, ipfix_exporter_t *exp,
next_offset = (u32) (((u8 *) (s + 1)) - (u8 *) tp);
record_offset = next_offset;
- records_this_buffer = 0;
}
field_index = 0;
@@ -275,7 +273,6 @@ ipfix_classify_send_flows (flow_report_main_t *frm, ipfix_exporter_t *exp,
sizeof (packets));
next_offset += sizeof (packets);
}
- records_this_buffer++;
stream->sequence_number++;
/* Next record will have the same size as this record */
@@ -483,13 +480,11 @@ ipfix_classify_table_add_del_command_fn (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipfix_classify_table_add_del_command, static) = {
.path = "ipfix classify table",
.short_help = "ipfix classify table add|del <table-index>",
.function = ipfix_classify_table_add_del_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
set_ipfix_classify_stream_command_fn (vlib_main_t * vm,
@@ -526,14 +521,12 @@ set_ipfix_classify_stream_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ipfix_classify_stream_command, static) = {
.path = "set ipfix classify stream",
.short_help = "set ipfix classify stream"
"[domain <domain-id>] [src-port <src-port>]",
.function = set_ipfix_classify_stream_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
flow_report_classify_init (vlib_main_t * vm)
diff --git a/src/vnet/ipip/ipip.c b/src/vnet/ipip/ipip.c
index 600f5421125..aaf21468d1e 100644
--- a/src/vnet/ipip/ipip.c
+++ b/src/vnet/ipip/ipip.c
@@ -148,7 +148,14 @@ ipip64_fixup (vlib_main_t * vm, const ip_adjacency_t * adj, vlib_buffer_t * b,
ip4->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
tunnel_encap_fixup_6o4 (flags, ((ip6_header_t *) (ip4 + 1)), ip4);
- ip4->checksum = ip4_header_checksum (ip4);
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+ }
+ else
+ ip4->checksum = ip4_header_checksum (ip4);
}
static void
@@ -164,7 +171,14 @@ ipip44_fixup (vlib_main_t * vm, const ip_adjacency_t * adj, vlib_buffer_t * b,
ip4->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
tunnel_encap_fixup_4o4 (flags, ip4 + 1, ip4);
- ip4->checksum = ip4_header_checksum (ip4);
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+ }
+ else
+ ip4->checksum = ip4_header_checksum (ip4);
}
static void
@@ -185,6 +199,12 @@ ipip46_fixup (vlib_main_t * vm, const ip_adjacency_t * adj, vlib_buffer_t * b,
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) -
sizeof (*ip6));
tunnel_encap_fixup_4o6 (flags, b, ((ip4_header_t *) (ip6 + 1)), ip6);
+
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip6 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+ }
}
static void
@@ -205,6 +225,12 @@ ipip66_fixup (vlib_main_t * vm,
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) -
sizeof (*ip6));
tunnel_encap_fixup_6o6 (flags, ip6 + 1, ip6);
+
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip6 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+ }
}
static void
@@ -226,6 +252,12 @@ ipipm6_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b,
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) - sizeof (*ip6));
tunnel_encap_fixup_mplso6 (flags, b, (mpls_unicast_header_t *) (ip6 + 1),
ip6);
+
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip6 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+ }
}
static void
@@ -245,7 +277,15 @@ ipipm4_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b,
ip4->length =
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) - sizeof (*ip4));
tunnel_encap_fixup_mplso4 (flags, (mpls_unicast_header_t *) (ip4 + 1), ip4);
- ip4->checksum = ip4_header_checksum (ip4);
+
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+ }
+ else
+ ip4->checksum = ip4_header_checksum (ip4);
}
static void
@@ -269,7 +309,6 @@ ipip_tunnel_stack (adj_index_t ai)
}
else
{
- /* *INDENT-OFF* */
fib_prefix_t dst = {
.fp_len = t->transport == IPIP_TRANSPORT_IP6 ? 128 : 32,
.fp_proto = (t->transport == IPIP_TRANSPORT_IP6 ?
@@ -277,7 +316,6 @@ ipip_tunnel_stack (adj_index_t ai)
FIB_PROTOCOL_IP4),
.fp_addr = t->tunnel_dst
};
- /* *INDENT-ON* */
adj_midchain_delegate_stack (ai, t->fib_index, &dst);
}
@@ -512,7 +550,6 @@ ipip_tunnel_desc (u32 sw_if_index,
return (0);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS(ipip_device_class) = {
.name = "IPIP tunnel device",
.format_device_name = format_ipip_tunnel_name,
@@ -542,7 +579,6 @@ VNET_HW_INTERFACE_CLASS(mipip_hw_interface_class) = {
.update_adjacency = mipip_update_adj,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA,
};
-/* *INDENT-ON* */
ipip_tunnel_t *
ipip_tunnel_db_find (const ipip_tunnel_key_t * key)
diff --git a/src/vnet/ipip/ipip_api.c b/src/vnet/ipip/ipip_api.c
index 50b6731af44..2cb7bdf8dae 100644
--- a/src/vnet/ipip/ipip_api.c
+++ b/src/vnet/ipip/ipip_api.c
@@ -86,12 +86,10 @@ vl_api_ipip_add_tunnel_t_handler (vl_api_ipip_add_tunnel_t * mp)
}
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_IPIP_ADD_TUNNEL_REPLY,
({
rmp->sw_if_index = ntohl(sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -105,29 +103,45 @@ vl_api_ipip_del_tunnel_t_handler (vl_api_ipip_del_tunnel_t * mp)
REPLY_MACRO (VL_API_IPIP_DEL_TUNNEL_REPLY);
}
+static vl_api_tunnel_mode_t
+ipip_tunnel_mode_encode (ipip_mode_t mode)
+{
+ switch (mode)
+ {
+ case IPIP_MODE_P2P:
+ return TUNNEL_API_MODE_P2P;
+ case IPIP_MODE_P2MP:
+ return TUNNEL_API_MODE_MP;
+ case IPIP_MODE_6RD:
+ return TUNNEL_API_MODE_P2P;
+ default:
+ return TUNNEL_API_MODE_P2P;
+ }
+}
+
static void
send_ipip_tunnel_details (ipip_tunnel_t * t, vl_api_ipip_tunnel_dump_t * mp)
{
ipip_main_t *im = &ipip_main;
vl_api_ipip_tunnel_details_t *rmp;
bool is_ipv6 = t->transport == IPIP_TRANSPORT_IP6 ? true : false;
+ ip46_type_t ip_type = is_ipv6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4;
fib_table_t *ft;
- ft = fib_table_get (t->fib_index, (is_ipv6 ? FIB_PROTOCOL_IP6 :
- FIB_PROTOCOL_IP4));
-
- /* *INDENT-OFF* */
- REPLY_MACRO_DETAILS2(VL_API_IPIP_TUNNEL_DETAILS,
- ({
- ip_address_encode (&t->tunnel_src, IP46_TYPE_ANY, &rmp->tunnel.src);
- ip_address_encode (&t->tunnel_dst, IP46_TYPE_ANY, &rmp->tunnel.dst);
- rmp->tunnel.table_id = htonl (ft->ft_table_id);
- rmp->tunnel.instance = htonl (t->user_instance);
- rmp->tunnel.sw_if_index = htonl (t->sw_if_index);
- rmp->tunnel.dscp = ip_dscp_encode(t->dscp);
- rmp->tunnel.flags = tunnel_encap_decap_flags_encode(t->flags);
- }));
- /* *INDENT-ON* */
+ ft = fib_table_get (t->fib_index,
+ (is_ipv6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4));
+
+ REPLY_MACRO_DETAILS2 (
+ VL_API_IPIP_TUNNEL_DETAILS, ({
+ ip_address_encode (&t->tunnel_src, ip_type, &rmp->tunnel.src);
+ ip_address_encode (&t->tunnel_dst, ip_type, &rmp->tunnel.dst);
+ rmp->tunnel.table_id = htonl (ft->ft_table_id);
+ rmp->tunnel.instance = htonl (t->user_instance);
+ rmp->tunnel.sw_if_index = htonl (t->sw_if_index);
+ rmp->tunnel.dscp = ip_dscp_encode (t->dscp);
+ rmp->tunnel.flags = tunnel_encap_decap_flags_encode (t->flags);
+ rmp->tunnel.mode = ipip_tunnel_mode_encode (t->mode);
+ }));
}
static void
@@ -141,12 +155,10 @@ vl_api_ipip_tunnel_dump_t_handler (vl_api_ipip_tunnel_dump_t * mp)
if (sw_if_index == ~0)
{
- /* *INDENT-OFF* */
pool_foreach (t, im->tunnels)
{
send_ipip_tunnel_details(t, mp);
}
- /* *INDENT-ON* */
}
else
{
@@ -185,12 +197,10 @@ vl_api_ipip_6rd_add_tunnel_t_handler (vl_api_ipip_6rd_add_tunnel_t * mp)
&sixrd_tunnel_index);
}
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IPIP_6RD_ADD_TUNNEL_REPLY,
({
rmp->sw_if_index = htonl (sixrd_tunnel_index);
}));
- /* *INDENT-ON* */
}
static void
diff --git a/src/vnet/ipip/ipip_cli.c b/src/vnet/ipip/ipip_cli.c
index 1a8e8896965..606a1f53f9a 100644
--- a/src/vnet/ipip/ipip_cli.c
+++ b/src/vnet/ipip/ipip_cli.c
@@ -197,7 +197,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(create_ipip_tunnel_command, static) = {
.path = "create ipip tunnel",
.short_help = "create ipip tunnel src <addr> dst <addr> [instance <n>] "
@@ -209,7 +208,6 @@ VLIB_CLI_COMMAND(delete_ipip_tunnel_command, static) = {
.short_help = "delete ipip tunnel sw_if_index <sw_if_index>",
.function = delete_ipip_tunnel_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_ipip_tunnel (u8 * s, va_list * args)
@@ -274,10 +272,8 @@ show_ipip_tunnel_command_fn (vlib_main_t * vm,
if (ti == ~0)
{
- /* *INDENT-OFF* */
pool_foreach (t, gm->tunnels)
{vlib_cli_output(vm, "%U", format_ipip_tunnel, t); }
- /* *INDENT-ON* */
}
else
{
@@ -290,12 +286,10 @@ show_ipip_tunnel_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(show_ipip_tunnel_command, static) = {
.path = "show ipip tunnel",
.function = show_ipip_tunnel_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_ipip_tunnel_key (u8 * s, va_list * args)
@@ -318,12 +312,10 @@ ipip_tunnel_hash_show (vlib_main_t * vm,
ipip_tunnel_key_t *key;
u32 index;
- /* *INDENT-OFF* */
hash_foreach(key, index, im->tunnel_by_key,
({
vlib_cli_output (vm, " %U -> %d", format_ipip_tunnel_key, key, index);
}));
- /* *INDENT-ON* */
return NULL;
}
@@ -331,14 +323,12 @@ ipip_tunnel_hash_show (vlib_main_t * vm,
/**
* show IPSEC tunnel protection hash tables
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipip_tunnel_hash_show_node, static) =
{
.path = "show ipip tunnel-hash",
.function = ipip_tunnel_hash_show,
.short_help = "show ipip tunnel-hash",
};
-/* *INDENT-ON* */
static clib_error_t *
create_sixrd_tunnel_command_fn (vlib_main_t * vm,
@@ -464,7 +454,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(create_sixrd_tunnel_command, static) = {
.path = "create 6rd tunnel",
.short_help = "create 6rd tunnel ip6-pfx <ip6-pfx> ip4-pfx <ip4-pfx> "
@@ -477,7 +466,6 @@ VLIB_CLI_COMMAND(delete_sixrd_tunnel_command, static) = {
.short_help = "delete 6rd tunnel sw_if_index <sw_if_index>",
.function = delete_sixrd_tunnel_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipip/node.c b/src/vnet/ipip/node.c
index b008a21a20f..a289cc885df 100644
--- a/src/vnet/ipip/node.c
+++ b/src/vnet/ipip/node.c
@@ -260,7 +260,6 @@ static char *ipip_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ipip4_input_node) = {
.name = "ipip4-input",
/* Takes a vector of packets. */
@@ -293,7 +292,6 @@ VLIB_REGISTER_NODE(ipip6_input_node) = {
.format_trace = format_ipip_rx_trace,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipip/sixrd.c b/src/vnet/ipip/sixrd.c
index 3fb7b52dca6..6e0bfb042cc 100644
--- a/src/vnet/ipip/sixrd.c
+++ b/src/vnet/ipip/sixrd.c
@@ -250,7 +250,6 @@ sixrd_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
return /* no error */ 0;
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS(sixrd_hw_interface_class) = {
.name = "ip6ip-6rd",
.build_rewrite = sixrd_build_rewrite,
@@ -265,7 +264,6 @@ VNET_DEVICE_CLASS(sixrd_device_class) = {
#endif
}
;
-/* *INDENT-ON* */
int
sixrd_add_tunnel (ip6_address_t * ip6_prefix, u8 ip6_prefix_len,
@@ -341,7 +339,6 @@ sixrd_add_tunnel (ip6_address_t * ip6_prefix, u8 ip6_prefix_len,
ip6_sw_interface_enable_disable (t->sw_if_index, true);
/* Create IPv6 route/adjacency */
- /* *INDENT-OFF* */
fib_prefix_t pfx6 = {
.fp_proto = FIB_PROTOCOL_IP6,
.fp_len = t->sixrd.ip6_prefix_len,
@@ -349,7 +346,6 @@ sixrd_add_tunnel (ip6_address_t * ip6_prefix, u8 ip6_prefix_len,
.ip6 = t->sixrd.ip6_prefix,
},
};
- /* *INDENT-ON* */
fib_table_lock (ip6_fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_6RD);
fib_table_entry_update_one_path (ip6_fib_index, &pfx6, FIB_SOURCE_6RD,
@@ -386,7 +382,6 @@ sixrd_del_tunnel (u32 sw_if_index)
return -1;
}
- /* *INDENT-OFF* */
fib_prefix_t pfx6 = {
.fp_proto = FIB_PROTOCOL_IP6,
.fp_len = t->sixrd.ip6_prefix_len,
@@ -394,7 +389,6 @@ sixrd_del_tunnel (u32 sw_if_index)
.ip6 = t->sixrd.ip6_prefix,
},
};
- /* *INDENT-ON* */
fib_table_entry_path_remove (t->sixrd.ip6_fib_index, &pfx6,
FIB_SOURCE_6RD,
diff --git a/src/vnet/ipsec/ah.h b/src/vnet/ipsec/ah.h
index d0b4c21a4bc..450c9cfd6dc 100644
--- a/src/vnet/ipsec/ah.h
+++ b/src/vnet/ipsec/ah.h
@@ -17,6 +17,7 @@
#include <vnet/ip/ip.h>
#include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/ipsec.api_enum.h>
typedef struct
{
@@ -29,19 +30,67 @@ typedef struct
} ah_header_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip4_header_t ip4;
ah_header_t ah;
}) ip4_and_ah_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip6_header_t ip6;
ah_header_t ah;
}) ip6_and_ah_header_t;
-/* *INDENT-ON* */
+
+always_inline u32
+ah_encrypt_err_to_sa_err (u32 err)
+{
+ switch (err)
+ {
+ case AH_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR:
+ return IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR;
+ case AH_ENCRYPT_ERROR_SEQ_CYCLED:
+ return IPSEC_SA_ERROR_SEQ_CYCLED;
+ }
+ return ~0;
+}
+
+always_inline u32
+ah_decrypt_err_to_sa_err (u32 err)
+{
+ switch (err)
+ {
+ case AH_DECRYPT_ERROR_DECRYPTION_FAILED:
+ return IPSEC_SA_ERROR_DECRYPTION_FAILED;
+ case AH_DECRYPT_ERROR_INTEG_ERROR:
+ return IPSEC_SA_ERROR_INTEG_ERROR;
+ case AH_DECRYPT_ERROR_NO_TAIL_SPACE:
+ return IPSEC_SA_ERROR_NO_TAIL_SPACE;
+ case AH_DECRYPT_ERROR_DROP_FRAGMENTS:
+ return IPSEC_SA_ERROR_DROP_FRAGMENTS;
+ case AH_DECRYPT_ERROR_REPLAY:
+ return IPSEC_SA_ERROR_REPLAY;
+ }
+ return ~0;
+}
+
+always_inline void
+ah_encrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
+ u32 thread_index, u32 err, u16 index, u16 *nexts,
+ u16 drop_next, u32 sa_index)
+{
+ ipsec_set_next_index (b, node, thread_index, err,
+ ah_encrypt_err_to_sa_err (err), index, nexts,
+ drop_next, sa_index);
+}
+
+always_inline void
+ah_decrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
+ u32 thread_index, u32 err, u16 index, u16 *nexts,
+ u16 drop_next, u32 sa_index)
+{
+ ipsec_set_next_index (b, node, thread_index, err,
+ ah_decrypt_err_to_sa_err (err), index, nexts,
+ drop_next, sa_index);
+}
always_inline u8
ah_calc_icv_padding_len (u8 icv_size, int is_ipv6)
diff --git a/src/vnet/ipsec/ah_decrypt.c b/src/vnet/ipsec/ah_decrypt.c
index c9209d6ceb0..918ebf03f67 100644
--- a/src/vnet/ipsec/ah_decrypt.c
+++ b/src/vnet/ipsec/ah_decrypt.c
@@ -23,7 +23,6 @@
#include <vnet/ipsec/esp.h>
#include <vnet/ipsec/ah.h>
#include <vnet/ipsec/ipsec_io.h>
-#include <vnet/ipsec/ipsec.api_enum.h>
#define foreach_ah_decrypt_next \
_(DROP, "error-drop") \
@@ -104,8 +103,9 @@ ah_process_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
{
u32 bi = op->user_data;
- b[bi]->error = node->errors[AH_DECRYPT_ERROR_INTEG_ERROR];
- nexts[bi] = AH_DECRYPT_NEXT_DROP;
+ ah_decrypt_set_next_index (
+ b[bi], node, vm->thread_index, AH_DECRYPT_ERROR_INTEG_ERROR, bi,
+ nexts, AH_DECRYPT_NEXT_DROP, vnet_buffer (b[bi])->ipsec.sad_index);
n_fail--;
}
op++;
@@ -128,6 +128,7 @@ ah_decrypt_inline (vlib_main_t * vm,
from = vlib_frame_vector_args (from_frame);
n_left = from_frame->n_vectors;
ipsec_sa_t *sa0 = 0;
+ bool anti_replay_result;
u32 current_sa_index = ~0, current_sa_bytes = 0, current_sa_pkts = 0;
clib_memset (pkt_data, 0, VLIB_FRAME_SIZE * sizeof (pkt_data[0]));
@@ -145,8 +146,7 @@ ah_decrypt_inline (vlib_main_t * vm,
{
if (current_sa_index != ~0)
vlib_increment_combined_counter (&ipsec_sa_counters, thread_index,
- current_sa_index,
- current_sa_pkts,
+ current_sa_index, current_sa_pkts,
current_sa_bytes);
current_sa_index = vnet_buffer (b[0])->ipsec.sad_index;
sa0 = ipsec_sa_get (current_sa_index);
@@ -156,7 +156,7 @@ ah_decrypt_inline (vlib_main_t * vm,
thread_index, current_sa_index);
}
- if (PREDICT_FALSE (~0 == sa0->thread_index))
+ if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index))
{
/* this is the first packet to use this SA, claim the SA
* for this thread. this could happen simultaneously on
@@ -190,8 +190,9 @@ ah_decrypt_inline (vlib_main_t * vm,
{
if (ip4_is_fragment (ih4))
{
- b[0]->error = node->errors[AH_DECRYPT_ERROR_DROP_FRAGMENTS];
- next[0] = AH_DECRYPT_NEXT_DROP;
+ ah_decrypt_set_next_index (
+ b[0], node, vm->thread_index, AH_DECRYPT_ERROR_DROP_FRAGMENTS,
+ 0, next, AH_DECRYPT_NEXT_DROP, current_sa_index);
goto next;
}
pd->ip_hdr_size = ip4_header_bytes (ih4);
@@ -201,11 +202,21 @@ ah_decrypt_inline (vlib_main_t * vm,
pd->seq = clib_host_to_net_u32 (ah0->seq_no);
/* anti-replay check */
- if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, ~0, false,
- &pd->seq_hi))
+ if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0)))
{
- b[0]->error = node->errors[AH_DECRYPT_ERROR_REPLAY];
- next[0] = AH_DECRYPT_NEXT_DROP;
+ anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
+ sa0, pd->seq, ~0, false, &pd->seq_hi, true);
+ }
+ else
+ {
+ anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
+ sa0, pd->seq, ~0, false, &pd->seq_hi, false);
+ }
+ if (anti_replay_result)
+ {
+ ah_decrypt_set_next_index (b[0], node, vm->thread_index,
+ AH_DECRYPT_ERROR_REPLAY, 0, next,
+ AH_DECRYPT_NEXT_DROP, current_sa_index);
goto next;
}
@@ -220,8 +231,9 @@ ah_decrypt_inline (vlib_main_t * vm,
pd->current_data + b[0]->current_length
+ sizeof (u32) > buffer_data_size))
{
- b[0]->error = node->errors[AH_DECRYPT_ERROR_NO_TAIL_SPACE];
- next[0] = AH_DECRYPT_NEXT_DROP;
+ ah_decrypt_set_next_index (
+ b[0], node, vm->thread_index, AH_DECRYPT_ERROR_NO_TAIL_SPACE,
+ 0, next, AH_DECRYPT_NEXT_DROP, current_sa_index);
goto next;
}
@@ -304,23 +316,43 @@ ah_decrypt_inline (vlib_main_t * vm,
if (PREDICT_TRUE (sa0->integ_alg != IPSEC_INTEG_ALG_NONE))
{
/* redo the anti-reply check. see esp_decrypt for details */
- if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi,
- true, NULL))
+ if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0)))
{
- b[0]->error = node->errors[AH_DECRYPT_ERROR_REPLAY];
- next[0] = AH_DECRYPT_NEXT_DROP;
- goto trace;
+ if (ipsec_sa_anti_replay_and_sn_advance (
+ sa0, pd->seq, pd->seq_hi, true, NULL, true))
+ {
+ ah_decrypt_set_next_index (
+ b[0], node, vm->thread_index, AH_DECRYPT_ERROR_REPLAY, 0,
+ next, AH_DECRYPT_NEXT_DROP, pd->sa_index);
+ goto trace;
+ }
+ n_lost = ipsec_sa_anti_replay_advance (
+ sa0, thread_index, pd->seq, pd->seq_hi, true);
+ }
+ else
+ {
+ if (ipsec_sa_anti_replay_and_sn_advance (
+ sa0, pd->seq, pd->seq_hi, true, NULL, false))
+ {
+ ah_decrypt_set_next_index (
+ b[0], node, vm->thread_index, AH_DECRYPT_ERROR_REPLAY, 0,
+ next, AH_DECRYPT_NEXT_DROP, pd->sa_index);
+ goto trace;
+ }
+ n_lost = ipsec_sa_anti_replay_advance (
+ sa0, thread_index, pd->seq, pd->seq_hi, false);
}
- n_lost = ipsec_sa_anti_replay_advance (sa0, thread_index, pd->seq,
- pd->seq_hi);
- vlib_prefetch_simple_counter (&ipsec_sa_lost_counters, thread_index,
- pd->sa_index);
+ vlib_prefetch_simple_counter (
+ &ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST], thread_index,
+ pd->sa_index);
}
u16 ah_hdr_len = sizeof (ah_header_t) + pd->icv_size
+ pd->icv_padding_len;
vlib_buffer_advance (b[0], pd->ip_hdr_size + ah_hdr_len);
b[0]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ b[0]->flags &= ~(VNET_BUFFER_F_L4_CHECKSUM_COMPUTED |
+ VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
if (PREDICT_TRUE (ipsec_sa_is_set_IS_TUNNEL (sa0)))
{ /* tunnel mode */
@@ -330,8 +362,10 @@ ah_decrypt_inline (vlib_main_t * vm,
next[0] = AH_DECRYPT_NEXT_IP6_INPUT;
else
{
- b[0]->error = node->errors[AH_DECRYPT_ERROR_DECRYPTION_FAILED];
- next[0] = AH_DECRYPT_NEXT_DROP;
+ ah_decrypt_set_next_index (b[0], node, vm->thread_index,
+ AH_DECRYPT_ERROR_DECRYPTION_FAILED, 0,
+ next, AH_DECRYPT_NEXT_DROP,
+ pd->sa_index);
goto trace;
}
}
@@ -382,8 +416,9 @@ ah_decrypt_inline (vlib_main_t * vm,
}
if (PREDICT_FALSE (n_lost))
- vlib_increment_simple_counter (&ipsec_sa_lost_counters, thread_index,
- pd->sa_index, n_lost);
+ vlib_increment_simple_counter (
+ &ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST], thread_index,
+ pd->sa_index, n_lost);
vnet_buffer (b[0])->sw_if_index[VLIB_TX] = (u32) ~ 0;
trace:
@@ -415,7 +450,6 @@ VLIB_NODE_FN (ah4_decrypt_node) (vlib_main_t * vm,
return ah_decrypt_inline (vm, node, from_frame, 0 /* is_ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ah4_decrypt_node) = {
.name = "ah4-decrypt",
.vector_size = sizeof (u32),
@@ -433,7 +467,6 @@ VLIB_REGISTER_NODE (ah4_decrypt_node) = {
[AH_DECRYPT_NEXT_HANDOFF] = "ah4-decrypt-handoff",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ah6_decrypt_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -442,7 +475,6 @@ VLIB_NODE_FN (ah6_decrypt_node) (vlib_main_t * vm,
return ah_decrypt_inline (vm, node, from_frame, 1 /* is_ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ah6_decrypt_node) = {
.name = "ah6-decrypt",
.vector_size = sizeof (u32),
@@ -460,7 +492,6 @@ VLIB_REGISTER_NODE (ah6_decrypt_node) = {
[AH_DECRYPT_NEXT_HANDOFF] = "ah6-decrypt-handoff",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vnet/ipsec/ah_encrypt.c b/src/vnet/ipsec/ah_encrypt.c
index 7116a160926..960327f071d 100644
--- a/src/vnet/ipsec/ah_encrypt.c
+++ b/src/vnet/ipsec/ah_encrypt.c
@@ -81,8 +81,10 @@ ah_process_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
{
u32 bi = op->user_data;
- b[bi]->error = node->errors[AH_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR];
- nexts[bi] = AH_ENCRYPT_NEXT_DROP;
+ ah_encrypt_set_next_index (b[bi], node, vm->thread_index,
+ AH_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR, bi,
+ nexts, AH_ENCRYPT_NEXT_DROP,
+ vnet_buffer (b[bi])->ipsec.sad_index);
n_fail--;
}
op++;
@@ -153,19 +155,20 @@ ah_encrypt_inline (vlib_main_t * vm,
{
if (current_sa_index != ~0)
vlib_increment_combined_counter (&ipsec_sa_counters, thread_index,
- current_sa_index,
- current_sa_pkts,
+ current_sa_index, current_sa_pkts,
current_sa_bytes);
current_sa_index = vnet_buffer (b[0])->ipsec.sad_index;
sa0 = ipsec_sa_get (current_sa_index);
current_sa_bytes = current_sa_pkts = 0;
+ vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index,
+ current_sa_index);
}
pd->sa_index = current_sa_index;
next[0] = AH_ENCRYPT_NEXT_DROP;
- if (PREDICT_FALSE (~0 == sa0->thread_index))
+ if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index))
{
/* this is the first packet to use this SA, claim the SA
* for this thread. this could happen simultaneously on
@@ -183,7 +186,9 @@ ah_encrypt_inline (vlib_main_t * vm,
if (PREDICT_FALSE (esp_seq_advance (sa0)))
{
- b[0]->error = node->errors[AH_ENCRYPT_ERROR_SEQ_CYCLED];
+ ah_encrypt_set_next_index (b[0], node, vm->thread_index,
+ AH_ENCRYPT_ERROR_SEQ_CYCLED, 0, next,
+ AH_ENCRYPT_NEXT_DROP, current_sa_index);
pd->skip = 1;
goto next;
}
@@ -437,7 +442,6 @@ VLIB_NODE_FN (ah4_encrypt_node) (vlib_main_t * vm,
return ah_encrypt_inline (vm, node, from_frame, 0 /* is_ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ah4_encrypt_node) = {
.name = "ah4-encrypt",
.vector_size = sizeof (u32),
@@ -454,7 +458,6 @@ VLIB_REGISTER_NODE (ah4_encrypt_node) = {
[AH_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "interface-output",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ah6_encrypt_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -463,7 +466,6 @@ VLIB_NODE_FN (ah6_encrypt_node) (vlib_main_t * vm,
return ah_encrypt_inline (vm, node, from_frame, 1 /* is_ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ah6_encrypt_node) = {
.name = "ah6-encrypt",
.vector_size = sizeof (u32),
@@ -480,7 +482,6 @@ VLIB_REGISTER_NODE (ah6_encrypt_node) = {
[AH_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "interface-output",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vnet/ipsec/esp.h b/src/vnet/ipsec/esp.h
index 8d7e0563a59..1c3ce776ad2 100644
--- a/src/vnet/ipsec/esp.h
+++ b/src/vnet/ipsec/esp.h
@@ -18,6 +18,7 @@
#include <vnet/ip/ip.h>
#include <vnet/crypto/crypto.h>
#include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/ipsec.api_enum.h>
typedef struct
{
@@ -36,27 +37,21 @@ typedef struct
u8 next_header;
} esp_footer_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip4_header_t ip4;
esp_header_t esp;
}) ip4_and_esp_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip4_header_t ip4;
udp_header_t udp;
esp_header_t esp;
}) ip4_and_udp_and_esp_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip6_header_t ip6;
esp_header_t esp;
}) ip6_and_esp_header_t;
-/* *INDENT-ON* */
/**
* AES counter mode nonce
@@ -85,9 +80,6 @@ typedef struct esp_aead_t_
} __clib_packed esp_aead_t;
#define ESP_SEQ_MAX (4294967295UL)
-#define ESP_MAX_BLOCK_SIZE (16)
-#define ESP_MAX_IV_SIZE (16)
-#define ESP_MAX_ICV_SIZE (32)
u8 *format_esp_header (u8 * s, va_list * args);
@@ -141,38 +133,76 @@ esp_aad_fill (u8 *data, const esp_header_t *esp, const ipsec_sa_t *sa,
}
}
-/* Special case to drop or hand off packets for sync/async modes.
- *
- * Different than sync mode, async mode only enqueue drop or hand-off packets
- * to next nodes.
- */
-always_inline void
-esp_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node, u32 err,
- u16 index, u16 *nexts, u16 drop_next)
+always_inline u32
+esp_encrypt_err_to_sa_err (u32 err)
{
- nexts[index] = drop_next;
- b->error = node->errors[err];
+ switch (err)
+ {
+ case ESP_ENCRYPT_ERROR_HANDOFF:
+ return IPSEC_SA_ERROR_HANDOFF;
+ case ESP_ENCRYPT_ERROR_SEQ_CYCLED:
+ return IPSEC_SA_ERROR_SEQ_CYCLED;
+ case ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR:
+ return IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR;
+ case ESP_ENCRYPT_ERROR_CRYPTO_QUEUE_FULL:
+ return IPSEC_SA_ERROR_CRYPTO_QUEUE_FULL;
+ case ESP_ENCRYPT_ERROR_NO_BUFFERS:
+ return IPSEC_SA_ERROR_NO_BUFFERS;
+ case ESP_ENCRYPT_ERROR_NO_ENCRYPTION:
+ return IPSEC_SA_ERROR_NO_ENCRYPTION;
+ }
+ return ~0;
}
-/* when submitting a frame is failed, drop all buffers in the frame */
always_inline u32
-esp_async_recycle_failed_submit (vlib_main_t *vm, vnet_crypto_async_frame_t *f,
- vlib_node_runtime_t *node, u32 err, u16 index,
- u32 *from, u16 *nexts, u16 drop_next_index)
+esp_decrypt_err_to_sa_err (u32 err)
{
- u32 n_drop = f->n_elts;
- u32 *bi = f->buffer_indices;
-
- while (n_drop--)
+ switch (err)
{
- from[index] = bi[0];
- esp_set_next_index (vlib_get_buffer (vm, bi[0]), node, err, index, nexts,
- drop_next_index);
- bi++;
- index++;
+ case ESP_DECRYPT_ERROR_HANDOFF:
+ return IPSEC_SA_ERROR_HANDOFF;
+ case ESP_DECRYPT_ERROR_DECRYPTION_FAILED:
+ return IPSEC_SA_ERROR_DECRYPTION_FAILED;
+ case ESP_DECRYPT_ERROR_INTEG_ERROR:
+ return IPSEC_SA_ERROR_INTEG_ERROR;
+ case ESP_DECRYPT_ERROR_CRYPTO_ENGINE_ERROR:
+ return IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR;
+ case ESP_DECRYPT_ERROR_REPLAY:
+ return IPSEC_SA_ERROR_REPLAY;
+ case ESP_DECRYPT_ERROR_RUNT:
+ return IPSEC_SA_ERROR_RUNT;
+ case ESP_DECRYPT_ERROR_NO_BUFFERS:
+ return IPSEC_SA_ERROR_NO_BUFFERS;
+ case ESP_DECRYPT_ERROR_OVERSIZED_HEADER:
+ return IPSEC_SA_ERROR_OVERSIZED_HEADER;
+ case ESP_DECRYPT_ERROR_NO_TAIL_SPACE:
+ return IPSEC_SA_ERROR_NO_TAIL_SPACE;
+ case ESP_DECRYPT_ERROR_TUN_NO_PROTO:
+ return IPSEC_SA_ERROR_TUN_NO_PROTO;
+ case ESP_DECRYPT_ERROR_UNSUP_PAYLOAD:
+ return IPSEC_SA_ERROR_UNSUP_PAYLOAD;
}
+ return ~0;
+}
- return (f->n_elts);
+always_inline void
+esp_encrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
+ u32 thread_index, u32 err, u16 index, u16 *nexts,
+ u16 drop_next, u32 sa_index)
+{
+ ipsec_set_next_index (b, node, thread_index, err,
+ esp_encrypt_err_to_sa_err (err), index, nexts,
+ drop_next, sa_index);
+}
+
+always_inline void
+esp_decrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
+ u32 thread_index, u32 err, u16 index, u16 *nexts,
+ u16 drop_next, u32 sa_index)
+{
+ ipsec_set_next_index (b, node, thread_index, err,
+ esp_decrypt_err_to_sa_err (err), index, nexts,
+ drop_next, sa_index);
}
/**
@@ -249,6 +279,43 @@ typedef struct
extern esp_async_post_next_t esp_encrypt_async_next;
extern esp_async_post_next_t esp_decrypt_async_next;
+/* when submitting a frame is failed, drop all buffers in the frame */
+always_inline u32
+esp_async_recycle_failed_submit (vlib_main_t *vm, vnet_crypto_async_frame_t *f,
+ vlib_node_runtime_t *node, u32 err,
+ u32 ipsec_sa_err, u16 index, u32 *from,
+ u16 *nexts, u16 drop_next_index,
+ bool is_encrypt)
+{
+ vlib_buffer_t *b;
+ u32 n_drop = f->n_elts;
+ u32 *bi = f->buffer_indices;
+
+ while (n_drop--)
+ {
+ u32 sa_index;
+
+ from[index] = bi[0];
+ b = vlib_get_buffer (vm, bi[0]);
+
+ if (is_encrypt)
+ {
+ sa_index = vnet_buffer (b)->ipsec.sad_index;
+ }
+ else
+ {
+ sa_index = esp_post_data (b)->decrypt_data.sa_index;
+ }
+
+ ipsec_set_next_index (b, node, vm->thread_index, err, ipsec_sa_err,
+ index, nexts, drop_next_index, sa_index);
+ bi++;
+ index++;
+ }
+
+ return (f->n_elts);
+}
+
#endif /* __ESP_H__ */
/*
diff --git a/src/vnet/ipsec/esp_decrypt.c b/src/vnet/ipsec/esp_decrypt.c
index af90bc4c7ba..26d8ca1deee 100644
--- a/src/vnet/ipsec/esp_decrypt.c
+++ b/src/vnet/ipsec/esp_decrypt.c
@@ -23,7 +23,6 @@
#include <vnet/ipsec/esp.h>
#include <vnet/ipsec/ipsec_io.h>
#include <vnet/ipsec/ipsec_tun.h>
-#include <vnet/ipsec/ipsec.api_enum.h>
#include <vnet/gre/packet.h>
@@ -114,8 +113,9 @@ esp_process_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
err = e;
else
err = ESP_DECRYPT_ERROR_CRYPTO_ENGINE_ERROR;
- b[bi]->error = node->errors[err];
- nexts[bi] = ESP_DECRYPT_NEXT_DROP;
+ esp_decrypt_set_next_index (b[bi], node, vm->thread_index, err, bi,
+ nexts, ESP_DECRYPT_NEXT_DROP,
+ vnet_buffer (b[bi])->ipsec.sad_index);
n_fail--;
}
op++;
@@ -146,8 +146,9 @@ esp_process_chained_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
err = e;
else
err = ESP_DECRYPT_ERROR_CRYPTO_ENGINE_ERROR;
- b[bi]->error = node->errors[err];
- nexts[bi] = ESP_DECRYPT_NEXT_DROP;
+ esp_decrypt_set_next_index (b[bi], node, vm->thread_index, err, bi,
+ nexts, ESP_DECRYPT_NEXT_DROP,
+ vnet_buffer (b[bi])->ipsec.sad_index);
n_fail--;
}
op++;
@@ -160,6 +161,9 @@ esp_remove_tail (vlib_main_t * vm, vlib_buffer_t * b, vlib_buffer_t * last,
{
vlib_buffer_t *before_last = b;
+ if (b != last)
+ b->total_length_not_including_first_buffer -= tail;
+
if (last->current_length > tail)
{
last->current_length -= tail;
@@ -177,6 +181,37 @@ esp_remove_tail (vlib_main_t * vm, vlib_buffer_t * b, vlib_buffer_t * last,
before_last->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
}
+always_inline void
+esp_remove_tail_and_tfc_padding (vlib_main_t *vm, vlib_node_runtime_t *node,
+ const esp_decrypt_packet_data_t *pd,
+ vlib_buffer_t *b, vlib_buffer_t *last,
+ u16 *next, u16 tail, int is_ip6)
+{
+ const u16 total_buffer_length = vlib_buffer_length_in_chain (vm, b);
+ u16 ip_packet_length;
+ if (is_ip6)
+ {
+ const ip6_header_t *ip6 = vlib_buffer_get_current (b);
+ ip_packet_length =
+ clib_net_to_host_u16 (ip6->payload_length) + sizeof (ip6_header_t);
+ }
+ else
+ {
+ const ip4_header_t *ip4 = vlib_buffer_get_current (b);
+ ip_packet_length = clib_net_to_host_u16 (ip4->length);
+ }
+ /* In case of TFC padding, the size of the buffer data needs to be adjusted
+ * to the ip packet length */
+ if (PREDICT_FALSE (total_buffer_length < ip_packet_length + tail))
+ {
+ esp_decrypt_set_next_index (b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_NO_TAIL_SPACE, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
+ return;
+ }
+ esp_remove_tail (vm, b, last, total_buffer_length - ip_packet_length);
+}
+
/* ICV is splitted in last two buffers so move it to the last buffer and
return pointer to it */
static_always_inline u8 *
@@ -202,9 +237,12 @@ esp_move_icv (vlib_main_t * vm, vlib_buffer_t * first,
before_last->current_length -= first_sz;
if (before_last == first)
pd->current_length -= first_sz;
+ else
+ first->total_length_not_including_first_buffer -= first_sz;
clib_memset (vlib_buffer_get_tail (before_last), 0, first_sz);
if (dif)
dif[0] = first_sz;
+ first->total_length_not_including_first_buffer -= last_sz;
pd2->lb = before_last;
pd2->icv_removed = 1;
pd2->free_buffer_index = before_last->next_buffer;
@@ -456,18 +494,16 @@ esp_decrypt_chain_crypto (vlib_main_t * vm, ipsec_per_thread_data_t * ptd,
return total_len;
}
-static_always_inline void
-esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node,
- ipsec_per_thread_data_t * ptd,
- vnet_crypto_op_t *** crypto_ops,
- vnet_crypto_op_t *** integ_ops,
- vnet_crypto_op_t * op,
- ipsec_sa_t * sa0, u8 * payload,
- u16 len, u8 icv_sz, u8 iv_sz,
- esp_decrypt_packet_data_t * pd,
- esp_decrypt_packet_data2_t * pd2,
- vlib_buffer_t * b, u16 * next, u32 index)
+static_always_inline esp_decrypt_error_t
+esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
+ ipsec_sa_t *sa0, u8 *payload, u16 len, u8 icv_sz,
+ u8 iv_sz, esp_decrypt_packet_data_t *pd,
+ esp_decrypt_packet_data2_t *pd2, vlib_buffer_t *b,
+ u32 index)
{
+ vnet_crypto_op_t **crypto_ops;
+ vnet_crypto_op_t **integ_ops;
+ vnet_crypto_op_t _op, *op = &_op;
const u8 esp_sz = sizeof (esp_header_t);
if (PREDICT_TRUE (sa0->integ_op_id != VNET_CRYPTO_OP_NONE))
@@ -484,6 +520,8 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node,
if (pd->is_chain)
{
/* buffer is chained */
+ integ_ops = &ptd->chained_integ_ops;
+
op->len = pd->current_length;
/* special case when ICV is splitted and needs to be reassembled
@@ -509,8 +547,7 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node,
{
/* we now have a single buffer of crypto data, adjust
* the length (second buffer contains only ICV) */
- *integ_ops = &ptd->integ_ops;
- *crypto_ops = &ptd->crypto_ops;
+ integ_ops = &ptd->integ_ops;
len = b->current_length;
goto out;
}
@@ -524,17 +561,16 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node,
if (esp_decrypt_chain_integ (vm, ptd, pd, pd2, sa0, b, icv_sz,
payload, pd->current_length,
&op->digest, &op->n_chunks, 0) < 0)
- {
- b->error = node->errors[ESP_DECRYPT_ERROR_NO_BUFFERS];
- next[0] = ESP_DECRYPT_NEXT_DROP;
- return;
- }
+ return ESP_DECRYPT_ERROR_NO_BUFFERS;
}
else
- esp_insert_esn (vm, sa0, pd, pd2, &op->len, &op->digest, &len, b,
- payload);
+ {
+ integ_ops = &ptd->integ_ops;
+ esp_insert_esn (vm, sa0, pd, pd2, &op->len, &op->digest, &len, b,
+ payload);
+ }
out:
- vec_add_aligned (*(integ_ops[0]), op, 1, CLIB_CACHE_LINE_BYTES);
+ vec_add_aligned (*integ_ops, op, 1, CLIB_CACHE_LINE_BYTES);
}
payload += esp_sz;
@@ -560,6 +596,12 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node,
op->aad_len = esp_aad_fill (op->aad, esp0, sa0, pd->seq_hi);
op->tag = payload + len;
op->tag_len = 16;
+ if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa0)))
+ {
+ /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */
+ payload -= iv_sz;
+ len += iv_sz;
+ }
}
else
{
@@ -582,26 +624,32 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node,
esp_decrypt_chain_crypto (vm, ptd, pd, pd2, sa0, b, icv_sz,
payload, len - pd->iv_sz + pd->icv_sz,
&op->tag, &op->n_chunks);
+ crypto_ops = &ptd->chained_crypto_ops;
+ }
+ else
+ {
+ crypto_ops = &ptd->crypto_ops;
}
- vec_add_aligned (*(crypto_ops[0]), op, 1, CLIB_CACHE_LINE_BYTES);
+ vec_add_aligned (*crypto_ops, op, 1, CLIB_CACHE_LINE_BYTES);
}
+
+ return ESP_DECRYPT_ERROR_RX_PKTS;
}
static_always_inline esp_decrypt_error_t
-esp_decrypt_prepare_async_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
- ipsec_per_thread_data_t *ptd,
+esp_decrypt_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
vnet_crypto_async_frame_t *f, ipsec_sa_t *sa0,
u8 *payload, u16 len, u8 icv_sz, u8 iv_sz,
esp_decrypt_packet_data_t *pd,
esp_decrypt_packet_data2_t *pd2, u32 bi,
- vlib_buffer_t *b, u16 *next, u16 async_next)
+ vlib_buffer_t *b, u16 async_next)
{
const u8 esp_sz = sizeof (esp_header_t);
esp_decrypt_packet_data_t *async_pd = &(esp_post_data (b))->decrypt_data;
esp_decrypt_packet_data2_t *async_pd2 = esp_post_data2 (b);
u8 *tag = payload + len, *iv = payload + esp_sz, *aad = 0;
- u32 key_index;
+ const u32 key_index = sa0->crypto_key_index;
u32 crypto_len, integ_len = 0;
i16 crypto_start_offset, integ_start_offset = 0;
u8 flags = 0;
@@ -609,7 +657,6 @@ esp_decrypt_prepare_async_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
if (!ipsec_sa_is_set_IS_AEAD (sa0))
{
/* linked algs */
- key_index = sa0->linked_key_index;
integ_start_offset = payload - b->data;
integ_len = len;
if (PREDICT_TRUE (sa0->integ_op_id != VNET_CRYPTO_OP_NONE))
@@ -662,8 +709,6 @@ esp_decrypt_prepare_async_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
else
esp_insert_esn (vm, sa0, pd, pd2, &integ_len, &tag, &len, b, payload);
}
- else
- key_index = sa0->crypto_key_index;
out:
/* crypto */
@@ -683,6 +728,12 @@ out:
aad = (u8 *) nonce - sizeof (esp_aead_t);
esp_aad_fill (aad, esp0, sa0, pd->seq_hi);
tag = payload + len;
+ if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa0)))
+ {
+ /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */
+ payload -= iv_sz;
+ len += iv_sz;
+ }
}
else
{
@@ -721,7 +772,7 @@ out:
}
static_always_inline void
-esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node,
+esp_decrypt_post_crypto (vlib_main_t *vm, vlib_node_runtime_t *node,
const u16 *next_by_next_header,
const esp_decrypt_packet_data_t *pd,
const esp_decrypt_packet_data2_t *pd2,
@@ -734,6 +785,7 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node,
const u8 tun_flags = IPSEC_SA_FLAG_IS_TUNNEL | IPSEC_SA_FLAG_IS_TUNNEL_V6;
u8 pad_length = 0, next_header = 0;
u16 icv_sz;
+ u64 n_lost;
/*
* redo the anti-reply check
@@ -742,34 +794,50 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node,
* check above we did so against the state of the window (W),
* after packet s-1. So each of the packets in the sequence will be
* accepted.
- * This time s will be cheked against Ws-1, s+1 chceked against Ws
- * (i.e. the window state is updated/advnaced)
- * so this time the successive s+! packet will be dropped.
+ * This time s will be checked against Ws-1, s+1 checked against Ws
+ * (i.e. the window state is updated/advanced)
+ * so this time the successive s+1 packet will be dropped.
* This is a consequence of batching the decrypts. If the
- * check-dcrypt-advance process was done for each packet it would
+ * check-decrypt-advance process was done for each packet it would
* be fine. But we batch the decrypts because it's much more efficient
* to do so in SW and if we offload to HW and the process is async.
*
* You're probably thinking, but this means an attacker can send the
- * above sequence and cause VPP to perform decrpyts that will fail,
+ * above sequence and cause VPP to perform decrypts that will fail,
* and that's true. But if the attacker can determine s (a valid
* sequence number in the window) which is non-trivial, it can generate
* a sequence s, s+1, s+2, s+3, ... s+n and nothing will prevent any
* implementation, sequential or batching, from decrypting these.
*/
- if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi, true,
- NULL))
+ if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0)))
{
- b->error = node->errors[ESP_DECRYPT_ERROR_REPLAY];
- next[0] = ESP_DECRYPT_NEXT_DROP;
- return;
+ if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi, true,
+ NULL, true))
+ {
+ esp_decrypt_set_next_index (b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_REPLAY, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
+ return;
+ }
+ n_lost = ipsec_sa_anti_replay_advance (sa0, vm->thread_index, pd->seq,
+ pd->seq_hi, true);
+ }
+ else
+ {
+ if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi, true,
+ NULL, false))
+ {
+ esp_decrypt_set_next_index (b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_REPLAY, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
+ return;
+ }
+ n_lost = ipsec_sa_anti_replay_advance (sa0, vm->thread_index, pd->seq,
+ pd->seq_hi, false);
}
- u64 n_lost =
- ipsec_sa_anti_replay_advance (sa0, vm->thread_index, pd->seq, pd->seq_hi);
-
- vlib_prefetch_simple_counter (&ipsec_sa_lost_counters, vm->thread_index,
- pd->sa_index);
+ vlib_prefetch_simple_counter (&ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST],
+ vm->thread_index, pd->sa_index);
if (pd->is_chain)
{
@@ -828,7 +896,8 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node,
u16 adv = pd->iv_sz + esp_sz;
u16 tail = sizeof (esp_footer_t) + pad_length + icv_sz;
u16 tail_orig = sizeof (esp_footer_t) + pad_length + pd->icv_sz;
- b->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ b->flags &=
+ ~(VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
if ((pd->flags & tun_flags) == 0 && !is_tun) /* transport mode */
{
@@ -878,14 +947,16 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node,
next[0] = ESP_DECRYPT_NEXT_IP4_INPUT;
b->current_data = pd->current_data + adv;
b->current_length = pd->current_length - adv;
- esp_remove_tail (vm, b, lb, tail);
+ esp_remove_tail_and_tfc_padding (vm, node, pd, b, lb, next, tail,
+ false);
}
else if (next_header == IP_PROTOCOL_IPV6)
{
next[0] = ESP_DECRYPT_NEXT_IP6_INPUT;
b->current_data = pd->current_data + adv;
b->current_length = pd->current_length - adv;
- esp_remove_tail (vm, b, lb, tail);
+ esp_remove_tail_and_tfc_padding (vm, node, pd, b, lb, next, tail,
+ true);
}
else if (next_header == IP_PROTOCOL_MPLS_IN_IP)
{
@@ -918,8 +989,9 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node,
next[0] = ESP_DECRYPT_NEXT_IP6_INPUT;
break;
default:
- b->error = node->errors[ESP_DECRYPT_ERROR_UNSUP_PAYLOAD];
- next[0] = ESP_DECRYPT_NEXT_DROP;
+ esp_decrypt_set_next_index (
+ b, node, vm->thread_index, ESP_DECRYPT_ERROR_UNSUP_PAYLOAD, 0,
+ next, ESP_DECRYPT_NEXT_DROP, pd->sa_index);
break;
}
}
@@ -932,8 +1004,9 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node,
}
else
{
- next[0] = ESP_DECRYPT_NEXT_DROP;
- b->error = node->errors[ESP_DECRYPT_ERROR_UNSUP_PAYLOAD];
+ esp_decrypt_set_next_index (b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_UNSUP_PAYLOAD, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
return;
}
@@ -973,8 +1046,10 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node,
!ip46_address_is_equal_v4 (&itp->itp_tun.dst,
&ip4->src_address))
{
- next[0] = ESP_DECRYPT_NEXT_DROP;
- b->error = node->errors[ESP_DECRYPT_ERROR_TUN_NO_PROTO];
+ esp_decrypt_set_next_index (
+ b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_TUN_NO_PROTO, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
}
}
else if (next_header == IP_PROTOCOL_IPV6)
@@ -988,8 +1063,10 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node,
!ip46_address_is_equal_v6 (&itp->itp_tun.dst,
&ip6->src_address))
{
- next[0] = ESP_DECRYPT_NEXT_DROP;
- b->error = node->errors[ESP_DECRYPT_ERROR_TUN_NO_PROTO];
+ esp_decrypt_set_next_index (
+ b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_TUN_NO_PROTO, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
}
}
}
@@ -997,8 +1074,8 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node,
}
if (PREDICT_FALSE (n_lost))
- vlib_increment_simple_counter (&ipsec_sa_lost_counters, vm->thread_index,
- pd->sa_index, n_lost);
+ vlib_increment_simple_counter (&ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST],
+ vm->thread_index, pd->sa_index, n_lost);
}
always_inline uword
@@ -1016,8 +1093,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
vlib_buffer_t *sync_bufs[VLIB_FRAME_SIZE];
u16 sync_nexts[VLIB_FRAME_SIZE], *sync_next = sync_nexts, n_sync = 0;
- u16 async_nexts[VLIB_FRAME_SIZE], *async_next = async_nexts;
- u16 noop_nexts[VLIB_FRAME_SIZE], *noop_next = noop_nexts, n_noop = 0;
+ u16 noop_nexts[VLIB_FRAME_SIZE], n_noop = 0;
u32 sync_bi[VLIB_FRAME_SIZE];
u32 noop_bi[VLIB_FRAME_SIZE];
esp_decrypt_packet_data_t pkt_data[VLIB_FRAME_SIZE], *pd = pkt_data;
@@ -1026,9 +1102,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 current_sa_index = ~0, current_sa_bytes = 0, current_sa_pkts = 0;
const u8 esp_sz = sizeof (esp_header_t);
ipsec_sa_t *sa0 = 0;
- vnet_crypto_op_t _op, *op = &_op;
- vnet_crypto_op_t **crypto_ops;
- vnet_crypto_op_t **integ_ops;
+ bool anti_replay_result;
int is_async = im->async_mode;
vnet_crypto_async_op_id_t async_op = ~0;
vnet_crypto_async_frame_t *async_frames[VNET_CRYPTO_ASYNC_OP_N_IDS];
@@ -1066,8 +1140,9 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (n_bufs == 0)
{
err = ESP_DECRYPT_ERROR_NO_BUFFERS;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- ESP_DECRYPT_NEXT_DROP);
+ esp_decrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, ESP_DECRYPT_NEXT_DROP,
+ vnet_buffer (b[0])->ipsec.sad_index);
goto next;
}
@@ -1075,12 +1150,13 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
if (current_sa_pkts)
vlib_increment_combined_counter (&ipsec_sa_counters, thread_index,
- current_sa_index,
- current_sa_pkts,
+ current_sa_index, current_sa_pkts,
current_sa_bytes);
current_sa_bytes = current_sa_pkts = 0;
current_sa_index = vnet_buffer (b[0])->ipsec.sad_index;
+ vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index,
+ current_sa_index);
sa0 = ipsec_sa_get (current_sa_index);
/* fetch the second cacheline ASAP */
@@ -1092,7 +1168,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
is_async = im->async_mode | ipsec_sa_is_set_IS_ASYNC (sa0);
}
- if (PREDICT_FALSE (~0 == sa0->thread_index))
+ if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index))
{
/* this is the first packet to use this SA, claim the SA
* for this thread. this could happen simultaneously on
@@ -1105,8 +1181,9 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
vnet_buffer (b[0])->ipsec.thread_index = sa0->thread_index;
err = ESP_DECRYPT_ERROR_HANDOFF;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- ESP_DECRYPT_NEXT_HANDOFF);
+ esp_decrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, ESP_DECRYPT_NEXT_HANDOFF,
+ current_sa_index);
goto next;
}
@@ -1127,33 +1204,37 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
/* find last buffer in the chain */
while (pd2->lb->flags & VLIB_BUFFER_NEXT_PRESENT)
pd2->lb = vlib_get_buffer (vm, pd2->lb->next_buffer);
+ }
- crypto_ops = &ptd->chained_crypto_ops;
- integ_ops = &ptd->chained_integ_ops;
+ pd->current_length = b[0]->current_length;
+
+ /* anti-replay check */
+ if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0)))
+ {
+ anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
+ sa0, pd->seq, ~0, false, &pd->seq_hi, true);
}
else
{
- crypto_ops = &ptd->crypto_ops;
- integ_ops = &ptd->integ_ops;
+ anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
+ sa0, pd->seq, ~0, false, &pd->seq_hi, false);
}
- pd->current_length = b[0]->current_length;
-
- /* anti-reply check */
- if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, ~0, false,
- &pd->seq_hi))
+ if (anti_replay_result)
{
err = ESP_DECRYPT_ERROR_REPLAY;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- ESP_DECRYPT_NEXT_DROP);
+ esp_decrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, ESP_DECRYPT_NEXT_DROP,
+ current_sa_index);
goto next;
}
if (pd->current_length < cpd.icv_sz + esp_sz + cpd.iv_sz)
{
err = ESP_DECRYPT_ERROR_RUNT;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- ESP_DECRYPT_NEXT_DROP);
+ esp_decrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, ESP_DECRYPT_NEXT_DROP,
+ current_sa_index);
goto next;
}
@@ -1172,31 +1253,47 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
async_frames[async_op] =
vnet_crypto_async_get_frame (vm, async_op);
+ if (PREDICT_FALSE (!async_frames[async_op]))
+ {
+ err = ESP_DECRYPT_ERROR_NO_AVAIL_FRAME;
+ esp_decrypt_set_next_index (
+ b[0], node, thread_index, err, n_noop, noop_nexts,
+ ESP_DECRYPT_NEXT_DROP, current_sa_index);
+ goto next;
+ }
+
/* Save the frame to the list we'll submit at the end */
vec_add1 (ptd->async_frames, async_frames[async_op]);
}
err = esp_decrypt_prepare_async_frame (
- vm, node, ptd, async_frames[async_op], sa0, payload, len,
- cpd.icv_sz, cpd.iv_sz, pd, pd2, from[b - bufs], b[0], async_next,
- async_next_node);
+ vm, ptd, async_frames[async_op], sa0, payload, len, cpd.icv_sz,
+ cpd.iv_sz, pd, pd2, from[b - bufs], b[0], async_next_node);
if (ESP_DECRYPT_ERROR_RX_PKTS != err)
{
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- ESP_DECRYPT_NEXT_DROP);
+ esp_decrypt_set_next_index (
+ b[0], node, thread_index, err, n_noop, noop_nexts,
+ ESP_DECRYPT_NEXT_DROP, current_sa_index);
}
}
else
- esp_decrypt_prepare_sync_op (
- vm, node, ptd, &crypto_ops, &integ_ops, op, sa0, payload, len,
- cpd.icv_sz, cpd.iv_sz, pd, pd2, b[0], sync_next, b - bufs);
+ {
+ err = esp_decrypt_prepare_sync_op (vm, ptd, sa0, payload, len,
+ cpd.icv_sz, cpd.iv_sz, pd, pd2,
+ b[0], n_sync);
+ if (err != ESP_DECRYPT_ERROR_RX_PKTS)
+ {
+ esp_decrypt_set_next_index (b[0], node, thread_index, err, 0,
+ sync_next, ESP_DECRYPT_NEXT_DROP,
+ current_sa_index);
+ }
+ }
/* next */
next:
if (ESP_DECRYPT_ERROR_RX_PKTS != err)
{
noop_bi[n_noop] = from[b - bufs];
n_noop++;
- noop_next++;
}
else if (!is_async)
{
@@ -1207,8 +1304,6 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
pd += 1;
pd2 += 1;
}
- else
- async_next++;
n_left -= 1;
b += 1;
@@ -1234,7 +1329,8 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
n_noop += esp_async_recycle_failed_submit (
vm, *async_frame, node, ESP_DECRYPT_ERROR_CRYPTO_ENGINE_ERROR,
- n_noop, noop_bi, noop_nexts, ESP_DECRYPT_NEXT_DROP);
+ IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR, n_noop, noop_bi, noop_nexts,
+ ESP_DECRYPT_NEXT_DROP, false);
vnet_crypto_async_reset_frame (*async_frame);
vnet_crypto_async_free_frame (vm, *async_frame);
}
@@ -1448,7 +1544,6 @@ VLIB_NODE_FN (esp6_decrypt_tun_post_node) (vlib_main_t * vm,
return esp_decrypt_post_inline (vm, node, from_frame, 1, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp4_decrypt_node) = {
.name = "esp4-decrypt",
.vector_size = sizeof (u32),
@@ -1572,7 +1667,6 @@ VLIB_REGISTER_NODE (esp6_decrypt_tun_post_node) = {
.sibling_of = "esp6-decrypt-tun",
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c
index 4ed3bf72c3f..dd47053874c 100644
--- a/src/vnet/ipsec/esp_encrypt.c
+++ b/src/vnet/ipsec/esp_encrypt.c
@@ -18,6 +18,7 @@
#include <vnet/vnet.h>
#include <vnet/api_errno.h>
#include <vnet/ip/ip.h>
+#include <vnet/interface_output.h>
#include <vnet/crypto/crypto.h>
@@ -94,8 +95,7 @@ format_esp_post_encrypt_trace (u8 * s, va_list * args)
/* pad packet in input buffer */
static_always_inline u8 *
esp_add_footer_and_icv (vlib_main_t *vm, vlib_buffer_t **last, u8 esp_align,
- u8 icv_sz, vlib_node_runtime_t *node,
- u16 buffer_data_size, uword total_len)
+ u8 icv_sz, u16 buffer_data_size, uword total_len)
{
static const u8 pad_data[ESP_MAX_BLOCK_SIZE] = {
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
@@ -149,11 +149,9 @@ esp_update_ip4_hdr (ip4_header_t * ip4, u16 len, int is_transport, int is_udp)
if (is_transport)
{
u8 prot = is_udp ? IP_PROTOCOL_UDP : IP_PROTOCOL_IPSEC_ESP;
-
- sum = ip_csum_update (ip4->checksum, ip4->protocol,
- prot, ip4_header_t, protocol);
+ sum = ip_csum_update (ip4->checksum, ip4->protocol, prot, ip4_header_t,
+ protocol);
ip4->protocol = prot;
-
sum = ip_csum_update (sum, old_len, len, ip4_header_t, length);
}
else
@@ -182,9 +180,9 @@ ext_hdr_is_pre_esp (u8 nexthdr)
return !u8x16_is_all_zero (ext_hdr_types == u8x16_splat (nexthdr));
#else
- return ((nexthdr ^ IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) |
- (nexthdr ^ IP_PROTOCOL_IPV6_ROUTE) |
- ((nexthdr ^ IP_PROTOCOL_IPV6_FRAGMENTATION) != 0));
+ return (!(nexthdr ^ IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) ||
+ !(nexthdr ^ IP_PROTOCOL_IPV6_ROUTE) ||
+ !(nexthdr ^ IP_PROTOCOL_IPV6_FRAGMENTATION));
#endif
}
@@ -215,6 +213,25 @@ esp_get_ip6_hdr_len (ip6_header_t * ip6, ip6_ext_header_t ** ext_hdr)
return len;
}
+/* IPsec IV generation: IVs requirements differ depending of the
+ * encryption mode: IVs must be unpredictable for AES-CBC whereas it can
+ * be predictable but should never be reused with the same key material
+ * for CTR and GCM.
+ * To avoid reusing the same IVs between multiple VPP instances and between
+ * restarts, we use a properly chosen PRNG to generate IVs. To ensure the IV is
+ * unpredictable for CBC, it is then encrypted using the same key as the
+ * message. You can refer to NIST SP800-38a and NIST SP800-38d for more
+ * details. */
+static_always_inline void *
+esp_generate_iv (ipsec_sa_t *sa, void *payload, int iv_sz)
+{
+ ASSERT (iv_sz >= sizeof (u64));
+ u64 *iv = (u64 *) (payload - iv_sz);
+ clib_memset_u8 (iv, 0, iv_sz);
+ *iv = clib_pcg64i_random_r (&sa->iv_prng);
+ return iv;
+}
+
static_always_inline void
esp_process_chained_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_crypto_op_t * ops, vlib_buffer_t * b[],
@@ -236,8 +253,10 @@ esp_process_chained_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
{
u32 bi = op->user_data;
- b[bi]->error = node->errors[ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR];
- nexts[bi] = drop_next;
+ esp_encrypt_set_next_index (b[bi], node, vm->thread_index,
+ ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR,
+ bi, nexts, drop_next,
+ vnet_buffer (b[bi])->ipsec.sad_index);
n_fail--;
}
op++;
@@ -264,8 +283,10 @@ esp_process_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
{
u32 bi = op->user_data;
- b[bi]->error = node->errors[ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR];
- nexts[bi] = drop_next;
+ esp_encrypt_set_next_index (b[bi], node, vm->thread_index,
+ ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR,
+ bi, nexts, drop_next,
+ vnet_buffer (b[bi])->ipsec.sad_index);
n_fail--;
}
op++;
@@ -368,28 +389,36 @@ esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
vnet_crypto_op_t *op;
vec_add2_aligned (crypto_ops[0], op, 1, CLIB_CACHE_LINE_BYTES);
vnet_crypto_op_init (op, sa0->crypto_enc_op_id);
+ u8 *crypto_start = payload;
+ /* esp_add_footer_and_icv() in esp_encrypt_inline() makes sure we always
+ * have enough space for ESP header and footer which includes ICV */
+ ASSERT (payload_len > icv_sz);
+ u16 crypto_len = payload_len - icv_sz;
+
+ /* generate the IV in front of the payload */
+ void *pkt_iv = esp_generate_iv (sa0, payload, iv_sz);
- op->src = op->dst = payload;
op->key_index = sa0->crypto_key_index;
- op->len = payload_len - icv_sz;
op->user_data = bi;
if (ipsec_sa_is_set_IS_CTR (sa0))
{
- ASSERT (sizeof (u64) == iv_sz);
/* construct nonce in a scratch space in front of the IP header */
esp_ctr_nonce_t *nonce =
- (esp_ctr_nonce_t *) (payload - sizeof (u64) - hdr_len -
- sizeof (*nonce));
- u64 *pkt_iv = (u64 *) (payload - sizeof (u64));
-
+ (esp_ctr_nonce_t *) (pkt_iv - hdr_len - sizeof (*nonce));
if (ipsec_sa_is_set_IS_AEAD (sa0))
{
/* constuct aad in a scratch space in front of the nonce */
op->aad = (u8 *) nonce - sizeof (esp_aead_t);
op->aad_len = esp_aad_fill (op->aad, esp, sa0, seq_hi);
- op->tag = payload + op->len;
+ op->tag = payload + crypto_len;
op->tag_len = 16;
+ if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa0)))
+ {
+ /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */
+ crypto_start -= iv_sz;
+ crypto_len += iv_sz;
+ }
}
else
{
@@ -397,23 +426,34 @@ esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
}
nonce->salt = sa0->salt;
- nonce->iv = *pkt_iv = clib_host_to_net_u64 (sa0->ctr_iv_counter++);
+ nonce->iv = *(u64 *) pkt_iv;
op->iv = (u8 *) nonce;
}
else
{
- op->iv = payload - iv_sz;
- op->flags = VNET_CRYPTO_OP_FLAG_INIT_IV;
+ /* construct zero iv in front of the IP header */
+ op->iv = pkt_iv - hdr_len - iv_sz;
+ clib_memset_u8 (op->iv, 0, iv_sz);
+ /* include iv field in crypto */
+ crypto_start -= iv_sz;
+ crypto_len += iv_sz;
}
- if (lb != b[0])
+ if (PREDICT_FALSE (lb != b[0]))
{
/* is chained */
op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
op->chunk_index = vec_len (ptd->chunks);
op->tag = vlib_buffer_get_tail (lb) - icv_sz;
- esp_encrypt_chain_crypto (vm, ptd, sa0, b[0], lb, icv_sz, payload,
- payload_len, &op->n_chunks);
+ esp_encrypt_chain_crypto (vm, ptd, sa0, b[0], lb, icv_sz,
+ crypto_start, crypto_len + icv_sz,
+ &op->n_chunks);
+ }
+ else
+ {
+ /* not chained */
+ op->src = op->dst = crypto_start;
+ op->len = crypto_len;
}
}
@@ -462,33 +502,36 @@ esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
esp_post_data_t *post = esp_post_data (b);
u8 *tag, *iv, *aad = 0;
u8 flag = 0;
- u32 key_index;
- i16 crypto_start_offset, integ_start_offset = 0;
+ const u32 key_index = sa->crypto_key_index;
+ i16 crypto_start_offset, integ_start_offset;
u16 crypto_total_len, integ_total_len;
post->next_index = next;
/* crypto */
- crypto_start_offset = payload - b->data;
+ crypto_start_offset = integ_start_offset = payload - b->data;
crypto_total_len = integ_total_len = payload_len - icv_sz;
tag = payload + crypto_total_len;
- key_index = sa->linked_key_index;
+ /* generate the IV in front of the payload */
+ void *pkt_iv = esp_generate_iv (sa, payload, iv_sz);
if (ipsec_sa_is_set_IS_CTR (sa))
{
- ASSERT (sizeof (u64) == iv_sz);
/* construct nonce in a scratch space in front of the IP header */
- esp_ctr_nonce_t *nonce = (esp_ctr_nonce_t *) (payload - sizeof (u64) -
- hdr_len - sizeof (*nonce));
- u64 *pkt_iv = (u64 *) (payload - sizeof (u64));
-
+ esp_ctr_nonce_t *nonce =
+ (esp_ctr_nonce_t *) (pkt_iv - hdr_len - sizeof (*nonce));
if (ipsec_sa_is_set_IS_AEAD (sa))
{
/* constuct aad in a scratch space in front of the nonce */
aad = (u8 *) nonce - sizeof (esp_aead_t);
esp_aad_fill (aad, esp, sa, sa->seq_hi);
- key_index = sa->crypto_key_index;
+ if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa)))
+ {
+ /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */
+ crypto_start_offset -= iv_sz;
+ crypto_total_len += iv_sz;
+ }
}
else
{
@@ -496,13 +539,17 @@ esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
}
nonce->salt = sa->salt;
- nonce->iv = *pkt_iv = clib_host_to_net_u64 (sa->ctr_iv_counter++);
+ nonce->iv = *(u64 *) pkt_iv;
iv = (u8 *) nonce;
}
else
{
- iv = payload - iv_sz;
- flag |= VNET_CRYPTO_OP_FLAG_INIT_IV;
+ /* construct zero iv in front of the IP header */
+ iv = pkt_iv - hdr_len - iv_sz;
+ clib_memset_u8 (iv, 0, iv_sz);
+ /* include iv field in crypto */
+ crypto_start_offset -= iv_sz;
+ crypto_total_len += iv_sz;
}
if (lb != b)
@@ -510,13 +557,14 @@ esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
/* chain */
flag |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
tag = vlib_buffer_get_tail (lb) - icv_sz;
- crypto_total_len = esp_encrypt_chain_crypto (vm, ptd, sa, b, lb, icv_sz,
- payload, payload_len, 0);
+ crypto_total_len = esp_encrypt_chain_crypto (
+ vm, ptd, sa, b, lb, icv_sz, b->data + crypto_start_offset,
+ crypto_total_len + icv_sz, 0);
}
if (sa->integ_op_id)
{
- integ_start_offset = crypto_start_offset - iv_sz - sizeof (esp_header_t);
+ integ_start_offset -= iv_sz + sizeof (esp_header_t);
integ_total_len += iv_sz + sizeof (esp_header_t);
if (b != lb)
@@ -557,6 +605,7 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 current_sa_bytes = 0, spi = 0;
u8 esp_align = 4, iv_sz = 0, icv_sz = 0;
ipsec_sa_t *sa0 = 0;
+ u8 sa_drop_no_crypto = 0;
vlib_buffer_t *lb;
vnet_crypto_op_t **crypto_ops = &ptd->crypto_ops;
vnet_crypto_op_t **integ_ops = &ptd->integ_ops;
@@ -573,8 +622,8 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
ESP_ENCRYPT_NEXT_HANDOFF_MPLS));
vlib_buffer_t *sync_bufs[VLIB_FRAME_SIZE];
u16 sync_nexts[VLIB_FRAME_SIZE], *sync_next = sync_nexts, n_sync = 0;
- u16 async_nexts[VLIB_FRAME_SIZE], *async_next = async_nexts, n_async = 0;
- u16 noop_nexts[VLIB_FRAME_SIZE], *noop_next = noop_nexts, n_noop = 0;
+ u16 n_async = 0;
+ u16 noop_nexts[VLIB_FRAME_SIZE], n_noop = 0;
u32 sync_bi[VLIB_FRAME_SIZE];
u32 noop_bi[VLIB_FRAME_SIZE];
esp_encrypt_error_t err;
@@ -613,6 +662,10 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
CLIB_CACHE_LINE_BYTES, LOAD);
}
+ vnet_calc_checksums_inline (vm, b[0], b[0]->flags & VNET_BUFFER_F_IS_IP4,
+ b[0]->flags & VNET_BUFFER_F_IS_IP6);
+ vnet_calc_outer_checksums_inline (vm, b[0]);
+
if (is_tun)
{
/* we are on a ipsec tunnel's feature arc */
@@ -623,8 +676,8 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (PREDICT_FALSE (INDEX_INVALID == sa_index0))
{
err = ESP_ENCRYPT_ERROR_NO_PROTECTION;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- drop_next);
+ noop_nexts[n_noop] = drop_next;
+ b[0]->error = node->errors[err];
goto trace;
}
}
@@ -634,27 +687,24 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (sa_index0 != current_sa_index)
{
if (current_sa_packets)
- vlib_increment_combined_counter (&ipsec_sa_counters, thread_index,
- current_sa_index,
- current_sa_packets,
- current_sa_bytes);
+ vlib_increment_combined_counter (
+ &ipsec_sa_counters, thread_index, current_sa_index,
+ current_sa_packets, current_sa_bytes);
current_sa_packets = current_sa_bytes = 0;
sa0 = ipsec_sa_get (sa_index0);
+ current_sa_index = sa_index0;
+
+ sa_drop_no_crypto = ((sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE &&
+ sa0->integ_alg == IPSEC_INTEG_ALG_NONE) &&
+ !ipsec_sa_is_set_NO_ALGO_NO_DROP (sa0));
+
+ vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index,
+ current_sa_index);
- if (PREDICT_FALSE ((sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE &&
- sa0->integ_alg == IPSEC_INTEG_ALG_NONE) &&
- !ipsec_sa_is_set_NO_ALGO_NO_DROP (sa0)))
- {
- err = ESP_ENCRYPT_ERROR_NO_ENCRYPTION;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- drop_next);
- goto trace;
- }
/* fetch the second cacheline ASAP */
clib_prefetch_load (sa0->cacheline1);
- current_sa_index = sa_index0;
spi = clib_net_to_host_u32 (sa0->spi);
esp_align = sa0->esp_block_align;
icv_sz = sa0->integ_icv_size;
@@ -662,7 +712,15 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
is_async = im->async_mode | ipsec_sa_is_set_IS_ASYNC (sa0);
}
- if (PREDICT_FALSE (~0 == sa0->thread_index))
+ if (PREDICT_FALSE (sa_drop_no_crypto != 0))
+ {
+ err = ESP_ENCRYPT_ERROR_NO_ENCRYPTION;
+ esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, drop_next, sa_index0);
+ goto trace;
+ }
+
+ if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index))
{
/* this is the first packet to use this SA, claim the SA
* for this thread. this could happen simultaneously on
@@ -675,8 +733,9 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
vnet_buffer (b[0])->ipsec.thread_index = sa0->thread_index;
err = ESP_ENCRYPT_ERROR_HANDOFF;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- handoff_next);
+ esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, handoff_next,
+ current_sa_index);
goto trace;
}
@@ -685,7 +744,8 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (n_bufs == 0)
{
err = ESP_ENCRYPT_ERROR_NO_BUFFERS;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts, drop_next);
+ esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, drop_next, current_sa_index);
goto trace;
}
@@ -699,7 +759,8 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (PREDICT_FALSE (esp_seq_advance (sa0)))
{
err = ESP_ENCRYPT_ERROR_SEQ_CYCLED;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts, drop_next);
+ esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, drop_next, current_sa_index);
goto trace;
}
@@ -710,13 +771,14 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
payload = vlib_buffer_get_current (b[0]);
next_hdr_ptr = esp_add_footer_and_icv (
- vm, &lb, esp_align, icv_sz, node, buffer_data_size,
+ vm, &lb, esp_align, icv_sz, buffer_data_size,
vlib_buffer_length_in_chain (vm, b[0]));
if (!next_hdr_ptr)
{
err = ESP_ENCRYPT_ERROR_NO_BUFFERS;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- drop_next);
+ esp_encrypt_set_next_index (b[0], node, thread_index, err,
+ n_noop, noop_nexts, drop_next,
+ current_sa_index);
goto trace;
}
b[0]->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
@@ -837,21 +899,23 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if ((old_ip_hdr - ip_len) < &b[0]->pre_data[0])
{
err = ESP_ENCRYPT_ERROR_NO_BUFFERS;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- drop_next);
+ esp_encrypt_set_next_index (b[0], node, thread_index, err,
+ n_noop, noop_nexts, drop_next,
+ current_sa_index);
goto trace;
}
vlib_buffer_advance (b[0], ip_len);
payload = vlib_buffer_get_current (b[0]);
next_hdr_ptr = esp_add_footer_and_icv (
- vm, &lb, esp_align, icv_sz, node, buffer_data_size,
+ vm, &lb, esp_align, icv_sz, buffer_data_size,
vlib_buffer_length_in_chain (vm, b[0]));
if (!next_hdr_ptr)
{
err = ESP_ENCRYPT_ERROR_NO_BUFFERS;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- drop_next);
+ esp_encrypt_set_next_index (b[0], node, thread_index, err,
+ n_noop, noop_nexts, drop_next,
+ current_sa_index);
goto trace;
}
@@ -952,6 +1016,16 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
async_frames[async_op] =
vnet_crypto_async_get_frame (vm, async_op);
+
+ if (PREDICT_FALSE (!async_frames[async_op]))
+ {
+ err = ESP_ENCRYPT_ERROR_NO_AVAIL_FRAME;
+ esp_encrypt_set_next_index (b[0], node, thread_index, err,
+ n_noop, noop_nexts, drop_next,
+ current_sa_index);
+ goto trace;
+ }
+
/* Save the frame to the list we'll submit at the end */
vec_add1 (ptd->async_frames, async_frames[async_op]);
}
@@ -995,7 +1069,6 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
noop_bi[n_noop] = from[b - bufs];
n_noop++;
- noop_next++;
}
else if (!is_async)
{
@@ -1007,7 +1080,6 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
else
{
n_async++;
- async_next++;
}
n_left -= 1;
b += 1;
@@ -1042,7 +1114,8 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
n_noop += esp_async_recycle_failed_submit (
vm, *async_frame, node, ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR,
- n_noop, noop_bi, noop_nexts, drop_next);
+ IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR, n_noop, noop_bi,
+ noop_nexts, drop_next, true);
vnet_crypto_async_reset_frame (*async_frame);
vnet_crypto_async_free_frame (vm, *async_frame);
}
@@ -1151,7 +1224,6 @@ VLIB_NODE_FN (esp4_encrypt_node) (vlib_main_t * vm,
esp_encrypt_async_next.esp4_post_next);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp4_encrypt_node) = {
.name = "esp4-encrypt",
.vector_size = sizeof (u32),
@@ -1170,7 +1242,6 @@ VLIB_REGISTER_NODE (esp4_encrypt_node) = {
[ESP_ENCRYPT_NEXT_HANDOFF_MPLS] = "error-drop",
[ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "interface-output" },
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp4_encrypt_post_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1179,7 +1250,6 @@ VLIB_NODE_FN (esp4_encrypt_post_node) (vlib_main_t * vm,
return esp_encrypt_post_inline (vm, node, from_frame);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp4_encrypt_post_node) = {
.name = "esp4-encrypt-post",
.vector_size = sizeof (u32),
@@ -1190,7 +1260,6 @@ VLIB_REGISTER_NODE (esp4_encrypt_post_node) = {
.n_errors = ESP_ENCRYPT_N_ERROR,
.error_counters = esp_encrypt_error_counters,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp6_encrypt_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1200,7 +1269,6 @@ VLIB_NODE_FN (esp6_encrypt_node) (vlib_main_t * vm,
esp_encrypt_async_next.esp6_post_next);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp6_encrypt_node) = {
.name = "esp6-encrypt",
.vector_size = sizeof (u32),
@@ -1211,7 +1279,6 @@ VLIB_REGISTER_NODE (esp6_encrypt_node) = {
.n_errors = ESP_ENCRYPT_N_ERROR,
.error_counters = esp_encrypt_error_counters,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp6_encrypt_post_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1220,7 +1287,6 @@ VLIB_NODE_FN (esp6_encrypt_post_node) (vlib_main_t * vm,
return esp_encrypt_post_inline (vm, node, from_frame);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp6_encrypt_post_node) = {
.name = "esp6-encrypt-post",
.vector_size = sizeof (u32),
@@ -1231,7 +1297,6 @@ VLIB_REGISTER_NODE (esp6_encrypt_post_node) = {
.n_errors = ESP_ENCRYPT_N_ERROR,
.error_counters = esp_encrypt_error_counters,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp4_encrypt_tun_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1241,7 +1306,6 @@ VLIB_NODE_FN (esp4_encrypt_tun_node) (vlib_main_t * vm,
esp_encrypt_async_next.esp4_tun_post_next);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp4_encrypt_tun_node) = {
.name = "esp4-encrypt-tun",
.vector_size = sizeof (u32),
@@ -1270,7 +1334,6 @@ VLIB_NODE_FN (esp4_encrypt_tun_post_node) (vlib_main_t * vm,
return esp_encrypt_post_inline (vm, node, from_frame);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp4_encrypt_tun_post_node) = {
.name = "esp4-encrypt-tun-post",
.vector_size = sizeof (u32),
@@ -1281,7 +1344,6 @@ VLIB_REGISTER_NODE (esp4_encrypt_tun_post_node) = {
.n_errors = ESP_ENCRYPT_N_ERROR,
.error_counters = esp_encrypt_error_counters,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp6_encrypt_tun_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1291,7 +1353,6 @@ VLIB_NODE_FN (esp6_encrypt_tun_node) (vlib_main_t * vm,
esp_encrypt_async_next.esp6_tun_post_next);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp6_encrypt_tun_node) = {
.name = "esp6-encrypt-tun",
.vector_size = sizeof (u32),
@@ -1313,7 +1374,6 @@ VLIB_REGISTER_NODE (esp6_encrypt_tun_node) = {
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp6_encrypt_tun_post_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1322,7 +1382,6 @@ VLIB_NODE_FN (esp6_encrypt_tun_post_node) (vlib_main_t * vm,
return esp_encrypt_post_inline (vm, node, from_frame);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp6_encrypt_tun_post_node) = {
.name = "esp6-encrypt-tun-post",
.vector_size = sizeof (u32),
@@ -1333,7 +1392,6 @@ VLIB_REGISTER_NODE (esp6_encrypt_tun_post_node) = {
.n_errors = ESP_ENCRYPT_N_ERROR,
.error_counters = esp_encrypt_error_counters,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp_mpls_encrypt_tun_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
diff --git a/src/vnet/ipsec/ipsec.api b/src/vnet/ipsec/ipsec.api
index 401564bd39b..68efe8f50f7 100644
--- a/src/vnet/ipsec/ipsec.api
+++ b/src/vnet/ipsec/ipsec.api
@@ -96,6 +96,7 @@ define ipsec_spd_entry_add_del_v2
*/
define ipsec_spd_entry_add_del_reply
{
+ option deprecated;
u32 context;
i32 retval;
u32 stat_index;
@@ -166,6 +167,7 @@ define ipsec_spd_details {
define ipsec_sad_entry_add_del
{
option deprecated;
+
u32 client_index;
u32 context;
bool is_add;
@@ -174,6 +176,8 @@ define ipsec_sad_entry_add_del
define ipsec_sad_entry_add_del_v2
{
+ option deprecated;
+
u32 client_index;
u32 context;
bool is_add;
@@ -187,12 +191,21 @@ define ipsec_sad_entry_add_del_v3
bool is_add;
vl_api_ipsec_sad_entry_v3_t entry;
};
+
define ipsec_sad_entry_add
{
u32 client_index;
u32 context;
vl_api_ipsec_sad_entry_v3_t entry;
};
+
+define ipsec_sad_entry_add_v2
+{
+ u32 client_index;
+ u32 context;
+ vl_api_ipsec_sad_entry_v4_t entry;
+};
+
autoreply define ipsec_sad_entry_del
{
u32 client_index;
@@ -200,9 +213,55 @@ autoreply define ipsec_sad_entry_del
u32 id;
};
+
+/** \brief An API to bind an SAD entry to a specific worker
+
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sa_id - the id of the SA to bind
+ @param worker - the worker's index to which the SA will be bound to
+ */
+autoreply define ipsec_sad_bind
+{
+ u32 client_index;
+ u32 context;
+ u32 sa_id;
+ u32 worker;
+};
+
+autoreply define ipsec_sad_unbind
+{
+ u32 client_index;
+ u32 context;
+ u32 sa_id;
+};
+
+/** \brief An API to update the tunnel parameters and the ports associated with an SA
+
+ Used in the NAT-T case when the NAT data changes
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sa_id - the id of the SA to update
+ @param is_tun - update the tunnel if non-zero, else update only the ports
+ @param tunnel - sender context, to match reply w/ request
+ @param udp_src_port - new src port for NAT-T. Used if different from 0xffff
+ @param udp_dst_port - new dst port for NAT-T. Used if different from 0xffff
+ */
+autoreply define ipsec_sad_entry_update
+{
+ u32 client_index;
+ u32 context;
+ u32 sad_id;
+ bool is_tun;
+ vl_api_tunnel_t tunnel;
+ u16 udp_src_port [default=0xffff];
+ u16 udp_dst_port [default=0xffff];
+};
+
define ipsec_sad_entry_add_del_reply
{
option deprecated;
+
u32 context;
i32 retval;
u32 stat_index;
@@ -210,6 +269,8 @@ define ipsec_sad_entry_add_del_reply
define ipsec_sad_entry_add_del_v2_reply
{
+ option deprecated;
+
u32 context;
i32 retval;
u32 stat_index;
@@ -221,6 +282,7 @@ define ipsec_sad_entry_add_del_v3_reply
i32 retval;
u32 stat_index;
};
+
define ipsec_sad_entry_add_reply
{
u32 context;
@@ -228,6 +290,13 @@ define ipsec_sad_entry_add_reply
u32 stat_index;
};
+define ipsec_sad_entry_add_v2_reply
+{
+ u32 context;
+ i32 retval;
+ u32 stat_index;
+};
+
/** \brief Add or Update Protection for a tunnel with IPSEC
Tunnel protection directly associates an SA with all packets
@@ -391,12 +460,15 @@ define ipsec_itf_details
define ipsec_sa_dump
{
option deprecated;
+
u32 client_index;
u32 context;
u32 sa_id;
};
define ipsec_sa_v2_dump
{
+ option deprecated;
+
u32 client_index;
u32 context;
u32 sa_id;
@@ -407,6 +479,18 @@ define ipsec_sa_v3_dump
u32 context;
u32 sa_id;
};
+define ipsec_sa_v4_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sa_id;
+};
+define ipsec_sa_v5_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sa_id;
+};
/** \brief IPsec security association database response
@param context - sender context which was passed in the request
@@ -422,6 +506,7 @@ define ipsec_sa_v3_dump
*/
define ipsec_sa_details {
option deprecated;
+
u32 context;
vl_api_ipsec_sad_entry_t entry;
@@ -434,6 +519,8 @@ define ipsec_sa_details {
u32 stat_index;
};
define ipsec_sa_v2_details {
+ option deprecated;
+
u32 context;
vl_api_ipsec_sad_entry_v2_t entry;
@@ -456,6 +543,28 @@ define ipsec_sa_v3_details {
u32 stat_index;
};
+define ipsec_sa_v4_details {
+ u32 context;
+ vl_api_ipsec_sad_entry_v3_t entry;
+
+ vl_api_interface_index_t sw_if_index;
+ u64 seq_outbound;
+ u64 last_seq_inbound;
+ u64 replay_window;
+ u32 thread_index;
+ u32 stat_index;
+};
+define ipsec_sa_v5_details {
+ u32 context;
+ vl_api_ipsec_sad_entry_v4_t entry;
+
+ vl_api_interface_index_t sw_if_index;
+ u64 seq_outbound;
+ u64 last_seq_inbound;
+ u64 replay_window;
+ u32 thread_index;
+ u32 stat_index;
+};
/** \brief Dump IPsec backends
@param client_index - opaque cookie to identify the sender
@@ -584,6 +693,12 @@ counters esp_decrypt {
units "packets";
description "unsupported payload";
};
+ no_avail_frame {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no available frame (packet dropped)";
+ };
};
counters esp_encrypt {
@@ -641,6 +756,12 @@ counters esp_encrypt {
units "packets";
description "no Encrypting SA (packet dropped)";
};
+ no_avail_frame {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no available frame (packet dropped)";
+ };
};
counters ah_encrypt {
diff --git a/src/vnet/ipsec/ipsec.c b/src/vnet/ipsec/ipsec.c
index 3ea2e4d62df..f8c39c327ed 100644
--- a/src/vnet/ipsec/ipsec.c
+++ b/src/vnet/ipsec/ipsec.c
@@ -275,8 +275,7 @@ ipsec_register_esp_backend (
const char *esp6_decrypt_node_name, const char *esp6_decrypt_tun_node_name,
const char *esp_mpls_encrypt_node_tun_name,
check_support_cb_t esp_check_support_cb,
- add_del_sa_sess_cb_t esp_add_del_sa_sess_cb,
- enable_disable_cb_t enable_disable_cb)
+ add_del_sa_sess_cb_t esp_add_del_sa_sess_cb)
{
ipsec_esp_backend_t *b;
@@ -307,7 +306,6 @@ ipsec_register_esp_backend (
b->check_support_cb = esp_check_support_cb;
b->add_del_sa_sess_cb = esp_add_del_sa_sess_cb;
- b->enable_disable_cb = enable_disable_cb;
return b - im->esp_backends;
}
@@ -358,18 +356,6 @@ ipsec_select_esp_backend (ipsec_main_t * im, u32 backend_idx)
if (pool_is_free_index (im->esp_backends, backend_idx))
return VNET_API_ERROR_INVALID_VALUE;
- /* disable current backend */
- if (im->esp_current_backend != ~0)
- {
- ipsec_esp_backend_t *cb = pool_elt_at_index (im->esp_backends,
- im->esp_current_backend);
- if (cb->enable_disable_cb)
- {
- if ((cb->enable_disable_cb) (0) != 0)
- return -1;
- }
- }
-
ipsec_esp_backend_t *b = pool_elt_at_index (im->esp_backends, backend_idx);
im->esp_current_backend = backend_idx;
im->esp4_encrypt_node_index = b->esp4_encrypt_node_index;
@@ -388,11 +374,6 @@ ipsec_select_esp_backend (ipsec_main_t * im, u32 backend_idx)
im->esp6_encrypt_tun_node_index = b->esp6_encrypt_tun_node_index;
im->esp_mpls_encrypt_tun_node_index = b->esp_mpls_encrypt_tun_node_index;
- if (b->enable_disable_cb)
- {
- if ((b->enable_disable_cb) (1) != 0)
- return -1;
- }
return 0;
}
@@ -402,16 +383,11 @@ ipsec_set_async_mode (u32 is_enabled)
ipsec_main_t *im = &ipsec_main;
ipsec_sa_t *sa;
- vnet_crypto_request_async_mode (is_enabled);
-
im->async_mode = is_enabled;
/* change SA crypto op data */
pool_foreach (sa, ipsec_sa_pool)
- {
- sa->crypto_op_data =
- (is_enabled ? sa->async_op_data.data : sa->sync_op_data.data);
- }
+ ipsec_sa_set_async_mode (sa, is_enabled);
}
static void
@@ -485,7 +461,7 @@ ipsec_init (vlib_main_t * vm)
vm, im, "crypto engine backend", "esp4-encrypt", "esp4-encrypt-tun",
"esp4-decrypt", "esp4-decrypt-tun", "esp6-encrypt", "esp6-encrypt-tun",
"esp6-decrypt", "esp6-decrypt-tun", "esp-mpls-encrypt-tun",
- ipsec_check_esp_support, NULL, crypto_dispatch_enable_disable);
+ ipsec_check_esp_support, NULL);
im->esp_default_backend = idx;
rv = ipsec_select_esp_backend (im, idx);
@@ -586,6 +562,30 @@ ipsec_init (vlib_main_t * vm)
a->iv_size = 8;
a->icv_size = 16;
+ a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_NULL_GMAC_128;
+ a->enc_op_id = VNET_CRYPTO_OP_AES_128_NULL_GMAC_ENC;
+ a->dec_op_id = VNET_CRYPTO_OP_AES_128_NULL_GMAC_DEC;
+ a->alg = VNET_CRYPTO_ALG_AES_128_GCM;
+ a->iv_size = 8;
+ a->block_align = 1;
+ a->icv_size = 16;
+
+ a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_NULL_GMAC_192;
+ a->enc_op_id = VNET_CRYPTO_OP_AES_192_NULL_GMAC_ENC;
+ a->dec_op_id = VNET_CRYPTO_OP_AES_192_NULL_GMAC_DEC;
+ a->alg = VNET_CRYPTO_ALG_AES_192_GCM;
+ a->iv_size = 8;
+ a->block_align = 1;
+ a->icv_size = 16;
+
+ a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_NULL_GMAC_256;
+ a->enc_op_id = VNET_CRYPTO_OP_AES_256_NULL_GMAC_ENC;
+ a->dec_op_id = VNET_CRYPTO_OP_AES_256_NULL_GMAC_DEC;
+ a->alg = VNET_CRYPTO_ALG_AES_256_GCM;
+ a->iv_size = 8;
+ a->block_align = 1;
+ a->icv_size = 16;
+
vec_validate (im->integ_algs, IPSEC_INTEG_N_ALG - 1);
ipsec_main_integ_alg_t *i;
diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h
index 69aa661683a..4aa09d7560e 100644
--- a/src/vnet/ipsec/ipsec.h
+++ b/src/vnet/ipsec/ipsec.h
@@ -93,8 +93,6 @@ typedef struct
add_del_sa_sess_cb_t add_del_sa_sess_cb;
/* check support function */
check_support_cb_t check_support_cb;
- /* enable or disable function */
- enable_disable_cb_t enable_disable_cb;
u32 esp4_encrypt_node_index;
u32 esp4_decrypt_node_index;
u32 esp4_encrypt_next_index;
@@ -347,6 +345,23 @@ ipsec_spinlock_unlock (i32 *lock)
clib_atomic_release (lock);
}
+/* Special case to drop or hand off packets for sync/async modes.
+ *
+ * Different than sync mode, async mode only enqueue drop or hand-off packets
+ * to next nodes.
+ */
+always_inline void
+ipsec_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
+ u32 thread_index, u32 err, u32 ipsec_sa_err, u16 index,
+ u16 *nexts, u16 drop_next, u32 sa_index)
+{
+ nexts[index] = drop_next;
+ b->error = node->errors[err];
+ if (PREDICT_TRUE (ipsec_sa_err != ~0))
+ vlib_increment_simple_counter (&ipsec_sa_err_counters[ipsec_sa_err],
+ thread_index, sa_index, 1);
+}
+
u32 ipsec_register_ah_backend (vlib_main_t * vm, ipsec_main_t * im,
const char *name,
const char *ah4_encrypt_node_name,
@@ -364,8 +379,7 @@ u32 ipsec_register_esp_backend (
const char *esp6_decrypt_node_name, const char *esp6_decrypt_tun_node_name,
const char *esp_mpls_encrypt_tun_node_name,
check_support_cb_t esp_check_support_cb,
- add_del_sa_sess_cb_t esp_add_del_sa_sess_cb,
- enable_disable_cb_t enable_disable_cb);
+ add_del_sa_sess_cb_t esp_add_del_sa_sess_cb);
int ipsec_select_ah_backend (ipsec_main_t * im, u32 ah_backend_idx);
int ipsec_select_esp_backend (ipsec_main_t * im, u32 esp_backend_idx);
diff --git a/src/vnet/ipsec/ipsec_api.c b/src/vnet/ipsec/ipsec_api.c
index 767dc82dca7..21216b1a614 100644
--- a/src/vnet/ipsec/ipsec_api.c
+++ b/src/vnet/ipsec/ipsec_api.c
@@ -150,12 +150,10 @@ send_ipsec_tunnel_protect_details (index_t itpi, void *arg)
sa = ipsec_sa_get (itp->itp_out_sa);
mp->tun.sa_out = htonl (sa->id);
mp->tun.n_sa_in = itp->itp_n_sa_in;
- /* *INDENT-OFF* */
FOR_EACH_IPSEC_PROTECT_INPUT_SA(itp, sa,
({
mp->tun.sa_in[ii++] = htonl (sa->id);
}));
- /* *INDENT-ON* */
vl_api_send_msg (ctx->reg, (u8 *) mp);
@@ -264,12 +262,10 @@ static void vl_api_ipsec_spd_entry_add_del_t_handler
goto out;
out:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IPSEC_SPD_ENTRY_ADD_DEL_REPLY,
({
rmp->stat_index = ntohl(stat_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -327,7 +323,7 @@ vl_api_ipsec_spd_entry_add_del_v2_t_handler (
goto out;
out:
- REPLY_MACRO2 (VL_API_IPSEC_SPD_ENTRY_ADD_DEL_REPLY,
+ REPLY_MACRO2 (VL_API_IPSEC_SPD_ENTRY_ADD_DEL_V2_REPLY,
({ rmp->stat_index = ntohl (stat_index); }));
}
@@ -382,18 +378,16 @@ static void vl_api_ipsec_sad_entry_add_del_t_handler
ip_address_decode2 (&mp->entry.tunnel_src, &tun.t_src);
ip_address_decode2 (&mp->entry.tunnel_dst, &tun.t_dst);
- rv = ipsec_sa_add_and_lock (id, spi, proto, crypto_alg, &crypto_key,
- integ_alg, &integ_key, flags, mp->entry.salt,
- htons (mp->entry.udp_src_port),
- htons (mp->entry.udp_dst_port), &tun, &sa_index);
+ rv = ipsec_sa_add_and_lock (
+ id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags,
+ mp->entry.salt, htons (mp->entry.udp_src_port),
+ htons (mp->entry.udp_dst_port), 0, &tun, &sa_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IPSEC_SAD_ENTRY_ADD_DEL_REPLY,
{
rmp->stat_index = htonl (sa_index);
});
- /* *INDENT-ON* */
}
static void vl_api_ipsec_sad_entry_add_del_v2_t_handler
@@ -456,18 +450,16 @@ static void vl_api_ipsec_sad_entry_add_del_v2_t_handler
ip_address_decode2 (&mp->entry.tunnel_src, &tun.t_src);
ip_address_decode2 (&mp->entry.tunnel_dst, &tun.t_dst);
- rv = ipsec_sa_add_and_lock (
- id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags,
- mp->entry.salt, htons (mp->entry.udp_src_port),
- htons (mp->entry.udp_dst_port), &tun, &sa_index);
+ rv = ipsec_sa_add_and_lock (
+ id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags,
+ mp->entry.salt, htons (mp->entry.udp_src_port),
+ htons (mp->entry.udp_dst_port), 0, &tun, &sa_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IPSEC_SAD_ENTRY_ADD_DEL_V2_REPLY,
{
rmp->stat_index = htonl (sa_index);
});
- /* *INDENT-ON* */
}
static int
@@ -514,10 +506,10 @@ ipsec_sad_entry_add_v3 (const vl_api_ipsec_sad_entry_v3_t *entry,
ipsec_key_decode (&entry->crypto_key, &crypto_key);
ipsec_key_decode (&entry->integrity_key, &integ_key);
- return ipsec_sa_add_and_lock (id, spi, proto, crypto_alg, &crypto_key,
- integ_alg, &integ_key, flags, entry->salt,
- htons (entry->udp_src_port),
- htons (entry->udp_dst_port), &tun, sa_index);
+ return ipsec_sa_add_and_lock (
+ id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags,
+ entry->salt, htons (entry->udp_src_port), htons (entry->udp_dst_port), 0,
+ &tun, sa_index);
}
static void
@@ -543,6 +535,56 @@ vl_api_ipsec_sad_entry_add_del_v3_t_handler (
{ rmp->stat_index = htonl (sa_index); });
}
+static int
+ipsec_sad_entry_add_v4 (const vl_api_ipsec_sad_entry_v4_t *entry,
+ u32 *sa_index)
+{
+ ipsec_key_t crypto_key, integ_key;
+ ipsec_crypto_alg_t crypto_alg;
+ ipsec_integ_alg_t integ_alg;
+ ipsec_protocol_t proto;
+ ipsec_sa_flags_t flags;
+ u32 id, spi;
+ tunnel_t tun = { 0 };
+ int rv;
+
+ id = ntohl (entry->sad_id);
+ spi = ntohl (entry->spi);
+
+ rv = ipsec_proto_decode (entry->protocol, &proto);
+
+ if (rv)
+ return rv;
+
+ rv = ipsec_crypto_algo_decode (entry->crypto_algorithm, &crypto_alg);
+
+ if (rv)
+ return rv;
+
+ rv = ipsec_integ_algo_decode (entry->integrity_algorithm, &integ_alg);
+
+ if (rv)
+ return rv;
+
+ flags = ipsec_sa_flags_decode (entry->flags);
+
+ if (flags & IPSEC_SA_FLAG_IS_TUNNEL)
+ {
+ rv = tunnel_decode (&entry->tunnel, &tun);
+
+ if (rv)
+ return rv;
+ }
+
+ ipsec_key_decode (&entry->crypto_key, &crypto_key);
+ ipsec_key_decode (&entry->integrity_key, &integ_key);
+
+ return ipsec_sa_add_and_lock (
+ id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags,
+ entry->salt, htons (entry->udp_src_port), htons (entry->udp_dst_port),
+ ntohl (entry->anti_replay_window_size), &tun, sa_index);
+}
+
static void
vl_api_ipsec_sad_entry_del_t_handler (vl_api_ipsec_sad_entry_del_t *mp)
{
@@ -568,6 +610,74 @@ vl_api_ipsec_sad_entry_add_t_handler (vl_api_ipsec_sad_entry_add_t *mp)
}
static void
+vl_api_ipsec_sad_entry_add_v2_t_handler (vl_api_ipsec_sad_entry_add_v2_t *mp)
+{
+ vl_api_ipsec_sad_entry_add_reply_t *rmp;
+ u32 sa_index = ~0;
+ int rv;
+
+ rv = ipsec_sad_entry_add_v4 (&mp->entry, &sa_index);
+
+ REPLY_MACRO2 (VL_API_IPSEC_SAD_ENTRY_ADD_V2_REPLY,
+ { rmp->stat_index = htonl (sa_index); });
+}
+
+static void
+vl_api_ipsec_sad_entry_update_t_handler (vl_api_ipsec_sad_entry_update_t *mp)
+{
+ vl_api_ipsec_sad_entry_update_reply_t *rmp;
+ u32 id;
+ tunnel_t tun = { 0 };
+ int rv;
+
+ id = ntohl (mp->sad_id);
+
+ if (mp->is_tun)
+ {
+ rv = tunnel_decode (&mp->tunnel, &tun);
+
+ if (rv)
+ goto out;
+ }
+
+ rv = ipsec_sa_update (id, htons (mp->udp_src_port), htons (mp->udp_dst_port),
+ &tun, mp->is_tun);
+
+out:
+ REPLY_MACRO (VL_API_IPSEC_SAD_ENTRY_UPDATE_REPLY);
+}
+
+static void
+vl_api_ipsec_sad_bind_t_handler (vl_api_ipsec_sad_bind_t *mp)
+{
+ vl_api_ipsec_sad_bind_reply_t *rmp;
+ u32 sa_id;
+ u32 worker;
+ int rv;
+
+ sa_id = ntohl (mp->sa_id);
+ worker = ntohl (mp->worker);
+
+ rv = ipsec_sa_bind (sa_id, worker, true /* bind */);
+
+ REPLY_MACRO (VL_API_IPSEC_SAD_BIND_REPLY);
+}
+
+static void
+vl_api_ipsec_sad_unbind_t_handler (vl_api_ipsec_sad_unbind_t *mp)
+{
+ vl_api_ipsec_sad_unbind_reply_t *rmp;
+ u32 sa_id;
+ int rv;
+
+ sa_id = ntohl (mp->sa_id);
+
+ rv = ipsec_sa_bind (sa_id, ~0, false /* bind */);
+
+ REPLY_MACRO (VL_API_IPSEC_SAD_UNBIND_REPLY);
+}
+
+static void
send_ipsec_spds_details (ipsec_spd_t * spd, vl_api_registration_t * reg,
u32 context)
{
@@ -721,12 +831,10 @@ vl_api_ipsec_spd_interface_dump_t_handler (vl_api_ipsec_spd_interface_dump_t *
if (mp->spd_index_valid)
{
spd_index = ntohl (mp->spd_index);
- /* *INDENT-OFF* */
hash_foreach(k, v, im->spd_index_by_sw_if_index, ({
if (v == spd_index)
send_ipsec_spd_interface_details(reg, v, k, mp->context);
}));
- /* *INDENT-ON* */
}
else
{
@@ -749,12 +857,10 @@ vl_api_ipsec_itf_create_t_handler (vl_api_ipsec_itf_create_t * mp)
if (!rv)
rv = ipsec_itf_create (ntohl (mp->itf.user_instance), mode, &sw_if_index);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IPSEC_ITF_CREATE_REPLY,
({
rmp->sw_if_index = htonl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -898,7 +1004,10 @@ send_ipsec_sa_details (ipsec_sa_t * sa, void *arg)
mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
}
if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
- mp->replay_window = clib_host_to_net_u64 (sa->replay_window);
+ {
+ mp->replay_window =
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
+ }
mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
@@ -985,7 +1094,10 @@ send_ipsec_sa_v2_details (ipsec_sa_t * sa, void *arg)
mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
}
if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
- mp->replay_window = clib_host_to_net_u64 (sa->replay_window);
+ {
+ mp->replay_window =
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
+ }
mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
@@ -1065,7 +1177,10 @@ send_ipsec_sa_v3_details (ipsec_sa_t *sa, void *arg)
mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
}
if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
- mp->replay_window = clib_host_to_net_u64 (sa->replay_window);
+ {
+ mp->replay_window =
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
+ }
mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
@@ -1091,8 +1206,179 @@ vl_api_ipsec_sa_v3_dump_t_handler (vl_api_ipsec_sa_v3_dump_t *mp)
ipsec_sa_walk (send_ipsec_sa_v3_details, &ctx);
}
+static walk_rc_t
+send_ipsec_sa_v4_details (ipsec_sa_t *sa, void *arg)
+{
+ ipsec_dump_walk_ctx_t *ctx = arg;
+ vl_api_ipsec_sa_v4_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ clib_memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_IPSEC_SA_V4_DETAILS);
+ mp->context = ctx->context;
+
+ mp->entry.sad_id = htonl (sa->id);
+ mp->entry.spi = htonl (sa->spi);
+ mp->entry.protocol = ipsec_proto_encode (sa->protocol);
+
+ mp->entry.crypto_algorithm = ipsec_crypto_algo_encode (sa->crypto_alg);
+ ipsec_key_encode (&sa->crypto_key, &mp->entry.crypto_key);
+
+ mp->entry.integrity_algorithm = ipsec_integ_algo_encode (sa->integ_alg);
+ ipsec_key_encode (&sa->integ_key, &mp->entry.integrity_key);
+
+ mp->entry.flags = ipsec_sad_flags_encode (sa);
+ mp->entry.salt = clib_host_to_net_u32 (sa->salt);
+
+ if (ipsec_sa_is_set_IS_PROTECT (sa))
+ {
+ ipsec_sa_dump_match_ctx_t ctx = {
+ .sai = sa - ipsec_sa_pool,
+ .sw_if_index = ~0,
+ };
+ ipsec_tun_protect_walk (ipsec_sa_dump_match_sa, &ctx);
+
+ mp->sw_if_index = htonl (ctx.sw_if_index);
+ }
+ else
+ mp->sw_if_index = ~0;
+
+ if (ipsec_sa_is_set_IS_TUNNEL (sa))
+ tunnel_encode (&sa->tunnel, &mp->entry.tunnel);
+
+ if (ipsec_sa_is_set_UDP_ENCAP (sa))
+ {
+ mp->entry.udp_src_port = sa->udp_hdr.src_port;
+ mp->entry.udp_dst_port = sa->udp_hdr.dst_port;
+ }
+
+ mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq));
+ mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->seq));
+ if (ipsec_sa_is_set_USE_ESN (sa))
+ {
+ mp->seq_outbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
+ mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
+ }
+ if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
+ {
+ mp->replay_window =
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
+ }
+
+ mp->thread_index = clib_host_to_net_u32 (sa->thread_index);
+ mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
+
+ vl_api_send_msg (ctx->reg, (u8 *) mp);
+
+ return (WALK_CONTINUE);
+}
+
+static void
+vl_api_ipsec_sa_v4_dump_t_handler (vl_api_ipsec_sa_v4_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ ipsec_dump_walk_ctx_t ctx = {
+ .reg = reg,
+ .context = mp->context,
+ };
+
+ ipsec_sa_walk (send_ipsec_sa_v4_details, &ctx);
+}
+
+static walk_rc_t
+send_ipsec_sa_v5_details (ipsec_sa_t *sa, void *arg)
+{
+ ipsec_dump_walk_ctx_t *ctx = arg;
+ vl_api_ipsec_sa_v5_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ clib_memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_IPSEC_SA_V5_DETAILS);
+ mp->context = ctx->context;
+
+ mp->entry.sad_id = htonl (sa->id);
+ mp->entry.spi = htonl (sa->spi);
+ mp->entry.protocol = ipsec_proto_encode (sa->protocol);
+
+ mp->entry.crypto_algorithm = ipsec_crypto_algo_encode (sa->crypto_alg);
+ ipsec_key_encode (&sa->crypto_key, &mp->entry.crypto_key);
+
+ mp->entry.integrity_algorithm = ipsec_integ_algo_encode (sa->integ_alg);
+ ipsec_key_encode (&sa->integ_key, &mp->entry.integrity_key);
+
+ mp->entry.flags = ipsec_sad_flags_encode (sa);
+ mp->entry.salt = clib_host_to_net_u32 (sa->salt);
+
+ if (ipsec_sa_is_set_IS_PROTECT (sa))
+ {
+ ipsec_sa_dump_match_ctx_t ctx = {
+ .sai = sa - ipsec_sa_pool,
+ .sw_if_index = ~0,
+ };
+ ipsec_tun_protect_walk (ipsec_sa_dump_match_sa, &ctx);
+
+ mp->sw_if_index = htonl (ctx.sw_if_index);
+ }
+ else
+ mp->sw_if_index = ~0;
+
+ if (ipsec_sa_is_set_IS_TUNNEL (sa))
+ tunnel_encode (&sa->tunnel, &mp->entry.tunnel);
+
+ if (ipsec_sa_is_set_UDP_ENCAP (sa))
+ {
+ mp->entry.udp_src_port = sa->udp_hdr.src_port;
+ mp->entry.udp_dst_port = sa->udp_hdr.dst_port;
+ }
+
+ mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq));
+ mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->seq));
+ if (ipsec_sa_is_set_USE_ESN (sa))
+ {
+ mp->seq_outbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
+ mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
+ }
+ if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
+ {
+ mp->replay_window =
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
+
+ mp->entry.anti_replay_window_size =
+ clib_host_to_net_u32 (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa));
+ }
+
+ mp->thread_index = clib_host_to_net_u32 (sa->thread_index);
+ mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
+
+ vl_api_send_msg (ctx->reg, (u8 *) mp);
+
+ return (WALK_CONTINUE);
+}
+
+static void
+vl_api_ipsec_sa_v5_dump_t_handler (vl_api_ipsec_sa_v5_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ ipsec_dump_walk_ctx_t ctx = {
+ .reg = reg,
+ .context = mp->context,
+ };
+
+ ipsec_sa_walk (send_ipsec_sa_v5_details, &ctx);
+}
+
static void
-vl_api_ipsec_backend_dump_t_handler (vl_api_ipsec_backend_dump_t * mp)
+vl_api_ipsec_backend_dump_t_handler (vl_api_ipsec_backend_dump_t *mp)
{
vl_api_registration_t *rp;
ipsec_main_t *im = &ipsec_main;
@@ -1108,7 +1394,6 @@ vl_api_ipsec_backend_dump_t_handler (vl_api_ipsec_backend_dump_t * mp)
ipsec_ah_backend_t *ab;
ipsec_esp_backend_t *eb;
- /* *INDENT-OFF* */
pool_foreach (ab, im->ah_backends) {
vl_api_ipsec_backend_details_t *mp = vl_msg_api_alloc (sizeof (*mp));
clib_memset (mp, 0, sizeof (*mp));
@@ -1133,7 +1418,6 @@ vl_api_ipsec_backend_dump_t_handler (vl_api_ipsec_backend_dump_t * mp)
mp->active = mp->index == im->esp_current_backend ? 1 : 0;
vl_api_send_msg (rp, (u8 *)mp);
}
- /* *INDENT-ON* */
}
static void
diff --git a/src/vnet/ipsec/ipsec_cli.c b/src/vnet/ipsec/ipsec_cli.c
index 8b436b6b805..07d9df8f204 100644
--- a/src/vnet/ipsec/ipsec_cli.c
+++ b/src/vnet/ipsec/ipsec_cli.c
@@ -71,14 +71,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_spd_command, static) = {
.path = "set interface ipsec spd",
.short_help =
"set interface ipsec spd <int> <id>",
.function = set_interface_spd_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ipsec_sa_add_del_command_fn (vlib_main_t * vm,
@@ -88,6 +86,7 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm,
unformat_input_t _line_input, *line_input = &_line_input;
ipsec_crypto_alg_t crypto_alg;
ipsec_integ_alg_t integ_alg;
+ u32 anti_replay_window_size;
ipsec_protocol_t proto;
ipsec_sa_flags_t flags;
clib_error_t *error;
@@ -105,6 +104,7 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm,
is_add = 0;
flags = IPSEC_SA_FLAG_NONE;
proto = IPSEC_PROTOCOL_ESP;
+ anti_replay_window_size = 0;
integ_alg = IPSEC_INTEG_ALG_NONE;
crypto_alg = IPSEC_CRYPTO_ALG_NONE;
udp_src = udp_dst = IPSEC_UDP_PORT_NONE;
@@ -153,6 +153,9 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm,
udp_src = i;
else if (unformat (line_input, "udp-dst-port %d", &i))
udp_dst = i;
+ else if (unformat (line_input, "anti-replay-size %d",
+ &anti_replay_window_size))
+ flags |= IPSEC_SA_FLAG_USE_ANTI_REPLAY;
else if (unformat (line_input, "inbound"))
flags |= IPSEC_SA_FLAG_IS_INBOUND;
else if (unformat (line_input, "use-anti-replay"))
@@ -184,9 +187,10 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm,
error = clib_error_return (0, "missing spi");
goto done;
}
- rv = ipsec_sa_add_and_lock (id, spi, proto, crypto_alg, &ck, integ_alg,
- &ik, flags, clib_host_to_net_u32 (salt),
- udp_src, udp_dst, &tun, &sai);
+ rv =
+ ipsec_sa_add_and_lock (id, spi, proto, crypto_alg, &ck, integ_alg, &ik,
+ flags, clib_host_to_net_u32 (salt), udp_src,
+ udp_dst, anti_replay_window_size, &tun, &sai);
}
else
{
@@ -202,14 +206,77 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_sa_add_del_command, static) = {
.path = "ipsec sa",
.short_help =
"ipsec sa [add|del]",
.function = ipsec_sa_add_del_command_fn,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+ipsec_sa_bind_cli (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 id = ~0;
+ u32 worker = ~0;
+ bool bind = 1;
+ int rv;
+ clib_error_t *error = NULL;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "unbind"))
+ bind = 0;
+ else if (id == ~0 && unformat (line_input, "%u", &id))
+ ;
+ else if (unformat (line_input, "%u", &worker))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (id == ~0)
+ {
+ error = clib_error_return (0, "please specify SA ID");
+ goto done;
+ }
+
+ if (bind && ~0 == worker)
+ {
+ error = clib_error_return (0, "please specify worker to bind to");
+ goto done;
+ }
+
+ rv = ipsec_sa_bind (id, worker, bind);
+ switch (rv)
+ {
+ case VNET_API_ERROR_INVALID_VALUE:
+ error = clib_error_return (0, "please specify a valid SA ID");
+ break;
+ case VNET_API_ERROR_INVALID_WORKER:
+ error = clib_error_return (0, "please specify a valid worker index");
+ break;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (ipsec_sa_bind_cmd, static) = {
+ .path = "ipsec sa bind",
+ .short_help = "ipsec sa [unbind] <sa-id> <worker>",
+ .function = ipsec_sa_bind_cli,
+};
static clib_error_t *
ipsec_spd_add_del_command_fn (vlib_main_t * vm,
@@ -254,14 +321,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_spd_add_del_command, static) = {
.path = "ipsec spd",
.short_help =
"ipsec spd [add|del] <id>",
.function = ipsec_spd_add_del_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -396,27 +461,23 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_policy_add_del_command, static) = {
.path = "ipsec policy",
.short_help =
"ipsec policy [add|del] spd <id> priority <n> ",
.function = ipsec_policy_add_del_command_fn,
};
-/* *INDENT-ON* */
static void
ipsec_sa_show_all (vlib_main_t * vm, ipsec_main_t * im, u8 detail)
{
u32 sai;
- /* *INDENT-OFF* */
pool_foreach_index (sai, ipsec_sa_pool)
{
vlib_cli_output (vm, "%U", format_ipsec_sa, sai,
(detail ? IPSEC_FORMAT_DETAIL : IPSEC_FORMAT_BRIEF));
}
- /* *INDENT-ON* */
}
static void
@@ -424,7 +485,6 @@ ipsec_spd_show_all (vlib_main_t * vm, ipsec_main_t * im)
{
u32 spdi;
- /* *INDENT-OFF* */
pool_foreach_index (spdi, im->spds) {
vlib_cli_output(vm, "%U", format_ipsec_spd, spdi);
}
@@ -437,7 +497,6 @@ ipsec_spd_show_all (vlib_main_t * vm, ipsec_main_t * im)
{
vlib_cli_output (vm, "%U", format_ipsec_in_spd_flow_cache);
}
- /* *INDENT-ON* */
}
static void
@@ -448,14 +507,12 @@ ipsec_spd_bindings_show_all (vlib_main_t * vm, ipsec_main_t * im)
vlib_cli_output (vm, "SPD Bindings:");
- /* *INDENT-OFF* */
hash_foreach(sw_if_index, spd_id, im->spd_index_by_sw_if_index, ({
spd = pool_elt_at_index (im->spds, spd_id);
vlib_cli_output (vm, " %d -> %U", spd->id,
format_vnet_sw_if_index_name, im->vnet_main,
sw_if_index);
}));
- /* *INDENT-ON* */
}
static walk_rc_t
@@ -489,13 +546,11 @@ show_ipsec_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ipsec_command, static) = {
.path = "show ipsec all",
.short_help = "show ipsec all",
.function = show_ipsec_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_ipsec_sa_command_fn (vlib_main_t * vm,
@@ -540,12 +595,10 @@ clear_ipsec_sa_command_fn (vlib_main_t * vm,
if (~0 == sai)
{
- /* *INDENT-OFF* */
pool_foreach_index (sai, ipsec_sa_pool)
{
ipsec_sa_clear (sai);
}
- /* *INDENT-ON* */
}
else
{
@@ -558,7 +611,6 @@ clear_ipsec_sa_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ipsec_sa_command, static) = {
.path = "show ipsec sa",
.short_help = "show ipsec sa [index]",
@@ -570,7 +622,6 @@ VLIB_CLI_COMMAND (clear_ipsec_sa_command, static) = {
.short_help = "clear ipsec sa [index]",
.function = clear_ipsec_sa_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_ipsec_spd_command_fn (vlib_main_t * vm,
@@ -600,13 +651,11 @@ show_ipsec_spd_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ipsec_spd_command, static) = {
.path = "show ipsec spd",
.short_help = "show ipsec spd [index]",
.function = show_ipsec_spd_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_ipsec_tunnel_command_fn (vlib_main_t * vm,
@@ -618,13 +667,11 @@ show_ipsec_tunnel_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ipsec_tunnel_command, static) = {
.path = "show ipsec tunnel",
.short_help = "show ipsec tunnel",
.function = show_ipsec_tunnel_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ipsec_show_backends_command_fn (vlib_main_t * vm,
@@ -639,7 +686,6 @@ ipsec_show_backends_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "IPsec AH backends available:");
u8 *s = format (NULL, "%=25s %=25s %=10s\n", "Name", "Index", "Active");
ipsec_ah_backend_t *ab;
- /* *INDENT-OFF* */
pool_foreach (ab, im->ah_backends) {
s = format (s, "%=25s %=25u %=10s\n", ab->name, ab - im->ah_backends,
ab - im->ah_backends == im->ah_current_backend ? "yes" : "no");
@@ -655,13 +701,11 @@ ipsec_show_backends_command_fn (vlib_main_t * vm,
s = format (s, " dec6 %s (next %d)\n", n->name, ab->ah6_decrypt_next_index);
}
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "%v", s);
vec_set_len (s, 0);
vlib_cli_output (vm, "IPsec ESP backends available:");
s = format (s, "%=25s %=25s %=10s\n", "Name", "Index", "Active");
ipsec_esp_backend_t *eb;
- /* *INDENT-OFF* */
pool_foreach (eb, im->esp_backends) {
s = format (s, "%=25s %=25u %=10s\n", eb->name, eb - im->esp_backends,
eb - im->esp_backends == im->esp_current_backend ? "yes"
@@ -678,20 +722,17 @@ ipsec_show_backends_command_fn (vlib_main_t * vm,
s = format (s, " dec6 %s (next %d)\n", n->name, eb->esp6_decrypt_next_index);
}
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "%v", s);
vec_free (s);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_show_backends_command, static) = {
.path = "show ipsec backends",
.short_help = "show ipsec backends",
.function = ipsec_show_backends_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ipsec_select_backend_command_fn (vlib_main_t * vm,
@@ -753,14 +794,12 @@ ipsec_select_backend_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_select_backend_command, static) = {
.path = "ipsec select backend",
.short_help = "ipsec select backend <ah|esp> <backend index>",
.function = ipsec_select_backend_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
clear_ipsec_counters_command_fn (vlib_main_t * vm,
@@ -769,18 +808,17 @@ clear_ipsec_counters_command_fn (vlib_main_t * vm,
{
vlib_clear_combined_counters (&ipsec_spd_policy_counters);
vlib_clear_combined_counters (&ipsec_sa_counters);
- vlib_clear_simple_counters (&ipsec_sa_lost_counters);
+ for (int i = 0; i < IPSEC_SA_N_ERRORS; i++)
+ vlib_clear_simple_counters (&ipsec_sa_err_counters[i]);
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_ipsec_counters_command, static) = {
.path = "clear ipsec counters",
.short_help = "clear ipsec counters",
.function = clear_ipsec_counters_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ipsec_tun_protect_cmd (vlib_main_t * vm,
@@ -830,7 +868,6 @@ ipsec_tun_protect_cmd (vlib_main_t * vm,
/**
* Protect tunnel with IPSEC
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_tun_protect_cmd_node, static) =
{
.path = "ipsec tunnel protect",
@@ -838,7 +875,6 @@ VLIB_CLI_COMMAND (ipsec_tun_protect_cmd_node, static) =
.short_help = "ipsec tunnel protect <interface> input-sa <SA> output-sa <SA> [add|del]",
// this is not MP safe
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -853,14 +889,12 @@ ipsec_tun_protect_show (vlib_main_t * vm,
/**
* show IPSEC tunnel protection
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_tun_protect_show_node, static) =
{
.path = "show ipsec protect",
.function = ipsec_tun_protect_show,
.short_help = "show ipsec protect",
};
-/* *INDENT-ON* */
static int
ipsec_tun_protect4_hash_show_one (clib_bihash_kv_8_16_t * kv, void *arg)
@@ -909,14 +943,12 @@ ipsec_tun_protect_hash_show (vlib_main_t * vm,
/**
* show IPSEC tunnel protection hash tables
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_tun_protect_hash_show_node, static) =
{
.path = "show ipsec protect-hash",
.function = ipsec_tun_protect_hash_show,
.short_help = "show ipsec protect-hash",
};
-/* *INDENT-ON* */
clib_error_t *
ipsec_cli_init (vlib_main_t * vm)
@@ -953,13 +985,11 @@ set_async_mode_command_fn (vlib_main_t * vm, unformat_input_t * input,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_async_mode_command, static) = {
.path = "set ipsec async mode",
.short_help = "set ipsec async mode on|off",
.function = set_async_mode_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipsec/ipsec_format.c b/src/vnet/ipsec/ipsec_format.c
index 9204b1c090d..e421a0d96b4 100644
--- a/src/vnet/ipsec/ipsec_format.c
+++ b/src/vnet/ipsec/ipsec_format.c
@@ -210,11 +210,145 @@ format_ipsec_policy (u8 *s, va_list *args)
}
u8 *
-format_ipsec_policy_fp (u8 *s, va_list *args)
+format_ipsec_fp_policy (u8 *s, va_list *args)
{
return format_ipsec_policy_with_suffix (s, args, (u8 *) "<fast-path>");
}
+/**
+ * @brief Context when walking the fp bihash table. We need to filter
+ * only those policies that are of given type as we walk the table.
+ */
+typedef struct ipsec_spd_policy_ctx_t_
+{
+ u32 *policies;
+ ipsec_spd_policy_type_t t;
+} ipsec_fp_walk_ctx_t;
+
+static int
+ipsec_fp_table_walk_ip4_cb (clib_bihash_kv_16_8_t *kvp, void *arg)
+{
+ ipsec_fp_walk_ctx_t *ctx = (ipsec_fp_walk_ctx_t *) arg;
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_policy_t *p;
+
+ ipsec_fp_lookup_value_t *val = (ipsec_fp_lookup_value_t *) &kvp->value;
+
+ u32 *policy_id;
+
+ vec_foreach (policy_id, val->fp_policies_ids)
+ {
+ p = pool_elt_at_index (im->policies, *policy_id);
+ if (p->type == ctx->t)
+ vec_add1 (ctx->policies, *policy_id);
+ }
+
+ return BIHASH_WALK_CONTINUE;
+}
+
+static int
+ipsec_fp_table_walk_ip6_cb (clib_bihash_kv_40_8_t *kvp, void *arg)
+{
+ ipsec_fp_walk_ctx_t *ctx = (ipsec_fp_walk_ctx_t *) arg;
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_policy_t *p;
+
+ ipsec_fp_lookup_value_t *val = (ipsec_fp_lookup_value_t *) &kvp->value;
+
+ u32 *policy_id;
+
+ vec_foreach (policy_id, val->fp_policies_ids)
+ {
+ p = pool_elt_at_index (im->policies, *policy_id);
+ if (p->type == ctx->t)
+ vec_add1 (ctx->policies, *policy_id);
+ }
+
+ return BIHASH_WALK_CONTINUE;
+}
+
+u8 *
+format_ipsec_fp_policies (u8 *s, va_list *args)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_spd_t *spd = va_arg (*args, ipsec_spd_t *);
+ ipsec_spd_policy_type_t t = va_arg (*args, ipsec_spd_policy_type_t);
+ u32 *i;
+ ipsec_fp_walk_ctx_t ctx = {
+ .policies = 0,
+ .t = t,
+ };
+
+ u32 ip4_in_lookup_hash_idx = spd->fp_spd.ip4_in_lookup_hash_idx;
+ u32 ip4_out_lookup_hash_idx = spd->fp_spd.ip4_out_lookup_hash_idx;
+ u32 ip6_in_lookup_hash_idx = spd->fp_spd.ip6_in_lookup_hash_idx;
+ u32 ip6_out_lookup_hash_idx = spd->fp_spd.ip6_out_lookup_hash_idx;
+
+ switch (t)
+ {
+ case IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT:
+ case IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS:
+ case IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD:
+ if (INDEX_INVALID != ip4_in_lookup_hash_idx)
+ {
+ clib_bihash_16_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip4_lookup_hashes_pool, ip4_in_lookup_hash_idx);
+
+ clib_bihash_foreach_key_value_pair_16_8 (
+ bihash_table, ipsec_fp_table_walk_ip4_cb, &ctx);
+ }
+
+ break;
+
+ case IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT:
+ case IPSEC_SPD_POLICY_IP6_INBOUND_BYPASS:
+ case IPSEC_SPD_POLICY_IP6_INBOUND_DISCARD:
+ if (INDEX_INVALID != ip6_in_lookup_hash_idx)
+ {
+ clib_bihash_40_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip6_lookup_hashes_pool, ip6_in_lookup_hash_idx);
+
+ clib_bihash_foreach_key_value_pair_40_8 (
+ bihash_table, ipsec_fp_table_walk_ip6_cb, &ctx);
+ }
+
+ break;
+ case IPSEC_SPD_POLICY_IP4_OUTBOUND:
+ if (INDEX_INVALID != ip4_out_lookup_hash_idx)
+ {
+ clib_bihash_16_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip4_lookup_hashes_pool, ip4_out_lookup_hash_idx);
+
+ clib_bihash_foreach_key_value_pair_16_8 (
+ bihash_table, ipsec_fp_table_walk_ip4_cb, &ctx);
+ }
+
+ break;
+ case IPSEC_SPD_POLICY_IP6_OUTBOUND:
+ if (INDEX_INVALID != ip6_out_lookup_hash_idx)
+ {
+ clib_bihash_40_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip6_lookup_hashes_pool, ip6_out_lookup_hash_idx);
+
+ clib_bihash_foreach_key_value_pair_40_8 (
+ bihash_table, ipsec_fp_table_walk_ip6_cb, &ctx);
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ vec_foreach (i, ctx.policies)
+ {
+ s = format (s, "\n %U", format_ipsec_fp_policy, *i);
+ }
+
+ vec_free (ctx.policies);
+
+ return s;
+}
+
u8 *
format_ipsec_spd (u8 * s, va_list * args)
{
@@ -239,10 +373,7 @@ format_ipsec_spd (u8 * s, va_list * args)
{ \
s = format (s, "\n %U", format_ipsec_policy, *i); \
} \
- vec_foreach (i, spd->fp_spd.fp_policies[IPSEC_SPD_POLICY_##v]) \
- { \
- s = format (s, "\n %U", format_ipsec_policy_fp, *i); \
- }
+ s = format (s, "\n %U", format_ipsec_fp_policies, spd, IPSEC_SPD_POLICY_##v);
foreach_ipsec_spd_policy_type;
#undef _
@@ -313,7 +444,7 @@ format_ipsec_sa (u8 * s, va_list * args)
u32 sai = va_arg (*args, u32);
ipsec_format_flags_t flags = va_arg (*args, ipsec_format_flags_t);
vlib_counter_t counts;
- counter_t lost;
+ counter_t errors;
ipsec_sa_t *sa;
if (pool_is_free_index (ipsec_sa_pool, sai))
@@ -335,16 +466,18 @@ format_ipsec_sa (u8 * s, va_list * args)
s = format (s, "\n salt 0x%x", clib_net_to_host_u32 (sa->salt));
s = format (s, "\n thread-index:%d", sa->thread_index);
s = format (s, "\n seq %u seq-hi %u", sa->seq, sa->seq_hi);
- s = format (s, "\n window %U", format_ipsec_replay_window,
- sa->replay_window);
- s = format (s, "\n crypto alg %U",
- format_ipsec_crypto_alg, sa->crypto_alg);
+ s = format (s, "\n window-size: %llu",
+ IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa));
+ s = format (s, "\n window: Bl <- %U Tl", format_ipsec_replay_window,
+ ipsec_sa_anti_replay_get_64b_window (sa));
+ s =
+ format (s, "\n crypto alg %U", format_ipsec_crypto_alg, sa->crypto_alg);
if (sa->crypto_alg && (flags & IPSEC_FORMAT_INSECURE))
s = format (s, " key %U", format_ipsec_key, &sa->crypto_key);
else
s = format (s, " key [redacted]");
- s = format (s, "\n integrity alg %U",
- format_ipsec_integ_alg, sa->integ_alg);
+ s =
+ format (s, "\n integrity alg %U", format_ipsec_integ_alg, sa->integ_alg);
if (sa->integ_alg && (flags & IPSEC_FORMAT_INSECURE))
s = format (s, " key %U", format_ipsec_key, &sa->integ_key);
else
@@ -354,12 +487,17 @@ format_ipsec_sa (u8 * s, va_list * args)
clib_host_to_net_u16 (sa->udp_hdr.dst_port));
vlib_get_combined_counter (&ipsec_sa_counters, sai, &counts);
- lost = vlib_get_simple_counter (&ipsec_sa_lost_counters, sai);
- s = format (s, "\n tx/rx:[packets:%Ld bytes:%Ld], lost:[packets:%Ld]",
- counts.packets, counts.bytes, lost);
+ s = format (s, "\n tx/rx:[packets:%Ld bytes:%Ld]", counts.packets,
+ counts.bytes);
+ s = format (s, "\n SA errors:");
+#define _(index, val, err, desc) \
+ errors = vlib_get_simple_counter (&ipsec_sa_err_counters[index], sai); \
+ s = format (s, "\n " #desc ":[packets:%Ld]", errors);
+ foreach_ipsec_sa_err
+#undef _
- if (ipsec_sa_is_set_IS_TUNNEL (sa))
- s = format (s, "\n%U", format_tunnel, &sa->tunnel, 3);
+ if (ipsec_sa_is_set_IS_TUNNEL (sa)) s =
+ format (s, "\n%U", format_tunnel, &sa->tunnel, 3);
done:
return (s);
@@ -411,12 +549,10 @@ format_ipsec_tun_protect (u8 * s, va_list * args)
IPSEC_FORMAT_BRIEF);
s = format (s, "\n input-sa:");
- /* *INDENT-OFF* */
FOR_EACH_IPSEC_PROTECT_INPUT_SAI(itp, sai,
({
s = format (s, "\n %U", format_ipsec_sa, sai, IPSEC_FORMAT_BRIEF);
}));
- /* *INDENT-ON* */
return (s);
}
diff --git a/src/vnet/ipsec/ipsec_handoff.c b/src/vnet/ipsec/ipsec_handoff.c
index e8daa1a6a23..68a859cf732 100644
--- a/src/vnet/ipsec/ipsec_handoff.c
+++ b/src/vnet/ipsec/ipsec_handoff.c
@@ -259,7 +259,6 @@ VLIB_NODE_FN (ah6_decrypt_handoff) (vlib_main_t * vm,
return ipsec_handoff (vm, node, from_frame, im->ah6_dec_fq_index);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp4_encrypt_handoff) = {
.name = "esp4-encrypt-handoff",
.vector_size = sizeof (u32),
@@ -416,7 +415,6 @@ VLIB_REGISTER_NODE (ah6_decrypt_handoff) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipsec/ipsec_input.c b/src/vnet/ipsec/ipsec_input.c
index 62723d4ffa8..6ccc0be2622 100644
--- a/src/vnet/ipsec/ipsec_input.c
+++ b/src/vnet/ipsec/ipsec_input.c
@@ -153,24 +153,24 @@ ipsec4_input_spd_find_flow_cache_entry (ipsec_main_t *im, u32 sa, u32 da,
}
always_inline void
-ipsec_fp_in_5tuple_from_ip4_range (ipsec_fp_5tuple_t *tuple, u32 la, u32 ra,
+ipsec_fp_in_5tuple_from_ip4_range (ipsec_fp_5tuple_t *tuple, u32 sa, u32 da,
u32 spi, u8 action)
{
clib_memset (tuple->l3_zero_pad, 0, sizeof (tuple->l3_zero_pad));
- tuple->laddr.as_u32 = la;
- tuple->raddr.as_u32 = ra;
+ tuple->laddr.as_u32 = da;
+ tuple->raddr.as_u32 = sa;
tuple->spi = spi;
tuple->action = action;
tuple->is_ipv6 = 0;
}
always_inline void
-ipsec_fp_in_5tuple_from_ip6_range (ipsec_fp_5tuple_t *tuple, ip6_address_t *la,
- ip6_address_t *ra, u32 spi, u8 action)
+ipsec_fp_in_5tuple_from_ip6_range (ipsec_fp_5tuple_t *tuple, ip6_address_t *sa,
+ ip6_address_t *da, u32 spi, u8 action)
{
- clib_memcpy (&tuple->ip6_laddr, la, sizeof (ip6_address_t));
- clib_memcpy (&tuple->ip6_raddr, ra, sizeof (ip6_address_t));
+ clib_memcpy (&tuple->ip6_laddr, da, sizeof (ip6_address_t));
+ clib_memcpy (&tuple->ip6_raddr, sa, sizeof (ip6_address_t));
tuple->spi = spi;
tuple->action = action;
@@ -273,6 +273,193 @@ ip6_addr_match_range (ip6_address_t * a, ip6_address_t * la,
return 0;
}
+always_inline void
+ipsec_esp_packet_process (vlib_main_t *vm, ipsec_main_t *im, ip4_header_t *ip0,
+ esp_header_t *esp0, u32 thread_index,
+ ipsec_spd_t *spd0, vlib_buffer_t **b,
+ vlib_node_runtime_t *node, u64 *ipsec_bypassed,
+ u64 *ipsec_dropped, u64 *ipsec_matched,
+ u64 *ipsec_unprocessed, u16 *next)
+
+{
+ ipsec_policy_t *p0 = NULL;
+ u32 pi0;
+ u8 has_space0;
+ bool search_flow_cache = false;
+ ipsec_policy_t *policies[1];
+ ipsec_fp_5tuple_t tuples[1];
+ bool ip_v6 = true;
+
+ /* if flow cache is enabled, first search through flow cache for a
+ * policy match for either protect, bypass or discard rules, in that
+ * order. if no match is found search_flow_cache is set to false
+ * and we revert back to linear search
+ */
+
+ search_flow_cache = im->input_flow_cache_flag;
+udp_or_esp:
+
+ /* SPI ID field in the ESP header MUST NOT be a zero value */
+ if (esp0->spi == 0)
+ {
+ /* Drop the packet if SPI ID is zero */
+ *ipsec_unprocessed += 1;
+ next[0] = IPSEC_INPUT_NEXT_DROP;
+ return;
+ }
+
+ if (im->fp_spd_ipv4_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip4_in_lookup_hash_idx))
+ {
+ ipsec_fp_in_5tuple_from_ip4_range (&tuples[0], ip0->src_address.as_u32,
+ ip0->dst_address.as_u32,
+ clib_net_to_host_u32 (esp0->spi),
+ IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT);
+ ipsec_fp_in_policy_match_n (&spd0->fp_spd, !ip_v6, tuples, policies, 1);
+ p0 = policies[0];
+ }
+ else if (search_flow_cache) /* attempt to match policy in flow cache */
+ {
+ p0 = ipsec4_input_spd_find_flow_cache_entry (
+ im, ip0->src_address.as_u32, ip0->dst_address.as_u32,
+ IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT);
+ }
+
+ else /* linear search if flow cache is not enabled,
+ or flow cache search just failed */
+ {
+ p0 = ipsec_input_protect_policy_match (
+ spd0, clib_net_to_host_u32 (ip0->src_address.as_u32),
+ clib_net_to_host_u32 (ip0->dst_address.as_u32),
+ clib_net_to_host_u32 (esp0->spi));
+ }
+ has_space0 = vlib_buffer_has_space (b[0], (clib_address_t) (esp0 + 1) -
+ (clib_address_t) ip0);
+
+ if (PREDICT_TRUE ((p0 != NULL) & (has_space0)))
+ {
+ *ipsec_matched += 1;
+
+ pi0 = p0 - im->policies;
+ vlib_increment_combined_counter (&ipsec_spd_policy_counters,
+ thread_index, pi0, 1,
+ clib_net_to_host_u16 (ip0->length));
+
+ vnet_buffer (b[0])->ipsec.sad_index = p0->sa_index;
+ next[0] = im->esp4_decrypt_next_index;
+ vlib_buffer_advance (b[0], ((u8 *) esp0 - (u8 *) ip0));
+ goto trace0;
+ }
+ else
+ {
+ p0 = 0;
+ pi0 = ~0;
+ }
+ if (im->fp_spd_ipv4_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip4_in_lookup_hash_idx))
+ {
+ tuples->action = IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS;
+ ipsec_fp_in_policy_match_n (&spd0->fp_spd, !ip_v6, tuples, policies, 1);
+ p0 = policies[0];
+ }
+ else if (search_flow_cache)
+ {
+ p0 = ipsec4_input_spd_find_flow_cache_entry (
+ im, ip0->src_address.as_u32, ip0->dst_address.as_u32,
+ IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS);
+ }
+
+ else
+ {
+ p0 = ipsec_input_policy_match (
+ spd0, clib_net_to_host_u32 (ip0->src_address.as_u32),
+ clib_net_to_host_u32 (ip0->dst_address.as_u32),
+ IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS);
+ }
+
+ if (PREDICT_TRUE ((p0 != NULL)))
+ {
+ *ipsec_bypassed += 1;
+
+ pi0 = p0 - im->policies;
+ vlib_increment_combined_counter (&ipsec_spd_policy_counters,
+ thread_index, pi0, 1,
+ clib_net_to_host_u16 (ip0->length));
+
+ goto trace0;
+ }
+ else
+ {
+ p0 = 0;
+ pi0 = ~0;
+ };
+ if (im->fp_spd_ipv4_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip4_in_lookup_hash_idx))
+ {
+ tuples->action = IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD;
+ ipsec_fp_in_policy_match_n (&spd0->fp_spd, !ip_v6, tuples, policies, 1);
+ p0 = policies[0];
+ }
+ else
+
+ if (search_flow_cache)
+ {
+ p0 = ipsec4_input_spd_find_flow_cache_entry (
+ im, ip0->src_address.as_u32, ip0->dst_address.as_u32,
+ IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD);
+ }
+
+ else
+ {
+ p0 = ipsec_input_policy_match (
+ spd0, clib_net_to_host_u32 (ip0->src_address.as_u32),
+ clib_net_to_host_u32 (ip0->dst_address.as_u32),
+ IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD);
+ }
+
+ if (PREDICT_TRUE ((p0 != NULL)))
+ {
+ *ipsec_dropped += 1;
+
+ pi0 = p0 - im->policies;
+ vlib_increment_combined_counter (&ipsec_spd_policy_counters,
+ thread_index, pi0, 1,
+ clib_net_to_host_u16 (ip0->length));
+
+ next[0] = IPSEC_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+ else
+ {
+ p0 = 0;
+ pi0 = ~0;
+ };
+ /* flow cache search failed, try again with linear search */
+ if (search_flow_cache && p0 == NULL)
+ {
+ search_flow_cache = false;
+ goto udp_or_esp;
+ }
+
+ /* Drop by default if no match on PROTECT, BYPASS or DISCARD */
+ *ipsec_unprocessed += 1;
+ next[0] = IPSEC_INPUT_NEXT_DROP;
+
+trace0:
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipsec_input_trace_t *tr = vlib_add_trace (vm, node, b[0], sizeof (*tr));
+
+ tr->proto = ip0->protocol;
+ tr->sa_id = p0 ? p0->sa_id : ~0;
+ tr->spi = has_space0 ? clib_net_to_host_u32 (esp0->spi) : ~0;
+ tr->seq = has_space0 ? clib_net_to_host_u32 (esp0->seq) : ~0;
+ tr->spd = spd0->id;
+ tr->policy_index = pi0;
+ }
+}
+
always_inline ipsec_policy_t *
ipsec6_input_protect_policy_match (ipsec_spd_t * spd,
ip6_address_t * sa,
@@ -345,9 +532,6 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
ipsec_policy_t *p0 = NULL;
u8 has_space0;
bool search_flow_cache = false;
- ipsec_policy_t *policies[1];
- ipsec_fp_5tuple_t tuples[1];
- bool ip_v6 = true;
if (n_left_from > 2)
{
@@ -363,189 +547,37 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
ip0 = vlib_buffer_get_current (b[0]);
- if (PREDICT_TRUE
- (ip0->protocol == IP_PROTOCOL_IPSEC_ESP
- || ip0->protocol == IP_PROTOCOL_UDP))
+ if (ip0->protocol == IP_PROTOCOL_UDP)
{
+ udp_header_t *udp0 = NULL;
+ udp0 = (udp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0));
- esp0 = (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0));
- if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_UDP))
- {
- /* FIXME Skip, if not a UDP encapsulated packet */
- esp0 = (esp_header_t *) ((u8 *) esp0 + sizeof (udp_header_t));
- }
-
- // if flow cache is enabled, first search through flow cache for a
- // policy match for either protect, bypass or discard rules, in that
- // order. if no match is found search_flow_cache is set to false (1)
- // and we revert back to linear search
- search_flow_cache = im->input_flow_cache_flag;
-
- esp_or_udp:
- if (im->fp_spd_ipv4_in_is_enabled &&
- PREDICT_TRUE (INDEX_INVALID !=
- spd0->fp_spd.ip4_in_lookup_hash_idx))
- {
- ipsec_fp_in_5tuple_from_ip4_range (
- &tuples[0], ip0->src_address.as_u32, ip0->dst_address.as_u32,
- clib_net_to_host_u32 (esp0->spi),
- IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT);
- ipsec_fp_in_policy_match_n (&spd0->fp_spd, !ip_v6, tuples,
- policies, 1);
- p0 = policies[0];
- }
- else if (search_flow_cache) // attempt to match policy in flow cache
- {
- p0 = ipsec4_input_spd_find_flow_cache_entry (
- im, ip0->src_address.as_u32, ip0->dst_address.as_u32,
- IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT);
- }
-
- else // linear search if flow cache is not enabled,
- // or flow cache search just failed
- {
- p0 = ipsec_input_protect_policy_match (
- spd0, clib_net_to_host_u32 (ip0->src_address.as_u32),
- clib_net_to_host_u32 (ip0->dst_address.as_u32),
- clib_net_to_host_u32 (esp0->spi));
- }
-
- has_space0 =
- vlib_buffer_has_space (b[0],
- (clib_address_t) (esp0 + 1) -
- (clib_address_t) ip0);
-
- if (PREDICT_TRUE ((p0 != NULL) & (has_space0)))
- {
- ipsec_matched += 1;
-
- pi0 = p0 - im->policies;
- vlib_increment_combined_counter
- (&ipsec_spd_policy_counters,
- thread_index, pi0, 1, clib_net_to_host_u16 (ip0->length));
-
- vnet_buffer (b[0])->ipsec.sad_index = p0->sa_index;
- next[0] = im->esp4_decrypt_next_index;
- vlib_buffer_advance (b[0], ((u8 *) esp0 - (u8 *) ip0));
- goto trace0;
- }
- else
- {
- p0 = 0;
- pi0 = ~0;
- };
-
- if (im->fp_spd_ipv4_in_is_enabled &&
- PREDICT_TRUE (INDEX_INVALID !=
- spd0->fp_spd.ip4_in_lookup_hash_idx))
- {
- tuples->action = IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS;
- ipsec_fp_in_policy_match_n (&spd0->fp_spd, !ip_v6, tuples,
- policies, 1);
- p0 = policies[0];
- }
- else if (search_flow_cache)
- {
- p0 = ipsec4_input_spd_find_flow_cache_entry (
- im, ip0->src_address.as_u32, ip0->dst_address.as_u32,
- IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS);
- }
-
- else
- {
- p0 = ipsec_input_policy_match (
- spd0, clib_net_to_host_u32 (ip0->src_address.as_u32),
- clib_net_to_host_u32 (ip0->dst_address.as_u32),
- IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS);
- }
-
- if (PREDICT_TRUE ((p0 != NULL)))
- {
- ipsec_bypassed += 1;
-
- pi0 = p0 - im->policies;
- vlib_increment_combined_counter (
- &ipsec_spd_policy_counters, thread_index, pi0, 1,
- clib_net_to_host_u16 (ip0->length));
-
- goto trace0;
- }
- else
- {
- p0 = 0;
- pi0 = ~0;
- };
-
- if (im->fp_spd_ipv4_in_is_enabled &&
- PREDICT_TRUE (INDEX_INVALID !=
- spd0->fp_spd.ip4_in_lookup_hash_idx))
- {
- tuples->action = IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD;
- ipsec_fp_in_policy_match_n (&spd0->fp_spd, !ip_v6, tuples,
- policies, 1);
- p0 = policies[0];
- }
- else
-
- if (search_flow_cache)
- {
- p0 = ipsec4_input_spd_find_flow_cache_entry (
- im, ip0->src_address.as_u32, ip0->dst_address.as_u32,
- IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD);
- }
-
- else
- {
- p0 = ipsec_input_policy_match (
- spd0, clib_net_to_host_u32 (ip0->src_address.as_u32),
- clib_net_to_host_u32 (ip0->dst_address.as_u32),
- IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD);
- }
-
- if (PREDICT_TRUE ((p0 != NULL)))
- {
- ipsec_dropped += 1;
-
- pi0 = p0 - im->policies;
- vlib_increment_combined_counter (
- &ipsec_spd_policy_counters, thread_index, pi0, 1,
- clib_net_to_host_u16 (ip0->length));
-
- next[0] = IPSEC_INPUT_NEXT_DROP;
- goto trace0;
- }
- else
- {
- p0 = 0;
- pi0 = ~0;
- };
-
- // flow cache search failed, try again with linear search
- if (search_flow_cache && p0 == NULL)
- {
- search_flow_cache = false;
- goto esp_or_udp;
- }
-
- /* Drop by default if no match on PROTECT, BYPASS or DISCARD */
- ipsec_unprocessed += 1;
- next[0] = IPSEC_INPUT_NEXT_DROP;
+ /* As per RFC 3948, in the UDP-encapsulated header the UDP checksum
+ * MUST be zero, and receivers MUST NOT depend upon the UDP checksum.
+ * Inside the ESP header, the SPI value MUST NOT be zero.
+ */
- trace0:
- if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
- PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+ if (udp0->checksum == 0)
{
- ipsec_input_trace_t *tr =
- vlib_add_trace (vm, node, b[0], sizeof (*tr));
+ esp0 = (esp_header_t *) ((u8 *) udp0 + sizeof (udp_header_t));
- tr->proto = ip0->protocol;
- tr->sa_id = p0 ? p0->sa_id : ~0;
- tr->spi = has_space0 ? clib_net_to_host_u32 (esp0->spi) : ~0;
- tr->seq = has_space0 ? clib_net_to_host_u32 (esp0->seq) : ~0;
- tr->spd = spd0->id;
- tr->policy_index = pi0;
+ ipsec_esp_packet_process (vm, im, ip0, esp0, thread_index, spd0,
+ b, node, &ipsec_bypassed,
+ &ipsec_dropped, &ipsec_matched,
+ &ipsec_unprocessed, next);
+ if (ipsec_bypassed > 0)
+ goto ipsec_bypassed;
}
}
+ else if (PREDICT_TRUE (ip0->protocol == IP_PROTOCOL_IPSEC_ESP))
+ {
+ esp0 = (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0));
+ ipsec_esp_packet_process (vm, im, ip0, esp0, thread_index, spd0, b,
+ node, &ipsec_bypassed, &ipsec_dropped,
+ &ipsec_matched, &ipsec_unprocessed, next);
+ if (ipsec_bypassed > 0)
+ goto ipsec_bypassed;
+ }
else if (ip0->protocol == IP_PROTOCOL_IPSEC_AH)
{
ah0 = (ah_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0));
@@ -687,6 +719,7 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
}
else
{
+ ipsec_bypassed:
ipsec_unprocessed += 1;
}
n_left_from -= 1;
@@ -718,8 +751,6 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ipsec4_input_node) = {
.name = "ipsec4-input-feature",
.vector_size = sizeof (u32),
@@ -734,7 +765,6 @@ VLIB_REGISTER_NODE (ipsec4_input_node) = {
#undef _
},
};
-/* *INDENT-ON* */
extern vlib_node_registration_t ipsec6_input_node;
@@ -916,7 +946,6 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ipsec6_input_node) = {
.name = "ipsec6-input-feature",
.vector_size = sizeof (u32),
@@ -931,7 +960,6 @@ VLIB_REGISTER_NODE (ipsec6_input_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipsec/ipsec_itf.c b/src/vnet/ipsec/ipsec_itf.c
index f9c1d77a37d..b86bf6a110c 100644
--- a/src/vnet/ipsec/ipsec_itf.c
+++ b/src/vnet/ipsec/ipsec_itf.c
@@ -188,7 +188,6 @@ ipsec_itf_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
(ai, NULL, NULL, ADJ_FLAG_MIDCHAIN_IP_STACK, ipsec_itf_build_rewrite ());
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (ipsec_itf_device_class) = {
.name = "IPSEC Tunnel",
.format_device_name = format_ipsec_itf_name,
@@ -208,7 +207,6 @@ VNET_HW_INTERFACE_CLASS(ipsec_p2mp_hw_interface_class) = {
.update_adjacency = ipsec_itf_update_adj,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA,
};
-/* *INDENT-ON* */
/*
* Maintain a bitmap of allocated ipsec_itf instance numbers.
@@ -383,6 +381,7 @@ ipsec_itf_create_cli (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
+ tunnel_mode_t mode = TUNNEL_MODE_P2P;
u32 instance, sw_if_index;
clib_error_t *error;
mac_address_t mac;
@@ -398,6 +397,8 @@ ipsec_itf_create_cli (vlib_main_t * vm,
{
if (unformat (line_input, "instance %d", &instance))
;
+ else if (unformat (line_input, "p2mp"))
+ mode = TUNNEL_MODE_MP;
else
{
error = clib_error_return (0, "unknown input: %U",
@@ -412,7 +413,7 @@ ipsec_itf_create_cli (vlib_main_t * vm,
return error;
}
- rv = ipsec_itf_create (instance, TUNNEL_MODE_P2P, &sw_if_index);
+ rv = ipsec_itf_create (instance, mode, &sw_if_index);
if (rv)
return clib_error_return (0, "iPSec interface create failed");
@@ -427,17 +428,15 @@ ipsec_itf_create_cli (vlib_main_t * vm,
*
* @cliexpar
* The following two command syntaxes are equivalent:
- * @cliexcmd{ipsec itf create [instance <instance>]}
+ * @cliexcmd{ipsec itf create [instance <instance>] [p2mp]}
* Example of how to create a ipsec interface:
* @cliexcmd{ipsec itf create}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_itf_create_command, static) = {
.path = "ipsec itf create",
- .short_help = "ipsec itf create [instance <instance>]",
+ .short_help = "ipsec itf create [instance <instance>] [p2mp]",
.function = ipsec_itf_create_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
ipsec_itf_delete_cli (vlib_main_t * vm,
@@ -482,13 +481,11 @@ ipsec_itf_delete_cli (vlib_main_t * vm,
* Example of how to create a ipsec_itf interface:
* @cliexcmd{ipsec itf delete ipsec0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_itf_delete_command, static) = {
.path = "ipsec itf delete",
.short_help = "ipsec itf delete <interface>",
.function = ipsec_itf_delete_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
ipsec_interface_show (vlib_main_t * vm,
@@ -496,12 +493,10 @@ ipsec_interface_show (vlib_main_t * vm,
{
index_t ii;
- /* *INDENT-OFF* */
pool_foreach_index (ii, ipsec_itf_pool)
{
vlib_cli_output (vm, "%U", format_ipsec_itf, ii);
}
- /* *INDENT-ON* */
return NULL;
}
@@ -509,14 +504,12 @@ ipsec_interface_show (vlib_main_t * vm,
/**
* show IPSEC tunnel protection hash tables
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_interface_show_node, static) =
{
.path = "show ipsec interface",
.function = ipsec_interface_show,
.short_help = "show ipsec interface",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipsec/ipsec_output.c b/src/vnet/ipsec/ipsec_output.c
index 028d9761c07..787da9359e0 100644
--- a/src/vnet/ipsec/ipsec_output.c
+++ b/src/vnet/ipsec/ipsec_output.c
@@ -335,7 +335,6 @@ VLIB_NODE_FN (ipsec4_output_node) (vlib_main_t * vm,
return ipsec_output_inline (vm, node, frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ipsec4_output_node) = {
.name = "ipsec4-output-feature",
.vector_size = sizeof (u32),
@@ -352,7 +351,6 @@ VLIB_REGISTER_NODE (ipsec4_output_node) = {
#undef _
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ipsec6_output_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
diff --git a/src/vnet/ipsec/ipsec_sa.c b/src/vnet/ipsec/ipsec_sa.c
index a330abcb244..1d5195ec793 100644
--- a/src/vnet/ipsec/ipsec_sa.c
+++ b/src/vnet/ipsec/ipsec_sa.c
@@ -13,12 +13,14 @@
* limitations under the License.
*/
+#include <sys/random.h>
#include <vnet/ipsec/ipsec.h>
#include <vnet/ipsec/esp.h>
#include <vnet/udp/udp_local.h>
#include <vnet/fib/fib_table.h>
#include <vnet/fib/fib_entry_track.h>
#include <vnet/ipsec/ipsec_tun.h>
+#include <vnet/ipsec/ipsec.api_enum.h>
/**
* @brief
@@ -28,10 +30,8 @@ vlib_combined_counter_main_t ipsec_sa_counters = {
.name = "SA",
.stat_segment_name = "/net/ipsec/sa",
};
-vlib_simple_counter_main_t ipsec_sa_lost_counters = {
- .name = "SA-lost",
- .stat_segment_name = "/net/ipsec/sa/lost",
-};
+/* Per-SA error counters */
+vlib_simple_counter_main_t ipsec_sa_err_counters[IPSEC_SA_N_ERRORS];
ipsec_sa_t *ipsec_sa_pool;
@@ -93,14 +93,35 @@ ipsec_sa_stack (ipsec_sa_t * sa)
}
void
+ipsec_sa_set_async_mode (ipsec_sa_t *sa, int is_enabled)
+{
+ if (is_enabled)
+ {
+ sa->crypto_key_index = sa->crypto_async_key_index;
+ sa->crypto_enc_op_id = sa->crypto_async_enc_op_id;
+ sa->crypto_dec_op_id = sa->crypto_async_dec_op_id;
+ sa->integ_key_index = ~0;
+ sa->integ_op_id = ~0;
+ }
+ else
+ {
+ sa->crypto_key_index = sa->crypto_sync_key_index;
+ sa->crypto_enc_op_id = sa->crypto_sync_enc_op_id;
+ sa->crypto_dec_op_id = sa->crypto_sync_dec_op_id;
+ sa->integ_key_index = sa->integ_sync_key_index;
+ sa->integ_op_id = sa->integ_sync_op_id;
+ }
+}
+
+void
ipsec_sa_set_crypto_alg (ipsec_sa_t * sa, ipsec_crypto_alg_t crypto_alg)
{
ipsec_main_t *im = &ipsec_main;
sa->crypto_alg = crypto_alg;
sa->crypto_iv_size = im->crypto_algs[crypto_alg].iv_size;
sa->esp_block_align = clib_max (4, im->crypto_algs[crypto_alg].block_align);
- sa->sync_op_data.crypto_enc_op_id = im->crypto_algs[crypto_alg].enc_op_id;
- sa->sync_op_data.crypto_dec_op_id = im->crypto_algs[crypto_alg].dec_op_id;
+ sa->crypto_sync_enc_op_id = im->crypto_algs[crypto_alg].enc_op_id;
+ sa->crypto_sync_dec_op_id = im->crypto_algs[crypto_alg].dec_op_id;
sa->crypto_calg = im->crypto_algs[crypto_alg].alg;
ASSERT (sa->crypto_iv_size <= ESP_MAX_IV_SIZE);
ASSERT (sa->esp_block_align <= ESP_MAX_BLOCK_SIZE);
@@ -115,6 +136,13 @@ ipsec_sa_set_crypto_alg (ipsec_sa_t * sa, ipsec_crypto_alg_t crypto_alg)
{
ipsec_sa_set_IS_CTR (sa);
}
+ else if (IPSEC_CRYPTO_ALG_IS_NULL_GMAC (crypto_alg))
+ {
+ sa->integ_icv_size = im->crypto_algs[crypto_alg].icv_size;
+ ipsec_sa_set_IS_CTR (sa);
+ ipsec_sa_set_IS_AEAD (sa);
+ ipsec_sa_set_IS_NULL_GMAC (sa);
+ }
}
void
@@ -123,7 +151,7 @@ ipsec_sa_set_integ_alg (ipsec_sa_t * sa, ipsec_integ_alg_t integ_alg)
ipsec_main_t *im = &ipsec_main;
sa->integ_alg = integ_alg;
sa->integ_icv_size = im->integ_algs[integ_alg].icv_size;
- sa->sync_op_data.integ_op_id = im->integ_algs[integ_alg].op_id;
+ sa->integ_sync_op_id = im->integ_algs[integ_alg].op_id;
sa->integ_calg = im->integ_algs[integ_alg].alg;
ASSERT (sa->integ_icv_size <= ESP_MAX_ICV_SIZE);
}
@@ -131,44 +159,167 @@ ipsec_sa_set_integ_alg (ipsec_sa_t * sa, ipsec_integ_alg_t integ_alg)
void
ipsec_sa_set_async_op_ids (ipsec_sa_t * sa)
{
- /* *INDENT-OFF* */
if (ipsec_sa_is_set_USE_ESN (sa))
{
-#define _(n, s, k) \
- if( sa->sync_op_data.crypto_enc_op_id == VNET_CRYPTO_OP_##n##_ENC ) \
- sa->async_op_data.crypto_async_enc_op_id = \
- VNET_CRYPTO_OP_##n##_TAG16_AAD12_ENC; \
- if( sa->sync_op_data.crypto_dec_op_id == VNET_CRYPTO_OP_##n##_DEC ) \
- sa->async_op_data.crypto_async_dec_op_id = \
- VNET_CRYPTO_OP_##n##_TAG16_AAD12_DEC;
- foreach_crypto_aead_alg
+#define _(n, s, k) \
+ if (sa->crypto_sync_enc_op_id == VNET_CRYPTO_OP_##n##_ENC) \
+ sa->crypto_async_enc_op_id = VNET_CRYPTO_OP_##n##_TAG16_AAD12_ENC; \
+ if (sa->crypto_sync_dec_op_id == VNET_CRYPTO_OP_##n##_DEC) \
+ sa->crypto_async_dec_op_id = VNET_CRYPTO_OP_##n##_TAG16_AAD12_DEC;
+ foreach_crypto_aead_alg
#undef _
}
else
{
-#define _(n, s, k) \
- if( sa->sync_op_data.crypto_enc_op_id == VNET_CRYPTO_OP_##n##_ENC ) \
- sa->async_op_data.crypto_async_enc_op_id = \
- VNET_CRYPTO_OP_##n##_TAG16_AAD8_ENC; \
- if( sa->sync_op_data.crypto_dec_op_id == VNET_CRYPTO_OP_##n##_DEC ) \
- sa->async_op_data.crypto_async_dec_op_id = \
- VNET_CRYPTO_OP_##n##_TAG16_AAD8_DEC;
- foreach_crypto_aead_alg
+#define _(n, s, k) \
+ if (sa->crypto_sync_enc_op_id == VNET_CRYPTO_OP_##n##_ENC) \
+ sa->crypto_async_enc_op_id = VNET_CRYPTO_OP_##n##_TAG16_AAD8_ENC; \
+ if (sa->crypto_sync_dec_op_id == VNET_CRYPTO_OP_##n##_DEC) \
+ sa->crypto_async_dec_op_id = VNET_CRYPTO_OP_##n##_TAG16_AAD8_DEC;
+ foreach_crypto_aead_alg
#undef _
}
-#define _(c, h, s, k ,d) \
- if( sa->sync_op_data.crypto_enc_op_id == VNET_CRYPTO_OP_##c##_ENC && \
- sa->sync_op_data.integ_op_id == VNET_CRYPTO_OP_##h##_HMAC) \
- sa->async_op_data.crypto_async_enc_op_id = \
- VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC; \
- if( sa->sync_op_data.crypto_dec_op_id == VNET_CRYPTO_OP_##c##_DEC && \
- sa->sync_op_data.integ_op_id == VNET_CRYPTO_OP_##h##_HMAC) \
- sa->async_op_data.crypto_async_dec_op_id = \
- VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC;
+#define _(c, h, s, k, d) \
+ if (sa->crypto_sync_enc_op_id == VNET_CRYPTO_OP_##c##_ENC && \
+ sa->integ_sync_op_id == VNET_CRYPTO_OP_##h##_HMAC) \
+ sa->crypto_async_enc_op_id = VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC; \
+ if (sa->crypto_sync_dec_op_id == VNET_CRYPTO_OP_##c##_DEC && \
+ sa->integ_sync_op_id == VNET_CRYPTO_OP_##h##_HMAC) \
+ sa->crypto_async_dec_op_id = VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC;
foreach_crypto_link_async_alg
#undef _
- /* *INDENT-ON* */
+}
+
+int
+ipsec_sa_update (u32 id, u16 src_port, u16 dst_port, const tunnel_t *tun,
+ bool is_tun)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_sa_t *sa;
+ u32 sa_index;
+ uword *p;
+ int rv;
+
+ p = hash_get (im->sa_index_by_sa_id, id);
+ if (!p)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ sa = ipsec_sa_get (p[0]);
+ sa_index = sa - ipsec_sa_pool;
+
+ if (is_tun && ipsec_sa_is_set_IS_TUNNEL (sa) &&
+ (ip_address_cmp (&tun->t_src, &sa->tunnel.t_src) != 0 ||
+ ip_address_cmp (&tun->t_dst, &sa->tunnel.t_dst) != 0))
+ {
+ /* if the source IP is updated for an inbound SA under a tunnel protect,
+ we need to update the tun_protect DB with the new src IP */
+ if (ipsec_sa_is_set_IS_INBOUND (sa) &&
+ ip_address_cmp (&tun->t_src, &sa->tunnel.t_src) != 0 &&
+ !ip46_address_is_zero (&tun->t_src.ip))
+ {
+ if (ip46_address_is_ip4 (&sa->tunnel.t_src.ip))
+ {
+ ipsec4_tunnel_kv_t old_key, new_key;
+ clib_bihash_kv_8_16_t res,
+ *bkey = (clib_bihash_kv_8_16_t *) &old_key;
+
+ ipsec4_tunnel_mk_key (&old_key, &sa->tunnel.t_src.ip.ip4,
+ clib_host_to_net_u32 (sa->spi));
+ ipsec4_tunnel_mk_key (&new_key, &tun->t_src.ip.ip4,
+ clib_host_to_net_u32 (sa->spi));
+
+ if (!clib_bihash_search_8_16 (&im->tun4_protect_by_key, bkey,
+ &res))
+ {
+ clib_bihash_add_del_8_16 (&im->tun4_protect_by_key, &res, 0);
+ res.key = new_key.key;
+ clib_bihash_add_del_8_16 (&im->tun4_protect_by_key, &res, 1);
+ }
+ }
+ else
+ {
+ ipsec6_tunnel_kv_t old_key = {
+ .key = {
+ .remote_ip = sa->tunnel.t_src.ip.ip6,
+ .spi = clib_host_to_net_u32 (sa->spi),
+ },
+ }, new_key = {
+ .key = {
+ .remote_ip = tun->t_src.ip.ip6,
+ .spi = clib_host_to_net_u32 (sa->spi),
+ }};
+ clib_bihash_kv_24_16_t res,
+ *bkey = (clib_bihash_kv_24_16_t *) &old_key;
+
+ if (!clib_bihash_search_24_16 (&im->tun6_protect_by_key, bkey,
+ &res))
+ {
+ clib_bihash_add_del_24_16 (&im->tun6_protect_by_key, &res,
+ 0);
+ clib_memcpy (&res.key, &new_key.key, 3);
+ clib_bihash_add_del_24_16 (&im->tun6_protect_by_key, &res,
+ 1);
+ }
+ }
+ }
+ tunnel_unresolve (&sa->tunnel);
+ tunnel_copy (tun, &sa->tunnel);
+ if (!ipsec_sa_is_set_IS_INBOUND (sa))
+ {
+ dpo_reset (&sa->dpo);
+
+ sa->tunnel_flags = sa->tunnel.t_encap_decap_flags;
+
+ rv = tunnel_resolve (&sa->tunnel, FIB_NODE_TYPE_IPSEC_SA, sa_index);
+
+ if (rv)
+ {
+ hash_unset (im->sa_index_by_sa_id, sa->id);
+ pool_put (ipsec_sa_pool, sa);
+ return rv;
+ }
+ ipsec_sa_stack (sa);
+ /* generate header templates */
+ if (ipsec_sa_is_set_IS_TUNNEL_V6 (sa))
+ {
+ tunnel_build_v6_hdr (&sa->tunnel,
+ (ipsec_sa_is_set_UDP_ENCAP (sa) ?
+ IP_PROTOCOL_UDP :
+ IP_PROTOCOL_IPSEC_ESP),
+ &sa->ip6_hdr);
+ }
+ else
+ {
+ tunnel_build_v4_hdr (&sa->tunnel,
+ (ipsec_sa_is_set_UDP_ENCAP (sa) ?
+ IP_PROTOCOL_UDP :
+ IP_PROTOCOL_IPSEC_ESP),
+ &sa->ip4_hdr);
+ }
+ }
+ }
+
+ if (ipsec_sa_is_set_UDP_ENCAP (sa))
+ {
+ if (dst_port != IPSEC_UDP_PORT_NONE &&
+ dst_port != clib_net_to_host_u16 (sa->udp_hdr.dst_port))
+ {
+ if (ipsec_sa_is_set_IS_INBOUND (sa))
+ {
+ ipsec_unregister_udp_port (
+ clib_net_to_host_u16 (sa->udp_hdr.dst_port),
+ !ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
+ ipsec_register_udp_port (dst_port,
+ !ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
+ }
+ sa->udp_hdr.dst_port = clib_host_to_net_u16 (dst_port);
+ }
+ if (src_port != IPSEC_UDP_PORT_NONE &&
+ src_port != clib_net_to_host_u16 (sa->udp_hdr.src_port))
+ sa->udp_hdr.src_port = clib_host_to_net_u16 (src_port);
+ }
+ return (0);
}
int
@@ -176,13 +327,15 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
ipsec_crypto_alg_t crypto_alg, const ipsec_key_t *ck,
ipsec_integ_alg_t integ_alg, const ipsec_key_t *ik,
ipsec_sa_flags_t flags, u32 salt, u16 src_port,
- u16 dst_port, const tunnel_t *tun, u32 *sa_out_index)
+ u16 dst_port, u32 anti_replay_window_size,
+ const tunnel_t *tun, u32 *sa_out_index)
{
vlib_main_t *vm = vlib_get_main ();
ipsec_main_t *im = &ipsec_main;
clib_error_t *err;
ipsec_sa_t *sa;
u32 sa_index;
+ u64 rand[2];
uword *p;
int rv;
@@ -190,16 +343,24 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
if (p)
return VNET_API_ERROR_ENTRY_ALREADY_EXISTS;
+ if (getrandom (rand, sizeof (rand), 0) != sizeof (rand))
+ return VNET_API_ERROR_INIT_FAILED;
+
pool_get_aligned_zero (ipsec_sa_pool, sa, CLIB_CACHE_LINE_BYTES);
+ clib_pcg64i_srandom_r (&sa->iv_prng, rand[0], rand[1]);
+
fib_node_init (&sa->node, FIB_NODE_TYPE_IPSEC_SA);
fib_node_lock (&sa->node);
sa_index = sa - ipsec_sa_pool;
vlib_validate_combined_counter (&ipsec_sa_counters, sa_index);
vlib_zero_combined_counter (&ipsec_sa_counters, sa_index);
- vlib_validate_simple_counter (&ipsec_sa_lost_counters, sa_index);
- vlib_zero_simple_counter (&ipsec_sa_lost_counters, sa_index);
+ for (int i = 0; i < IPSEC_SA_N_ERRORS; i++)
+ {
+ vlib_validate_simple_counter (&ipsec_sa_err_counters[i], sa_index);
+ vlib_zero_simple_counter (&ipsec_sa_err_counters[i], sa_index);
+ }
tunnel_copy (tun, &sa->tunnel);
sa->id = id;
@@ -217,12 +378,14 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
ipsec_sa_set_crypto_alg (sa, crypto_alg);
ipsec_sa_set_async_op_ids (sa);
+ if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && anti_replay_window_size > 64)
+ ipsec_sa_set_ANTI_REPLAY_HUGE (sa);
+
clib_memcpy (&sa->crypto_key, ck, sizeof (sa->crypto_key));
- sa->crypto_key_index = vnet_crypto_key_add (vm,
- im->crypto_algs[crypto_alg].alg,
- (u8 *) ck->data, ck->len);
- if (~0 == sa->crypto_key_index)
+ sa->crypto_sync_key_index = vnet_crypto_key_add (
+ vm, im->crypto_algs[crypto_alg].alg, (u8 *) ck->data, ck->len);
+ if (~0 == sa->crypto_sync_key_index)
{
pool_put (ipsec_sa_pool, sa);
return VNET_API_ERROR_KEY_LENGTH;
@@ -230,42 +393,39 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
if (integ_alg != IPSEC_INTEG_ALG_NONE)
{
- sa->integ_key_index = vnet_crypto_key_add (vm,
- im->
- integ_algs[integ_alg].alg,
- (u8 *) ik->data, ik->len);
- if (~0 == sa->integ_key_index)
+ sa->integ_sync_key_index = vnet_crypto_key_add (
+ vm, im->integ_algs[integ_alg].alg, (u8 *) ik->data, ik->len);
+ if (~0 == sa->integ_sync_key_index)
{
pool_put (ipsec_sa_pool, sa);
return VNET_API_ERROR_KEY_LENGTH;
}
}
- if (sa->async_op_data.crypto_async_enc_op_id &&
- !ipsec_sa_is_set_IS_AEAD (sa))
- { //AES-CBC & HMAC
- sa->async_op_data.linked_key_index =
- vnet_crypto_key_add_linked (vm, sa->crypto_key_index,
- sa->integ_key_index);
- }
+ if (sa->crypto_async_enc_op_id && !ipsec_sa_is_set_IS_AEAD (sa))
+ sa->crypto_async_key_index =
+ vnet_crypto_key_add_linked (vm, sa->crypto_sync_key_index,
+ sa->integ_sync_key_index); // AES-CBC & HMAC
+ else
+ sa->crypto_async_key_index = sa->crypto_sync_key_index;
if (im->async_mode)
- sa->crypto_op_data = sa->async_op_data.data;
+ {
+ ipsec_sa_set_async_mode (sa, 1);
+ }
+ else if (ipsec_sa_is_set_IS_ASYNC (sa))
+ {
+ ipsec_sa_set_async_mode (sa, 1 /* is_enabled */);
+ }
else
{
- if (ipsec_sa_is_set_IS_ASYNC (sa))
- {
- vnet_crypto_request_async_mode (1);
- sa->crypto_op_data = sa->async_op_data.data;
- }
- else
- sa->crypto_op_data = sa->sync_op_data.data;
+ ipsec_sa_set_async_mode (sa, 0 /* is_enabled */);
}
err = ipsec_check_support_cb (im, sa);
if (err)
{
- clib_warning ("%s", err->what);
+ clib_warning ("%v", err->what);
pool_put (ipsec_sa_pool, sa);
return VNET_API_ERROR_UNIMPLEMENTED;
}
@@ -330,6 +490,18 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
!ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
}
+ /* window size rounded up to next power of 2 */
+ if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa))
+ {
+ anti_replay_window_size = 1 << max_log2 (anti_replay_window_size);
+ sa->replay_window_huge =
+ clib_bitmap_set_region (0, 0, 1, anti_replay_window_size);
+ }
+ else
+ {
+ sa->replay_window = ~0;
+ }
+
hash_set (im->sa_index_by_sa_id, sa->id, sa_index);
if (sa_out_index)
@@ -353,19 +525,51 @@ ipsec_sa_del (ipsec_sa_t * sa)
(void) ipsec_call_add_del_callbacks (im, sa, sa_index, 0);
if (ipsec_sa_is_set_IS_ASYNC (sa))
- vnet_crypto_request_async_mode (0);
+ {
+ if (!ipsec_sa_is_set_IS_AEAD (sa))
+ vnet_crypto_key_del (vm, sa->crypto_async_key_index);
+ }
+
if (ipsec_sa_is_set_UDP_ENCAP (sa) && ipsec_sa_is_set_IS_INBOUND (sa))
ipsec_unregister_udp_port (clib_net_to_host_u16 (sa->udp_hdr.dst_port),
!ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
if (ipsec_sa_is_set_IS_TUNNEL (sa) && !ipsec_sa_is_set_IS_INBOUND (sa))
dpo_reset (&sa->dpo);
- vnet_crypto_key_del (vm, sa->crypto_key_index);
+ vnet_crypto_key_del (vm, sa->crypto_sync_key_index);
if (sa->integ_alg != IPSEC_INTEG_ALG_NONE)
- vnet_crypto_key_del (vm, sa->integ_key_index);
+ vnet_crypto_key_del (vm, sa->integ_sync_key_index);
+ if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa))
+ clib_bitmap_free (sa->replay_window_huge);
pool_put (ipsec_sa_pool, sa);
}
+int
+ipsec_sa_bind (u32 id, u32 worker, bool bind)
+{
+ ipsec_main_t *im = &ipsec_main;
+ uword *p;
+ ipsec_sa_t *sa;
+
+ p = hash_get (im->sa_index_by_sa_id, id);
+ if (!p)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ sa = ipsec_sa_get (p[0]);
+
+ if (!bind)
+ {
+ sa->thread_index = ~0;
+ return 0;
+ }
+
+ if (worker >= vlib_num_workers ())
+ return VNET_API_ERROR_INVALID_WORKER;
+
+ sa->thread_index = vlib_get_worker_thread_index (worker);
+ return 0;
+}
+
void
ipsec_sa_unlock (index_t sai)
{
@@ -431,7 +635,8 @@ void
ipsec_sa_clear (index_t sai)
{
vlib_zero_combined_counter (&ipsec_sa_counters, sai);
- vlib_zero_simple_counter (&ipsec_sa_lost_counters, sai);
+ for (int i = 0; i < IPSEC_SA_N_ERRORS; i++)
+ vlib_zero_simple_counter (&ipsec_sa_err_counters[i], sai);
}
void
@@ -439,13 +644,11 @@ ipsec_sa_walk (ipsec_sa_walk_cb_t cb, void *ctx)
{
ipsec_sa_t *sa;
- /* *INDENT-OFF* */
pool_foreach (sa, ipsec_sa_pool)
{
if (WALK_CONTINUE != cb (sa, ctx))
break;
}
- /* *INDENT-ON* */
}
/**
@@ -462,19 +665,18 @@ ipsec_sa_fib_node_get (fib_node_index_t index)
}
static ipsec_sa_t *
-ipsec_sa_from_fib_node (fib_node_t * node)
+ipsec_sa_from_fib_node (fib_node_t *node)
{
ASSERT (FIB_NODE_TYPE_IPSEC_SA == node->fn_type);
- return ((ipsec_sa_t *) (((char *) node) -
- STRUCT_OFFSET_OF (ipsec_sa_t, node)));
-
+ return (
+ (ipsec_sa_t *) (((char *) node) - STRUCT_OFFSET_OF (ipsec_sa_t, node)));
}
/**
* Function definition to inform the FIB node that its last lock has gone.
*/
static void
-ipsec_sa_last_lock_gone (fib_node_t * node)
+ipsec_sa_last_lock_gone (fib_node_t *node)
{
/*
* The ipsec SA is a root of the graph. As such
@@ -487,7 +689,7 @@ ipsec_sa_last_lock_gone (fib_node_t * node)
* Function definition to backwalk a FIB node
*/
static fib_node_back_walk_rc_t
-ipsec_sa_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx)
+ipsec_sa_back_walk (fib_node_t *node, fib_node_back_walk_ctx_t *ctx)
{
ipsec_sa_stack (ipsec_sa_from_fib_node (node));
@@ -504,16 +706,24 @@ const static fib_node_vft_t ipsec_sa_vft = {
.fnv_back_walk = ipsec_sa_back_walk,
};
-/* force inclusion from application's main.c */
+/* Init per-SA error counters and node type */
clib_error_t *
-ipsec_sa_interface_init (vlib_main_t * vm)
+ipsec_sa_init (vlib_main_t *vm)
{
fib_node_register_type (FIB_NODE_TYPE_IPSEC_SA, &ipsec_sa_vft);
- return 0;
+#define _(index, val, err, desc) \
+ ipsec_sa_err_counters[index].name = \
+ (char *) format (0, "SA-" #err "%c", 0); \
+ ipsec_sa_err_counters[index].stat_segment_name = \
+ (char *) format (0, "/net/ipsec/sa/err/" #err "%c", 0); \
+ ipsec_sa_err_counters[index].counters = 0;
+ foreach_ipsec_sa_err
+#undef _
+ return 0;
}
-VLIB_INIT_FUNCTION (ipsec_sa_interface_init);
+VLIB_INIT_FUNCTION (ipsec_sa_init);
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipsec/ipsec_sa.h b/src/vnet/ipsec/ipsec_sa.h
index 057e8cd9bff..4f73f1eab0f 100644
--- a/src/vnet/ipsec/ipsec_sa.h
+++ b/src/vnet/ipsec/ipsec_sa.h
@@ -16,11 +16,16 @@
#define __IPSEC_SPD_SA_H__
#include <vlib/vlib.h>
+#include <vppinfra/pcg.h>
#include <vnet/crypto/crypto.h>
#include <vnet/ip/ip.h>
#include <vnet/fib/fib_node.h>
#include <vnet/tunnel/tunnel.h>
+#define ESP_MAX_ICV_SIZE (32)
+#define ESP_MAX_IV_SIZE (16)
+#define ESP_MAX_BLOCK_SIZE (16)
+
#define foreach_ipsec_crypto_alg \
_ (0, NONE, "none") \
_ (1, AES_CBC_128, "aes-cbc-128") \
@@ -34,7 +39,10 @@
_ (9, AES_GCM_256, "aes-gcm-256") \
_ (10, DES_CBC, "des-cbc") \
_ (11, 3DES_CBC, "3des-cbc") \
- _ (12, CHACHA20_POLY1305, "chacha20-poly1305")
+ _ (12, CHACHA20_POLY1305, "chacha20-poly1305") \
+ _ (13, AES_NULL_GMAC_128, "aes-null-gmac-128") \
+ _ (14, AES_NULL_GMAC_192, "aes-null-gmac-192") \
+ _ (15, AES_NULL_GMAC_256, "aes-null-gmac-256")
typedef enum
{
@@ -44,6 +52,11 @@ typedef enum
IPSEC_CRYPTO_N_ALG,
} __clib_packed ipsec_crypto_alg_t;
+#define IPSEC_CRYPTO_ALG_IS_NULL_GMAC(_alg) \
+ ((_alg == IPSEC_CRYPTO_ALG_AES_NULL_GMAC_128) || \
+ (_alg == IPSEC_CRYPTO_ALG_AES_NULL_GMAC_192) || \
+ (_alg == IPSEC_CRYPTO_ALG_AES_NULL_GMAC_256))
+
#define IPSEC_CRYPTO_ALG_IS_GCM(_alg) \
(((_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) || \
(_alg == IPSEC_CRYPTO_ALG_AES_GCM_192) || \
@@ -107,7 +120,9 @@ typedef struct ipsec_key_t_
_ (128, IS_AEAD, "aead") \
_ (256, IS_CTR, "ctr") \
_ (512, IS_ASYNC, "async") \
- _ (1024, NO_ALGO_NO_DROP, "no-algo-no-drop")
+ _ (1024, NO_ALGO_NO_DROP, "no-algo-no-drop") \
+ _ (2048, IS_NULL_GMAC, "null-gmac") \
+ _ (4096, ANTI_REPLAY_HUGE, "anti-replay-huge")
typedef enum ipsec_sad_flags_t_
{
@@ -118,51 +133,64 @@ typedef enum ipsec_sad_flags_t_
STATIC_ASSERT (sizeof (ipsec_sa_flags_t) == 2, "IPSEC SA flags != 2 byte");
+#define foreach_ipsec_sa_err \
+ _ (0, LOST, lost, "packets lost") \
+ _ (1, HANDOFF, handoff, "hand-off") \
+ _ (2, INTEG_ERROR, integ_error, "Integrity check failed") \
+ _ (3, DECRYPTION_FAILED, decryption_failed, "Decryption failed") \
+ _ (4, CRYPTO_ENGINE_ERROR, crypto_engine_error, \
+ "crypto engine error (dropped)") \
+ _ (5, REPLAY, replay, "SA replayed packet") \
+ _ (6, RUNT, runt, "undersized packet") \
+ _ (7, NO_BUFFERS, no_buffers, "no buffers (dropped)") \
+ _ (8, OVERSIZED_HEADER, oversized_header, \
+ "buffer with oversized header (dropped)") \
+ _ (9, NO_TAIL_SPACE, no_tail_space, \
+ "no enough buffer tail space (dropped)") \
+ _ (10, TUN_NO_PROTO, tun_no_proto, "no tunnel protocol") \
+ _ (11, UNSUP_PAYLOAD, unsup_payload, "unsupported payload") \
+ _ (12, SEQ_CYCLED, seq_cycled, "sequence number cycled (dropped)") \
+ _ (13, CRYPTO_QUEUE_FULL, crypto_queue_full, "crypto queue full (dropped)") \
+ _ (14, NO_ENCRYPTION, no_encryption, "no Encrypting SA (dropped)") \
+ _ (15, DROP_FRAGMENTS, drop_fragments, "IP fragments drop")
+
+typedef enum
+{
+#define _(v, f, s, d) IPSEC_SA_ERROR_##f = v,
+ foreach_ipsec_sa_err
+#undef _
+ IPSEC_SA_N_ERRORS,
+} __clib_packed ipsec_sa_err_t;
+
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- /* flags */
- ipsec_sa_flags_t flags;
-
- u8 crypto_iv_size;
- u8 esp_block_align;
- u8 integ_icv_size;
+ clib_pcg64i_random_t iv_prng;
- u8 __pad1[3];
-
- u32 thread_index;
-
- u32 spi;
- u32 seq;
- u32 seq_hi;
- u64 replay_window;
- u64 ctr_iv_counter;
+ union
+ {
+ u64 replay_window;
+ clib_bitmap_t *replay_window_huge;
+ };
dpo_id_t dpo;
vnet_crypto_key_index_t crypto_key_index;
vnet_crypto_key_index_t integ_key_index;
- /* Union data shared by sync and async ops, updated when mode is
- * changed. */
- union
- {
- struct
- {
- vnet_crypto_op_id_t crypto_enc_op_id:16;
- vnet_crypto_op_id_t crypto_dec_op_id:16;
- vnet_crypto_op_id_t integ_op_id:16;
- };
+ u32 spi;
+ u32 seq;
+ u32 seq_hi;
- struct
- {
- vnet_crypto_async_op_id_t crypto_async_enc_op_id:16;
- vnet_crypto_async_op_id_t crypto_async_dec_op_id:16;
- vnet_crypto_key_index_t linked_key_index;
- };
+ u16 crypto_enc_op_id;
+ u16 crypto_dec_op_id;
+ u16 integ_op_id;
+ ipsec_sa_flags_t flags;
+ u16 thread_index;
- u64 crypto_op_data;
- };
+ u16 integ_icv_size : 6;
+ u16 crypto_iv_size : 5;
+ u16 esp_block_align : 5;
CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
@@ -184,30 +212,7 @@ typedef struct
CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
/* Elements with u64 size multiples */
- union
- {
- struct
- {
- vnet_crypto_op_id_t crypto_enc_op_id:16;
- vnet_crypto_op_id_t crypto_dec_op_id:16;
- vnet_crypto_op_id_t integ_op_id:16;
- };
- u64 data;
- } sync_op_data;
-
- union
- {
- struct
- {
- vnet_crypto_async_op_id_t crypto_async_enc_op_id:16;
- vnet_crypto_async_op_id_t crypto_async_dec_op_id:16;
- vnet_crypto_key_index_t linked_key_index;
- };
- u64 data;
- } async_op_data;
-
tunnel_t tunnel;
-
fib_node_t node;
/* elements with u32 size */
@@ -215,6 +220,16 @@ typedef struct
u32 stat_index;
vnet_crypto_alg_t integ_calg;
vnet_crypto_alg_t crypto_calg;
+ u32 crypto_sync_key_index;
+ u32 integ_sync_key_index;
+ u32 crypto_async_key_index;
+
+ /* elements with u16 size */
+ u16 crypto_sync_enc_op_id;
+ u16 crypto_sync_dec_op_id;
+ u16 integ_sync_op_id;
+ u16 crypto_async_enc_op_id;
+ u16 crypto_async_dec_op_id;
/* else u8 packed */
ipsec_crypto_alg_t crypto_alg;
@@ -224,6 +239,10 @@ typedef struct
ipsec_key_t crypto_key;
} ipsec_sa_t;
+STATIC_ASSERT (VNET_CRYPTO_N_OP_IDS < (1 << 16), "crypto ops overflow");
+STATIC_ASSERT (ESP_MAX_ICV_SIZE < (1 << 6), "integer icv overflow");
+STATIC_ASSERT (ESP_MAX_IV_SIZE < (1 << 5), "esp iv overflow");
+STATIC_ASSERT (ESP_MAX_BLOCK_SIZE < (1 << 5), "esp alignment overflow");
STATIC_ASSERT_OFFSET_OF (ipsec_sa_t, cacheline1, CLIB_CACHE_LINE_BYTES);
STATIC_ASSERT_OFFSET_OF (ipsec_sa_t, cacheline2, 2 * CLIB_CACHE_LINE_BYTES);
@@ -240,90 +259,149 @@ STATIC_ASSERT (STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ipsec.sad_index) ==
STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ip.save_protocol),
"IPSec data is overlapping with IP data");
-#define _(a,v,s) \
- always_inline int \
- ipsec_sa_is_set_##v (const ipsec_sa_t *sa) { \
- return (sa->flags & IPSEC_SA_FLAG_##v); \
+#define _(a, v, s) \
+ always_inline bool ipsec_sa_is_set_##v (const ipsec_sa_t *sa) \
+ { \
+ return (sa->flags & IPSEC_SA_FLAG_##v); \
}
foreach_ipsec_sa_flags
#undef _
-#define _(a,v,s) \
- always_inline int \
- ipsec_sa_set_##v (ipsec_sa_t *sa) { \
- return (sa->flags |= IPSEC_SA_FLAG_##v); \
+#define _(a, v, s) \
+ always_inline void ipsec_sa_set_##v (ipsec_sa_t *sa) \
+ { \
+ sa->flags |= IPSEC_SA_FLAG_##v; \
}
foreach_ipsec_sa_flags
#undef _
-#define _(a,v,s) \
- always_inline int \
- ipsec_sa_unset_##v (ipsec_sa_t *sa) { \
- return (sa->flags &= ~IPSEC_SA_FLAG_##v); \
+#define _(a, v, s) \
+ always_inline int ipsec_sa_unset_##v (ipsec_sa_t *sa) \
+ { \
+ return (sa->flags &= ~IPSEC_SA_FLAG_##v); \
}
- foreach_ipsec_sa_flags
+ foreach_ipsec_sa_flags
#undef _
-/**
- * @brief
- * SA packet & bytes counters
- */
-extern vlib_combined_counter_main_t ipsec_sa_counters;
-extern vlib_simple_counter_main_t ipsec_sa_lost_counters;
-
-extern void ipsec_mk_key (ipsec_key_t * key, const u8 * data, u8 len);
-
-extern int
-ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
- ipsec_crypto_alg_t crypto_alg, const ipsec_key_t *ck,
- ipsec_integ_alg_t integ_alg, const ipsec_key_t *ik,
- ipsec_sa_flags_t flags, u32 salt, u16 src_port,
- u16 dst_port, const tunnel_t *tun, u32 *sa_out_index);
+ /**
+ * @brief
+ * SA packet & bytes counters
+ */
+ extern vlib_combined_counter_main_t ipsec_sa_counters;
+extern vlib_simple_counter_main_t ipsec_sa_err_counters[IPSEC_SA_N_ERRORS];
+
+extern void ipsec_mk_key (ipsec_key_t *key, const u8 *data, u8 len);
+
+extern int ipsec_sa_update (u32 id, u16 src_port, u16 dst_port,
+ const tunnel_t *tun, bool is_tun);
+extern int ipsec_sa_add_and_lock (
+ u32 id, u32 spi, ipsec_protocol_t proto, ipsec_crypto_alg_t crypto_alg,
+ const ipsec_key_t *ck, ipsec_integ_alg_t integ_alg, const ipsec_key_t *ik,
+ ipsec_sa_flags_t flags, u32 salt, u16 src_port, u16 dst_port,
+ u32 anti_replay_window_size, const tunnel_t *tun, u32 *sa_out_index);
+extern int ipsec_sa_bind (u32 id, u32 worker, bool bind);
extern index_t ipsec_sa_find_and_lock (u32 id);
extern int ipsec_sa_unlock_id (u32 id);
extern void ipsec_sa_unlock (index_t sai);
extern void ipsec_sa_lock (index_t sai);
extern void ipsec_sa_clear (index_t sai);
-extern void ipsec_sa_set_crypto_alg (ipsec_sa_t * sa,
+extern void ipsec_sa_set_crypto_alg (ipsec_sa_t *sa,
ipsec_crypto_alg_t crypto_alg);
-extern void ipsec_sa_set_integ_alg (ipsec_sa_t * sa,
+extern void ipsec_sa_set_integ_alg (ipsec_sa_t *sa,
ipsec_integ_alg_t integ_alg);
+extern void ipsec_sa_set_async_mode (ipsec_sa_t *sa, int is_enabled);
-typedef walk_rc_t (*ipsec_sa_walk_cb_t) (ipsec_sa_t * sa, void *ctx);
+typedef walk_rc_t (*ipsec_sa_walk_cb_t) (ipsec_sa_t *sa, void *ctx);
extern void ipsec_sa_walk (ipsec_sa_walk_cb_t cd, void *ctx);
extern u8 *format_ipsec_replay_window (u8 *s, va_list *args);
-extern u8 *format_ipsec_crypto_alg (u8 * s, va_list * args);
-extern u8 *format_ipsec_integ_alg (u8 * s, va_list * args);
-extern u8 *format_ipsec_sa (u8 * s, va_list * args);
-extern u8 *format_ipsec_key (u8 * s, va_list * args);
-extern uword unformat_ipsec_crypto_alg (unformat_input_t * input,
- va_list * args);
-extern uword unformat_ipsec_integ_alg (unformat_input_t * input,
- va_list * args);
-extern uword unformat_ipsec_key (unformat_input_t * input, va_list * args);
-
-#define IPSEC_UDP_PORT_NONE ((u16)~0)
+extern u8 *format_ipsec_crypto_alg (u8 *s, va_list *args);
+extern u8 *format_ipsec_integ_alg (u8 *s, va_list *args);
+extern u8 *format_ipsec_sa (u8 *s, va_list *args);
+extern u8 *format_ipsec_key (u8 *s, va_list *args);
+extern uword unformat_ipsec_crypto_alg (unformat_input_t *input,
+ va_list *args);
+extern uword unformat_ipsec_integ_alg (unformat_input_t *input, va_list *args);
+extern uword unformat_ipsec_key (unformat_input_t *input, va_list *args);
+
+#define IPSEC_UDP_PORT_NONE ((u16) ~0)
/*
* Anti Replay definitions
*/
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (64)
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE-1)
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE(_sa) \
+ (u32) (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (_sa)) ? \
+ clib_bitmap_bytes (_sa->replay_window_huge) * 8 : \
+ BITS (_sa->replay_window))
+
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN(_sa, _is_huge) \
+ (u32) (_is_huge ? clib_bitmap_bytes (_sa->replay_window_huge) * 8 : \
+ BITS (_sa->replay_window))
+
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN(_sa) \
+ (u64) (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (_sa)) ? \
+ clib_bitmap_count_set_bits (_sa->replay_window_huge) : \
+ count_set_bits (_sa->replay_window))
+
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN_KNOWN_WIN(_sa, _is_huge) \
+ (u64) (_is_huge ? clib_bitmap_count_set_bits (_sa->replay_window_huge) : \
+ count_set_bits (_sa->replay_window))
+
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX(_sa) \
+ (u32) (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa) - 1)
+
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX_KNOWN_WIN(_sa, _is_huge) \
+ (u32) (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa, _is_huge) - 1)
/*
* sequence number less than the lower bound are outside of the window
* From RFC4303 Appendix A:
* Bl = Tl - W + 1
*/
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND(_tl) (_tl - IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE + 1)
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND(_sa) \
+ (u32) (_sa->seq - IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa) + 1)
+
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND_KNOWN_WIN(_sa, _is_huge) \
+ (u32) (_sa->seq - \
+ IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (_sa, _is_huge) + 1)
+
+always_inline u64
+ipsec_sa_anti_replay_get_64b_window (const ipsec_sa_t *sa)
+{
+ if (!ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa))
+ return sa->replay_window;
+
+ u64 w;
+ u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa);
+ u32 tl_win_index = sa->seq & (window_size - 1);
+
+ if (PREDICT_TRUE (tl_win_index >= 63))
+ return clib_bitmap_get_multiple (sa->replay_window_huge, tl_win_index - 63,
+ 64);
+
+ w = clib_bitmap_get_multiple_no_check (sa->replay_window_huge, 0,
+ tl_win_index + 1)
+ << (63 - tl_win_index);
+ w |= clib_bitmap_get_multiple_no_check (sa->replay_window_huge,
+ window_size - 63 + tl_win_index,
+ 63 - tl_win_index);
+
+ return w;
+}
always_inline int
-ipsec_sa_anti_replay_check (const ipsec_sa_t *sa, u32 seq)
+ipsec_sa_anti_replay_check (const ipsec_sa_t *sa, u32 seq, bool ar_huge)
{
- if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) &&
- sa->replay_window & (1ULL << (sa->seq - seq)))
- return 1;
+ u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge);
+
+ /* we assume that the packet is in the window.
+ * if the packet falls left (sa->seq - seq >= window size),
+ * the result is wrong */
+
+ if (ar_huge)
+ return clib_bitmap_get (sa->replay_window_huge, seq & (window_size - 1));
else
- return 0;
+ return (sa->replay_window >> (window_size + seq - sa->seq - 1)) & 1;
+
+ return 0;
}
/*
@@ -343,10 +421,14 @@ ipsec_sa_anti_replay_check (const ipsec_sa_t *sa, u32 seq)
always_inline int
ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
u32 hi_seq_used, bool post_decrypt,
- u32 *hi_seq_req)
+ u32 *hi_seq_req, bool ar_huge)
{
ASSERT ((post_decrypt == false) == (hi_seq_req != 0));
+ u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge);
+ u32 window_lower_bound =
+ IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND_KNOWN_WIN (sa, ar_huge);
+
if (!ipsec_sa_is_set_USE_ESN (sa))
{
if (hi_seq_req)
@@ -359,14 +441,11 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
if (PREDICT_TRUE (seq > sa->seq))
return 0;
- u32 diff = sa->seq - seq;
-
- if (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE > diff)
- return ((sa->replay_window & (1ULL << diff)) ? 1 : 0);
- else
+ /* does the packet fall out on the left of the window */
+ if (sa->seq >= seq + window_size)
return 1;
- return 0;
+ return ipsec_sa_anti_replay_check (sa, seq, ar_huge);
}
if (!ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
@@ -406,14 +485,15 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
*/
return 0;
}
- if (PREDICT_TRUE (sa->seq >= (IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX)))
+
+ if (PREDICT_TRUE (sa->seq >= window_size - 1))
{
/*
- * the last sequence number VPP recieved is more than one
+ * the last sequence number VPP received is more than one
* window size greater than zero.
* Case A from RFC4303 Appendix A.
*/
- if (seq < IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND (sa->seq))
+ if (seq < window_lower_bound)
{
/*
* the received sequence number is lower than the lower bound
@@ -425,7 +505,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
{
if (hi_seq_used == sa->seq_hi)
/* the high sequence number used to succesfully decrypt this
- * packet is the same as the last-sequnence number of the SA.
+ * packet is the same as the last-sequence number of the SA.
* that means this packet did not cause a wrap.
* this packet is thus out of window and should be dropped */
return 1;
@@ -437,8 +517,8 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
}
else
{
- /* pre-decrypt it might be the might that casues a wrap, we
- * need to decrpyt to find out */
+ /* pre-decrypt it might be the packet that causes a wrap, we
+ * need to decrypt it to find out */
if (hi_seq_req)
*hi_seq_req = sa->seq_hi + 1;
return 0;
@@ -447,17 +527,17 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
else
{
/*
- * the recieved sequence number greater than the low
+ * the received sequence number greater than the low
* end of the window.
*/
if (hi_seq_req)
*hi_seq_req = sa->seq_hi;
if (seq <= sa->seq)
/*
- * The recieved seq number is within bounds of the window
+ * The received seq number is within bounds of the window
* check if it's a duplicate
*/
- return (ipsec_sa_anti_replay_check (sa, seq));
+ return ipsec_sa_anti_replay_check (sa, seq, ar_huge);
else
/*
* The received sequence number is greater than the window
@@ -470,14 +550,14 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
else
{
/*
- * the last sequence number VPP recieved is within one window
+ * the last sequence number VPP received is within one window
* size of zero, i.e. 0 < TL < WINDOW_SIZE, the lower bound is thus a
* large sequence number.
- * Note that the check below uses unsiged integer arthimetic, so the
+ * Note that the check below uses unsigned integer arithmetic, so the
* RHS will be a larger number.
* Case B from RFC4303 Appendix A.
*/
- if (seq < IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND (sa->seq))
+ if (seq < window_lower_bound)
{
/*
* the sequence number is less than the lower bound.
@@ -490,7 +570,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
*/
if (hi_seq_req)
*hi_seq_req = sa->seq_hi;
- return (ipsec_sa_anti_replay_check (sa, seq));
+ return ipsec_sa_anti_replay_check (sa, seq, ar_huge);
}
else
{
@@ -498,7 +578,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
* the packet is less the window lower bound or greater than
* the higher bound, depending on how you look at it...
* We're assuming, given that the last sequence number received,
- * TL < WINDOW_SIZE, that a largeer seq num is more likely to be
+ * TL < WINDOW_SIZE, that a larger seq num is more likely to be
* a packet that moves the window forward, than a packet that has
* wrapped the high sequence again. If it were the latter then
* we've lost close to 2^32 packets.
@@ -511,15 +591,14 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
else
{
/*
- * the packet seq number is between the lower bound (a large nubmer)
- * and MAX_SEQ_NUM. This is in the window since the window upper bound
- * tl > 0.
- * However, since TL is the other side of 0 to the received
- * packet, the SA has moved on to a higher sequence number.
+ * the packet seq number is between the lower bound (a large number)
+ * and MAX_SEQ_NUM. This is in the window since the window upper
+ * bound tl > 0. However, since TL is the other side of 0 to the
+ * received packet, the SA has moved on to a higher sequence number.
*/
if (hi_seq_req)
*hi_seq_req = sa->seq_hi - 1;
- return (ipsec_sa_anti_replay_check (sa, seq));
+ return ipsec_sa_anti_replay_check (sa, seq, ar_huge);
}
}
@@ -529,45 +608,149 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
}
always_inline u32
-ipsec_sa_anti_replay_window_shift (ipsec_sa_t *sa, u32 inc)
+ipsec_sa_anti_replay_window_shift (ipsec_sa_t *sa, u32 inc, bool ar_huge)
{
u32 n_lost = 0;
+ u32 seen = 0;
+ u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge);
- if (inc < IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE)
+ if (inc < window_size)
{
- if (sa->seq > IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE)
+ if (ar_huge)
+ {
+ /* the number of packets we saw in this section of the window */
+ clib_bitmap_t *window = sa->replay_window_huge;
+ u32 window_lower_bound = (sa->seq + 1) & (window_size - 1);
+ u32 window_next_lower_bound =
+ (window_lower_bound + inc) & (window_size - 1);
+
+ uword i_block, i_word_start, i_word_end, full_words;
+ uword n_blocks = window_size >> log2_uword_bits;
+ uword mask;
+
+ i_block = window_lower_bound >> log2_uword_bits;
+
+ i_word_start = window_lower_bound & (uword_bits - 1);
+ i_word_end = window_next_lower_bound & (uword_bits - 1);
+
+ /* We stay in the same word */
+ if (i_word_start + inc <= uword_bits)
+ {
+ mask = pow2_mask (inc) << i_word_start;
+ seen += count_set_bits (window[i_block] & mask);
+ window[i_block] &= ~mask;
+ }
+ else
+ {
+ full_words = (inc + i_word_start - uword_bits - i_word_end) >>
+ log2_uword_bits;
+
+ /* count set bits in the first word */
+ mask = (uword) ~0 << i_word_start;
+ seen += count_set_bits (window[i_block] & mask);
+ window[i_block] &= ~mask;
+ i_block = (i_block + 1) & (n_blocks - 1);
+
+ /* count set bits in the next full words */
+ /* even if the last word needs to be fully counted, we treat it
+ * apart */
+ while (full_words >= 8)
+ {
+ if (full_words >= 16)
+ {
+ /* prefetch the next 8 blocks (64 bytes) */
+ clib_prefetch_store (
+ &window[(i_block + 8) & (n_blocks - 1)]);
+ }
+
+ seen += count_set_bits (window[i_block]);
+ seen +=
+ count_set_bits (window[(i_block + 1) & (n_blocks - 1)]);
+ seen +=
+ count_set_bits (window[(i_block + 2) & (n_blocks - 1)]);
+ seen +=
+ count_set_bits (window[(i_block + 3) & (n_blocks - 1)]);
+ seen +=
+ count_set_bits (window[(i_block + 4) & (n_blocks - 1)]);
+ seen +=
+ count_set_bits (window[(i_block + 5) & (n_blocks - 1)]);
+ seen +=
+ count_set_bits (window[(i_block + 6) & (n_blocks - 1)]);
+ seen +=
+ count_set_bits (window[(i_block + 7) & (n_blocks - 1)]);
+ window[i_block] = 0;
+ window[(i_block + 1) & (n_blocks - 1)] = 0;
+ window[(i_block + 2) & (n_blocks - 1)] = 0;
+ window[(i_block + 3) & (n_blocks - 1)] = 0;
+ window[(i_block + 4) & (n_blocks - 1)] = 0;
+ window[(i_block + 5) & (n_blocks - 1)] = 0;
+ window[(i_block + 6) & (n_blocks - 1)] = 0;
+ window[(i_block + 7) & (n_blocks - 1)] = 0;
+
+ i_block = (i_block + 8) & (n_blocks - 1);
+ full_words -= 8;
+ }
+ while (full_words > 0)
+ {
+ // last word is treated after the loop
+ seen += count_set_bits (window[i_block]);
+ window[i_block] = 0;
+ i_block = (i_block + 1) & (n_blocks - 1);
+ full_words--;
+ }
+
+ /* the last word */
+ mask = pow2_mask (i_word_end);
+ seen += count_set_bits (window[i_block] & mask);
+ window[i_block] &= ~mask;
+ }
+
+ clib_bitmap_set_no_check (window,
+ (sa->seq + inc) & (window_size - 1), 1);
+ }
+ else
{
/*
* count how many holes there are in the portion
* of the window that we will right shift of the end
* as a result of this increments
*/
- u64 mask = (((u64) 1 << inc) - 1) << (BITS (u64) - inc);
- u64 old = sa->replay_window & mask;
+ u64 old = sa->replay_window & pow2_mask (inc);
/* the number of packets we saw in this section of the window */
- u64 seen = count_set_bits (old);
-
- /*
- * the number we missed is the size of the window section
- * minus the number we saw.
- */
- n_lost = inc - seen;
+ seen = count_set_bits (old);
+ sa->replay_window =
+ ((sa->replay_window) >> inc) | (1ULL << (window_size - 1));
}
- sa->replay_window = ((sa->replay_window) << inc) | 1;
+
+ /*
+ * the number we missed is the size of the window section
+ * minus the number we saw.
+ */
+ n_lost = inc - seen;
}
else
{
/* holes in the replay window are lost packets */
- n_lost = BITS (u64) - count_set_bits (sa->replay_window);
+ n_lost = window_size -
+ IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN_KNOWN_WIN (sa, ar_huge);
/* any sequence numbers that now fall outside the window
* are forever lost */
- n_lost += inc - IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE;
+ n_lost += inc - window_size;
- sa->replay_window = 1;
+ if (PREDICT_FALSE (ar_huge))
+ {
+ clib_bitmap_zero (sa->replay_window_huge);
+ clib_bitmap_set_no_check (sa->replay_window_huge,
+ (sa->seq + inc) & (window_size - 1), 1);
+ }
+ else
+ {
+ sa->replay_window = 1ULL << (window_size - 1);
+ }
}
- return (n_lost);
+ return n_lost;
}
/*
@@ -581,9 +764,10 @@ ipsec_sa_anti_replay_window_shift (ipsec_sa_t *sa, u32 inc)
*/
always_inline u64
ipsec_sa_anti_replay_advance (ipsec_sa_t *sa, u32 thread_index, u32 seq,
- u32 hi_seq)
+ u32 hi_seq, bool ar_huge)
{
u64 n_lost = 0;
+ u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge);
u32 pos;
if (ipsec_sa_is_set_USE_ESN (sa))
@@ -593,25 +777,33 @@ ipsec_sa_anti_replay_advance (ipsec_sa_t *sa, u32 thread_index, u32 seq,
if (wrap == 0 && seq > sa->seq)
{
pos = seq - sa->seq;
- n_lost = ipsec_sa_anti_replay_window_shift (sa, pos);
+ n_lost = ipsec_sa_anti_replay_window_shift (sa, pos, ar_huge);
sa->seq = seq;
}
else if (wrap > 0)
{
- pos = ~seq + sa->seq + 1;
- n_lost = ipsec_sa_anti_replay_window_shift (sa, pos);
+ pos = seq + ~sa->seq + 1;
+ n_lost = ipsec_sa_anti_replay_window_shift (sa, pos, ar_huge);
sa->seq = seq;
sa->seq_hi = hi_seq;
}
else if (wrap < 0)
{
pos = ~seq + sa->seq + 1;
- sa->replay_window |= (1ULL << pos);
+ if (ar_huge)
+ clib_bitmap_set_no_check (sa->replay_window_huge,
+ seq & (window_size - 1), 1);
+ else
+ sa->replay_window |= (1ULL << (window_size - 1 - pos));
}
else
{
pos = sa->seq - seq;
- sa->replay_window |= (1ULL << pos);
+ if (ar_huge)
+ clib_bitmap_set_no_check (sa->replay_window_huge,
+ seq & (window_size - 1), 1);
+ else
+ sa->replay_window |= (1ULL << (window_size - 1 - pos));
}
}
else
@@ -619,13 +811,17 @@ ipsec_sa_anti_replay_advance (ipsec_sa_t *sa, u32 thread_index, u32 seq,
if (seq > sa->seq)
{
pos = seq - sa->seq;
- n_lost = ipsec_sa_anti_replay_window_shift (sa, pos);
+ n_lost = ipsec_sa_anti_replay_window_shift (sa, pos, ar_huge);
sa->seq = seq;
}
else
{
pos = sa->seq - seq;
- sa->replay_window |= (1ULL << pos);
+ if (ar_huge)
+ clib_bitmap_set_no_check (sa->replay_window_huge,
+ seq & (window_size - 1), 1);
+ else
+ sa->replay_window |= (1ULL << (window_size - 1 - pos));
}
}
@@ -637,8 +833,8 @@ ipsec_sa_anti_replay_advance (ipsec_sa_t *sa, u32 thread_index, u32 seq,
* Makes choice for thread_id should be assigned.
* if input ~0, gets random worker_id based on unix_time_now_nsec
*/
-always_inline u32
-ipsec_sa_assign_thread (u32 thread_id)
+always_inline u16
+ipsec_sa_assign_thread (u16 thread_id)
{
return ((thread_id) ? thread_id
: (unix_time_now_nsec () % vlib_num_workers ()) + 1);
diff --git a/src/vnet/ipsec/ipsec_spd.c b/src/vnet/ipsec/ipsec_spd.c
index 5d5d521dd72..7b9a0aea8ed 100644
--- a/src/vnet/ipsec/ipsec_spd.c
+++ b/src/vnet/ipsec/ipsec_spd.c
@@ -38,12 +38,10 @@ ipsec_add_del_spd (vlib_main_t * vm, u32 spd_id, int is_add)
if (!spd)
return VNET_API_ERROR_INVALID_VALUE;
- /* *INDENT-OFF* */
hash_foreach (k, v, im->spd_index_by_sw_if_index, ({
if (v == spd_index)
ipsec_set_interface_spd(vm, k, spd_id, 0);
}));
- /* *INDENT-ON* */
hash_unset (im->spd_index_by_spd_id, spd_id);
#define _(s,v) vec_free(spd->policies[IPSEC_SPD_POLICY_##s]);
foreach_ipsec_spd_policy_type
@@ -165,9 +163,6 @@ ipsec_add_del_spd (vlib_main_t * vm, u32 spd_id, int is_add)
pool_max_len (im->fp_ip6_lookup_hashes_pool))
{
clib_bihash_40_8_t *bihash_table;
- ipsec_spd_fp_t *fp_spd = &spd->fp_spd;
-
- fp_spd->name6_out = format (0, "spd_%u_fp_ip6_out", spd_id);
fp_spd->name6_out = format (0, "spd_%u_fp_ip6_out", spd_id);
pool_get (im->fp_ip6_lookup_hashes_pool, bihash_table);
@@ -185,7 +180,6 @@ ipsec_add_del_spd (vlib_main_t * vm, u32 spd_id, int is_add)
pool_max_len (im->fp_ip6_lookup_hashes_pool))
{
clib_bihash_40_8_t *bihash_table;
- ipsec_spd_fp_t *fp_spd = &spd->fp_spd;
fp_spd->name6_in = format (0, "spd_%u_fp_ip6_in", spd_id);
pool_get (im->fp_ip6_lookup_hashes_pool, bihash_table);
diff --git a/src/vnet/ipsec/ipsec_spd.h b/src/vnet/ipsec/ipsec_spd.h
index 3a4fd0ec91c..3b1e4b40747 100644
--- a/src/vnet/ipsec/ipsec_spd.h
+++ b/src/vnet/ipsec/ipsec_spd.h
@@ -55,8 +55,6 @@ typedef struct
*/
typedef struct
{
- /** vectors for each of the fast path policy types */
- u32 *fp_policies[IPSEC_SPD_POLICY_N_TYPES];
ipsec_fp_mask_id_t *fp_mask_ids[IPSEC_SPD_POLICY_N_TYPES];
/* names of bihash tables */
u8 *name4_out;
diff --git a/src/vnet/ipsec/ipsec_spd_fp_lookup.h b/src/vnet/ipsec/ipsec_spd_fp_lookup.h
index a372ac77a50..2bbd7c664f9 100644
--- a/src/vnet/ipsec/ipsec_spd_fp_lookup.h
+++ b/src/vnet/ipsec/ipsec_spd_fp_lookup.h
@@ -97,8 +97,8 @@ static_always_inline int
single_rule_in_match_5tuple (ipsec_policy_t *policy, ipsec_fp_5tuple_t *match)
{
- u32 sa = clib_net_to_host_u32 (match->laddr.as_u32);
- u32 da = clib_net_to_host_u32 (match->raddr.as_u32);
+ u32 da = clib_net_to_host_u32 (match->laddr.as_u32);
+ u32 sa = clib_net_to_host_u32 (match->raddr.as_u32);
if (policy->policy == IPSEC_POLICY_ACTION_PROTECT)
{
@@ -118,16 +118,16 @@ single_rule_in_match_5tuple (ipsec_policy_t *policy, ipsec_fp_5tuple_t *match)
}
else
{
- if (da < clib_net_to_host_u32 (policy->raddr.start.ip4.as_u32))
+ if (sa < clib_net_to_host_u32 (policy->raddr.start.ip4.as_u32))
return (0);
- if (da > clib_net_to_host_u32 (policy->raddr.stop.ip4.as_u32))
+ if (sa > clib_net_to_host_u32 (policy->raddr.stop.ip4.as_u32))
return (0);
- if (sa < clib_net_to_host_u32 (policy->laddr.start.ip4.as_u32))
+ if (da < clib_net_to_host_u32 (policy->laddr.start.ip4.as_u32))
return (0);
- if (sa > clib_net_to_host_u32 (policy->laddr.stop.ip4.as_u32))
+ if (da > clib_net_to_host_u32 (policy->laddr.stop.ip4.as_u32))
return (0);
}
return (1);
@@ -196,13 +196,16 @@ ipsec_fp_in_ip6_policy_match_n (void *spd_fp, ipsec_fp_5tuple_t *tuples,
{
policy = im->policies + *policy_id;
- if ((last_priority[i] < policy->priority) &&
- (single_rule_in_match_5tuple (policy, match)))
+ if (single_rule_in_match_5tuple (policy, match))
{
- last_priority[i] = policy->priority;
- if (policies[i] == 0)
- counter++;
- policies[i] = policy;
+ if (last_priority[i] < policy->priority)
+ {
+ last_priority[i] = policy->priority;
+ if (policies[i] == 0)
+ counter++;
+ policies[i] = policy;
+ }
+ break;
}
}
}
@@ -291,13 +294,16 @@ ipsec_fp_in_ip4_policy_match_n (void *spd_fp, ipsec_fp_5tuple_t *tuples,
{
policy = im->policies + *policy_id;
- if ((last_priority[i] < policy->priority) &&
- (single_rule_in_match_5tuple (policy, match)))
+ if (single_rule_in_match_5tuple (policy, match))
{
- last_priority[i] = policy->priority;
- if (policies[i] == 0)
- counter++;
- policies[i] = policy;
+ if (last_priority[i] < policy->priority)
+ {
+ last_priority[i] = policy->priority;
+ if (policies[i] == 0)
+ counter++;
+ policies[i] = policy;
+ }
+ break;
}
}
}
@@ -418,6 +424,7 @@ ipsec_fp_out_ip6_policy_match_n (void *spd_fp, ipsec_fp_5tuple_t *tuples,
policies[i] = policy;
ids[i] = *policy_id;
}
+ break;
}
}
}
@@ -511,14 +518,17 @@ ipsec_fp_out_ip4_policy_match_n (void *spd_fp, ipsec_fp_5tuple_t *tuples,
{
policy = im->policies + *policy_id;
- if ((last_priority[i] < policy->priority) &&
- (single_rule_out_match_5tuple (policy, match)))
+ if (single_rule_out_match_5tuple (policy, match))
{
- last_priority[i] = policy->priority;
- if (policies[i] == 0)
- counter++;
- policies[i] = policy;
- ids[i] = *policy_id;
+ if (last_priority[i] < policy->priority)
+ {
+ last_priority[i] = policy->priority;
+ if (policies[i] == 0)
+ counter++;
+ policies[i] = policy;
+ ids[i] = *policy_id;
+ }
+ break;
}
}
}
diff --git a/src/vnet/ipsec/ipsec_spd_policy.c b/src/vnet/ipsec/ipsec_spd_policy.c
index 5261621b64a..af087689941 100644
--- a/src/vnet/ipsec/ipsec_spd_policy.c
+++ b/src/vnet/ipsec/ipsec_spd_policy.c
@@ -24,22 +24,6 @@ vlib_combined_counter_main_t ipsec_spd_policy_counters = {
.stat_segment_name = "/net/ipsec/policy",
};
-static int
-ipsec_spd_entry_sort (void *a1, void *a2)
-{
- ipsec_main_t *im = &ipsec_main;
- u32 *id1 = a1;
- u32 *id2 = a2;
- ipsec_policy_t *p1, *p2;
-
- p1 = pool_elt_at_index (im->policies, *id1);
- p2 = pool_elt_at_index (im->policies, *id2);
- if (p1 && p2)
- return p2->priority - p1->priority;
-
- return 0;
-}
-
int
ipsec_policy_mk_type (bool is_outbound,
bool is_ipv6,
@@ -189,6 +173,7 @@ ipsec_add_del_policy (vlib_main_t * vm,
if (is_add)
{
u32 policy_index;
+ u32 i;
if (policy->policy == IPSEC_POLICY_ACTION_PROTECT)
{
@@ -216,9 +201,20 @@ ipsec_add_del_policy (vlib_main_t * vm,
vlib_validate_combined_counter (&ipsec_spd_policy_counters,
policy_index);
vlib_zero_combined_counter (&ipsec_spd_policy_counters, policy_index);
- vec_add1 (spd->policies[policy->type], policy_index);
- vec_sort_with_function (spd->policies[policy->type],
- ipsec_spd_entry_sort);
+
+ vec_foreach_index (i, spd->policies[policy->type])
+ {
+ ipsec_policy_t *p =
+ pool_elt_at_index (im->policies, spd->policies[policy->type][i]);
+
+ if (p->priority <= vp->priority)
+ {
+ break;
+ }
+ }
+
+ vec_insert_elts (spd->policies[policy->type], &policy_index, 1, i);
+
*stat_index = policy_index;
}
else
@@ -382,7 +378,6 @@ ipsec_fp_get_policy_ports_mask (ipsec_policy_t *policy,
}
mask->protocol = (policy->protocol == IPSEC_POLICY_PROTOCOL_ANY) ? 0 : ~0;
- mask->action = 0;
}
static_always_inline void
@@ -399,6 +394,15 @@ ipsec_fp_ip4_get_policy_mask (ipsec_policy_t *policy, ipsec_fp_5tuple_t *mask,
clib_memset_u8 (mask, 0xff, sizeof (ipsec_fp_5tuple_t));
clib_memset_u8 (&mask->l3_zero_pad, 0, sizeof (mask->l3_zero_pad));
+ if (inbound && (policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT &&
+ policy->sa_index != INDEX_INVALID))
+ {
+ ipsec_sa_t *s = ipsec_sa_get (policy->sa_index);
+
+ if (ipsec_sa_is_set_IS_TUNNEL (s))
+ goto set_spi_mask;
+ }
+
/* find bits where start != stop */
*plmask = *pladdr_start ^ *pladdr_stop;
*prmask = *praddr_start ^ *praddr_stop;
@@ -413,6 +417,7 @@ ipsec_fp_ip4_get_policy_mask (ipsec_policy_t *policy, ipsec_fp_5tuple_t *mask,
*prmask = clib_host_to_net_u32 (
mask_out_highest_set_bit_u32 (clib_net_to_host_u32 (*prmask)));
+set_spi_mask:
if (inbound)
{
if (policy->type != IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT)
@@ -440,6 +445,15 @@ ipsec_fp_ip6_get_policy_mask (ipsec_policy_t *policy, ipsec_fp_5tuple_t *mask,
clib_memset_u8 (mask, 0xff, sizeof (ipsec_fp_5tuple_t));
+ if (inbound && (policy->type == IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT &&
+ policy->sa_index != INDEX_INVALID))
+ {
+ ipsec_sa_t *s = ipsec_sa_get (policy->sa_index);
+
+ if (ipsec_sa_is_set_IS_TUNNEL (s))
+ goto set_spi_mask;
+ }
+
*plmask = (*pladdr_start++ ^ *pladdr_stop++);
*prmask = (*praddr_start++ ^ *praddr_stop++);
@@ -466,16 +480,16 @@ ipsec_fp_ip6_get_policy_mask (ipsec_policy_t *policy, ipsec_fp_5tuple_t *mask,
if (*prmask++ & clib_host_to_net_u64 (0x1))
{
- *prmask = (*pladdr_start ^ *pladdr_stop);
+ *prmask = (*praddr_start ^ *praddr_stop);
*prmask = clib_host_to_net_u64 (
mask_out_highest_set_bit_u64 (clib_net_to_host_u64 (*prmask)));
}
else
*prmask = 0;
-
+set_spi_mask:
if (inbound)
{
- if (policy->type != IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT)
+ if (policy->type != IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT)
mask->spi = 0;
mask->protocol = 0;
@@ -512,7 +526,21 @@ ipsec_fp_get_policy_5tuple (ipsec_policy_t *policy, ipsec_fp_5tuple_t *tuple,
policy->sa_index != INDEX_INVALID)
{
ipsec_sa_t *s = ipsec_sa_get (policy->sa_index);
+
tuple->spi = s->spi;
+ if (ipsec_sa_is_set_IS_TUNNEL (s))
+ {
+ if (tuple->is_ipv6)
+ {
+ tuple->ip6_laddr = s->tunnel.t_dst.ip.ip6;
+ tuple->ip6_raddr = s->tunnel.t_src.ip.ip6;
+ }
+ else
+ {
+ tuple->laddr = s->tunnel.t_dst.ip.ip4;
+ tuple->raddr = s->tunnel.t_src.ip.ip4;
+ }
+ }
}
else
tuple->spi = INDEX_INVALID;
@@ -521,7 +549,6 @@ ipsec_fp_get_policy_5tuple (ipsec_policy_t *policy, ipsec_fp_5tuple_t *tuple,
}
tuple->protocol = policy->protocol;
-
tuple->lport = policy->lport.start;
tuple->rport = policy->rport.start;
}
@@ -590,17 +617,24 @@ ipsec_fp_ip4_add_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd,
}
else
{
+ u32 i;
+ u32 *old_fp_policies_ids = result_val->fp_policies_ids;
- if (vec_max_len (result_val->fp_policies_ids) !=
- vec_len (result_val->fp_policies_ids))
+ vec_foreach_index (i, result_val->fp_policies_ids)
{
- /* no need to resize */
- vec_add1 (result_val->fp_policies_ids, policy_index);
+ ipsec_policy_t *p =
+ pool_elt_at_index (im->policies, result_val->fp_policies_ids[i]);
+
+ if (p->priority <= policy->priority)
+ {
+ break;
+ }
}
- else
- {
- vec_add1 (result_val->fp_policies_ids, policy_index);
+ vec_insert_elts (result_val->fp_policies_ids, &policy_index, 1, i);
+
+ if (result_val->fp_policies_ids != old_fp_policies_ids)
+ {
res = clib_bihash_add_del_16_8 (bihash_table, &result, 1);
if (res != 0)
@@ -626,7 +660,6 @@ ipsec_fp_ip4_add_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd,
(fp_spd->fp_mask_ids[policy->type] + searched_idx)->refcount++;
mte->refcount++;
- vec_add1 (fp_spd->fp_policies[policy->type], policy_index);
clib_memcpy (vp, policy, sizeof (*vp));
return 0;
@@ -695,17 +728,24 @@ ipsec_fp_ip6_add_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd,
}
else
{
+ u32 i;
+ u32 *old_fp_policies_ids = result_val->fp_policies_ids;
- if (vec_max_len (result_val->fp_policies_ids) !=
- vec_len (result_val->fp_policies_ids))
+ vec_foreach_index (i, result_val->fp_policies_ids)
{
- /* no need to resize */
- vec_add1 (result_val->fp_policies_ids, policy_index);
+ ipsec_policy_t *p =
+ pool_elt_at_index (im->policies, result_val->fp_policies_ids[i]);
+
+ if (p->priority <= policy->priority)
+ {
+ break;
+ }
}
- else
- {
- vec_add1 (result_val->fp_policies_ids, policy_index);
+ vec_insert_elts (result_val->fp_policies_ids, &policy_index, 1, i);
+
+ if (result_val->fp_policies_ids != old_fp_policies_ids)
+ {
res = clib_bihash_add_del_40_8 (bihash_table, &result, 1);
if (res != 0)
@@ -731,7 +771,6 @@ ipsec_fp_ip6_add_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd,
(fp_spd->fp_mask_ids[policy->type] + searched_idx)->refcount++;
mte->refcount++;
- vec_add1 (fp_spd->fp_policies[policy->type], policy_index);
clib_memcpy (vp, policy, sizeof (*vp));
return 0;
@@ -760,7 +799,7 @@ ipsec_fp_ip6_del_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd,
fp_spd->ip6_out_lookup_hash_idx);
ipsec_policy_t *vp;
- u32 ii, iii, imt;
+ u32 ii, imt;
ipsec_fp_ip6_get_policy_mask (policy, &mask, inbound);
ipsec_fp_get_policy_5tuple (policy, &policy_5tuple, inbound);
@@ -769,57 +808,38 @@ ipsec_fp_ip6_del_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd,
if (res != 0)
return -1;
- res = -1;
vec_foreach_index (ii, result_val->fp_policies_ids)
{
vp =
pool_elt_at_index (im->policies, *(result_val->fp_policies_ids + ii));
if (ipsec_policy_is_equal (vp, policy))
{
- vec_foreach_index (iii, fp_spd->fp_policies[policy->type])
+ if (vec_len (result_val->fp_policies_ids) == 1)
+ {
+ vec_free (result_val->fp_policies_ids);
+ clib_bihash_add_del_40_8 (bihash_table, &result, 0);
+ }
+ else
+ vec_delete (result_val->fp_policies_ids, 1, ii);
+
+ vec_foreach_index (imt, fp_spd->fp_mask_ids[policy->type])
{
- if (*(fp_spd->fp_policies[policy->type] + iii) ==
- *(result_val->fp_policies_ids + ii))
+ if ((fp_spd->fp_mask_ids[policy->type] + imt)->mask_type_idx ==
+ vp->fp_mask_type_id)
{
- if (vec_len (result_val->fp_policies_ids) == 1)
- {
- vec_free (result_val->fp_policies_ids);
- clib_bihash_add_del_40_8 (bihash_table, &result, 0);
- }
- else
- {
- vec_del1 (result_val->fp_policies_ids, ii);
- }
- vec_del1 (fp_spd->fp_policies[policy->type], iii);
-
- vec_foreach_index (imt, fp_spd->fp_mask_ids[policy->type])
- {
- if ((fp_spd->fp_mask_ids[policy->type] + imt)
- ->mask_type_idx == vp->fp_mask_type_id)
- {
-
- if ((fp_spd->fp_mask_ids[policy->type] + imt)
- ->refcount-- == 1)
- vec_del1 (fp_spd->fp_mask_ids[policy->type], imt);
-
- break;
- }
- }
-
- res = 0;
+
+ if ((fp_spd->fp_mask_ids[policy->type] + imt)->refcount-- ==
+ 1)
+ vec_del1 (fp_spd->fp_mask_ids[policy->type], imt);
+
break;
}
}
- if (res != 0)
- continue;
- else
- {
- ipsec_fp_release_mask_type (im, vp->fp_mask_type_id);
- ipsec_sa_unlock (vp->sa_index);
- pool_put (im->policies, vp);
- return 0;
- }
+ ipsec_fp_release_mask_type (im, vp->fp_mask_type_id);
+ ipsec_sa_unlock (vp->sa_index);
+ pool_put (im->policies, vp);
+ return 0;
}
}
return -1;
@@ -837,7 +857,7 @@ ipsec_fp_ip4_del_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd,
(ipsec_fp_lookup_value_t *) &result.value;
bool inbound = ipsec_is_policy_inbound (policy);
ipsec_policy_t *vp;
- u32 ii, iii, imt;
+ u32 ii, imt;
clib_bihash_16_8_t *bihash_table =
inbound ? pool_elt_at_index (im->fp_ip4_lookup_hashes_pool,
fp_spd->ip4_in_lookup_hash_idx) :
@@ -852,57 +872,37 @@ ipsec_fp_ip4_del_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd,
if (res != 0)
return -1;
- res = -1;
vec_foreach_index (ii, result_val->fp_policies_ids)
{
vp =
pool_elt_at_index (im->policies, *(result_val->fp_policies_ids + ii));
if (ipsec_policy_is_equal (vp, policy))
{
- vec_foreach_index (iii, fp_spd->fp_policies[policy->type])
+ if (vec_len (result_val->fp_policies_ids) == 1)
{
- if (*(fp_spd->fp_policies[policy->type] + iii) ==
- *(result_val->fp_policies_ids + ii))
- {
- if (vec_len (result_val->fp_policies_ids) == 1)
- {
- vec_free (result_val->fp_policies_ids);
- clib_bihash_add_del_16_8 (bihash_table, &result, 0);
- }
- else
- {
- vec_del1 (result_val->fp_policies_ids, ii);
- }
- vec_del1 (fp_spd->fp_policies[policy->type], iii);
-
- vec_foreach_index (imt, fp_spd->fp_mask_ids[policy->type])
- {
- if ((fp_spd->fp_mask_ids[policy->type] + imt)
- ->mask_type_idx == vp->fp_mask_type_id)
- {
-
- if ((fp_spd->fp_mask_ids[policy->type] + imt)
- ->refcount-- == 1)
- vec_del1 (fp_spd->fp_mask_ids[policy->type], imt);
-
- break;
- }
- }
-
- res = 0;
- break;
- }
+ vec_free (result_val->fp_policies_ids);
+ clib_bihash_add_del_16_8 (bihash_table, &result, 0);
}
-
- if (res != 0)
- continue;
else
+ vec_delete (result_val->fp_policies_ids, 1, ii);
+
+ vec_foreach_index (imt, fp_spd->fp_mask_ids[policy->type])
{
- ipsec_fp_release_mask_type (im, vp->fp_mask_type_id);
- ipsec_sa_unlock (vp->sa_index);
- pool_put (im->policies, vp);
- return 0;
+ if ((fp_spd->fp_mask_ids[policy->type] + imt)->mask_type_idx ==
+ vp->fp_mask_type_id)
+ {
+
+ if ((fp_spd->fp_mask_ids[policy->type] + imt)->refcount-- ==
+ 1)
+ vec_del1 (fp_spd->fp_mask_ids[policy->type], imt);
+
+ break;
+ }
}
+ ipsec_fp_release_mask_type (im, vp->fp_mask_type_id);
+ ipsec_sa_unlock (vp->sa_index);
+ pool_put (im->policies, vp);
+ return 0;
}
}
return -1;
diff --git a/src/vnet/ipsec/ipsec_test.c b/src/vnet/ipsec/ipsec_test.c
index f1436193636..86d09f18a5c 100644
--- a/src/vnet/ipsec/ipsec_test.c
+++ b/src/vnet/ipsec/ipsec_test.c
@@ -282,12 +282,30 @@ vl_api_ipsec_sad_entry_add_reply_t_handler (
{
}
+static void
+vl_api_ipsec_sad_entry_add_v2_reply_t_handler (
+ vl_api_ipsec_sad_entry_add_reply_t *mp)
+{
+}
+
static int
api_ipsec_sad_entry_del (vat_main_t *vat)
{
return -1;
}
+static int
+api_ipsec_sad_bind (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ipsec_sad_unbind (vat_main_t *vat)
+{
+ return -1;
+}
+
static void
vl_api_ipsec_sad_entry_add_del_v2_reply_t_handler (
vl_api_ipsec_sad_entry_add_del_v2_reply_t *mp)
@@ -307,6 +325,12 @@ api_ipsec_sad_entry_add_del_v3 (vat_main_t *vat)
}
static int
+api_ipsec_sad_entry_update (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
api_ipsec_tunnel_protect_update (vat_main_t *vat)
{
return -1;
@@ -324,6 +348,18 @@ api_ipsec_sa_v3_dump (vat_main_t *vat)
}
static int
+api_ipsec_sa_v4_dump (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ipsec_sa_v5_dump (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
api_ipsec_tunnel_protect_dump (vat_main_t *vat)
{
return -1;
@@ -347,6 +383,12 @@ api_ipsec_sad_entry_add (vat_main_t *vat)
return -1;
}
+static int
+api_ipsec_sad_entry_add_v2 (vat_main_t *vat)
+{
+ return -1;
+}
+
static void
vl_api_ipsec_spd_entry_add_del_reply_t_handler (
vl_api_ipsec_spd_entry_add_del_reply_t *mp)
@@ -376,6 +418,16 @@ vl_api_ipsec_sa_v3_details_t_handler (vl_api_ipsec_sa_v3_details_t *mp)
{
}
+static void
+vl_api_ipsec_sa_v4_details_t_handler (vl_api_ipsec_sa_v4_details_t *mp)
+{
+}
+
+static void
+vl_api_ipsec_sa_v5_details_t_handler (vl_api_ipsec_sa_v5_details_t *mp)
+{
+}
+
static int
api_ipsec_spd_interface_dump (vat_main_t *vat)
{
diff --git a/src/vnet/ipsec/ipsec_tun.c b/src/vnet/ipsec/ipsec_tun.c
index 82f5a11d26f..ecda291e985 100644
--- a/src/vnet/ipsec/ipsec_tun.c
+++ b/src/vnet/ipsec/ipsec_tun.c
@@ -236,7 +236,6 @@ ipsec_tun_protect_rx_db_add (ipsec_main_t * im,
if (ip46_address_is_zero (&itp->itp_crypto.dst))
return;
- /* *INDENT-OFF* */
FOR_EACH_IPSEC_PROTECT_INPUT_SAI(itp, sai,
({
sa = ipsec_sa_get (sai);
@@ -291,7 +290,6 @@ ipsec_tun_protect_rx_db_add (ipsec_main_t * im,
ipsec_tun_register_nodes (AF_IP6);
}
}))
- /* *INDENT-ON* */
}
static adj_walk_rc_t
@@ -371,7 +369,6 @@ ipsec_tun_protect_rx_db_remove (ipsec_main_t * im,
{
const ipsec_sa_t *sa;
- /* *INDENT-OFF* */
FOR_EACH_IPSEC_PROTECT_INPUT_SA(itp, sa,
({
if (ip46_address_is_ip4 (&itp->itp_crypto.dst))
@@ -405,7 +402,6 @@ ipsec_tun_protect_rx_db_remove (ipsec_main_t * im,
}
}
}));
- /* *INDENT-ON* */
}
static adj_walk_rc_t
@@ -464,7 +460,6 @@ ipsec_tun_protect_set_crypto_addr (ipsec_tun_protect_t * itp)
{
ipsec_sa_t *sa;
- /* *INDENT-OFF* */
FOR_EACH_IPSEC_PROTECT_INPUT_SA(itp, sa,
({
if (ipsec_sa_is_set_IS_TUNNEL (sa))
@@ -484,7 +479,6 @@ ipsec_tun_protect_set_crypto_addr (ipsec_tun_protect_t * itp)
itp->itp_flags &= ~IPSEC_PROTECT_ENCAPED;
}
}));
- /* *INDENT-ON* */
}
static void
@@ -504,13 +498,11 @@ ipsec_tun_protect_config (ipsec_main_t * im,
if (itp->itp_flags & IPSEC_PROTECT_ITF)
ipsec_sa_set_NO_ALGO_NO_DROP (ipsec_sa_get (itp->itp_out_sa));
- /* *INDENT-OFF* */
FOR_EACH_IPSEC_PROTECT_INPUT_SAI(itp, sai,
({
ipsec_sa_lock(sai);
}));
ipsec_tun_protect_set_crypto_addr(itp);
- /* *INDENT-ON* */
/*
* add to the DB against each SA
@@ -527,7 +519,6 @@ ipsec_tun_protect_unconfig (ipsec_main_t * im, ipsec_tun_protect_t * itp)
ipsec_sa_t *sa;
index_t sai;
- /* *INDENT-OFF* */
FOR_EACH_IPSEC_PROTECT_INPUT_SA(itp, sa,
({
ipsec_sa_unset_IS_PROTECT (sa);
@@ -543,7 +534,6 @@ ipsec_tun_protect_unconfig (ipsec_main_t * im, ipsec_tun_protect_t * itp)
({
ipsec_sa_unlock(sai);
}));
- /* *INDENT-ON* */
ITP_DBG (itp, "unconfigured");
}
@@ -751,12 +741,10 @@ ipsec_tun_protect_walk (ipsec_tun_protect_walk_cb_t fn, void *ctx)
{
index_t itpi;
- /* *INDENT-OFF* */
pool_foreach_index (itpi, ipsec_tun_protect_pool)
{
fn (itpi, ctx);
}
- /* *INDENT-ON* */
}
void
@@ -772,12 +760,10 @@ ipsec_tun_protect_walk_itf (u32 sw_if_index,
idi = &itp_db.id_itf[sw_if_index];
- /* *INDENT-OFF* */
hash_foreach(key, itpi, idi->id_hash,
({
fn (itpi, ctx);
}));
- /* *INDENT-ON* */
if (INDEX_INVALID != idi->id_itp)
fn (idi->id_itp, ctx);
}
diff --git a/src/vnet/ipsec/ipsec_tun_in.c b/src/vnet/ipsec/ipsec_tun_in.c
index 9f1e2d6c5a1..c82de3ebaff 100644
--- a/src/vnet/ipsec/ipsec_tun_in.c
+++ b/src/vnet/ipsec/ipsec_tun_in.c
@@ -278,6 +278,7 @@ ipsec_tun_protect_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
else
{
next[0] = ipsec_ip6_if_no_tunnel (node, b[0], esp0, ip60);
+ vlib_buffer_advance (b[0], -buf_rewind0);
n_no_tunnel++;
goto trace00;
}
@@ -410,7 +411,6 @@ VLIB_NODE_FN (ipsec4_tun_input_node) (vlib_main_t * vm,
return ipsec_tun_protect_input_inline (vm, node, from_frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ipsec4_tun_input_node) = {
.name = "ipsec4-tun-input",
.vector_size = sizeof (u32),
@@ -420,7 +420,6 @@ VLIB_REGISTER_NODE (ipsec4_tun_input_node) = {
.error_counters = ipsec_tun_error_counters,
.sibling_of = "device-input",
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ipsec6_tun_input_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -429,7 +428,6 @@ VLIB_NODE_FN (ipsec6_tun_input_node) (vlib_main_t * vm,
return ipsec_tun_protect_input_inline (vm, node, from_frame, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ipsec6_tun_input_node) = {
.name = "ipsec6-tun-input",
.vector_size = sizeof (u32),
@@ -439,7 +437,6 @@ VLIB_REGISTER_NODE (ipsec6_tun_input_node) = {
.error_counters = ipsec_tun_error_counters,
.sibling_of = "device-input",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipsec/ipsec_types.api b/src/vnet/ipsec/ipsec_types.api
index 3f894348bcb..37c1141ab46 100644
--- a/src/vnet/ipsec/ipsec_types.api
+++ b/src/vnet/ipsec/ipsec_types.api
@@ -37,6 +37,9 @@ enum ipsec_crypto_alg
IPSEC_API_CRYPTO_ALG_DES_CBC,
IPSEC_API_CRYPTO_ALG_3DES_CBC,
IPSEC_API_CRYPTO_ALG_CHACHA20_POLY1305 [backwards_compatible],
+ IPSEC_API_CRYPTO_ALG_AES_NULL_GMAC_128 [backwards_compatible],
+ IPSEC_API_CRYPTO_ALG_AES_NULL_GMAC_192 [backwards_compatible],
+ IPSEC_API_CRYPTO_ALG_AES_NULL_GMAC_256 [backwards_compatible],
};
/*
@@ -193,9 +196,6 @@ typedef ipsec_spd_entry_v2
/** \brief IPsec: Security Association Database entry
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param is_add - add SAD entry if non-zero, else delete
@param sad_id - sad id
@param spi - security parameter index
@param protocol - 0 = AH, 1 = ESP
@@ -203,6 +203,7 @@ typedef ipsec_spd_entry_v2
@param crypto_key - crypto keying material
@param integrity_algorithm - one of the supported algorithms
@param integrity_key - integrity keying material
+ @param flags - SA flags (see ipsec_sad_flags above)
@param tunnel_src_address - IPsec tunnel source address IPv6 if is_tunnel_ipv6 is non-zero, else IPv4. Only valid if is_tunnel is non-zero
@param tunnel_dst_address - IPsec tunnel destination address IPv6 if is_tunnel_ipv6 is non-zero, else IPv4. Only valid if is_tunnel is non-zero
@param tx_table_id - the FIB id used for encapsulated packets
@@ -287,6 +288,46 @@ typedef ipsec_sad_entry_v3
u16 udp_dst_port [default=4500];
};
+/** \brief IPsec: Security Association Database entry
+ @param sad_id - sad id
+ @param spi - security parameter index
+ @param protocol - 0 = AH, 1 = ESP
+ @param crypto_algorithm - a supported crypto algorithm
+ @param crypto_key - crypto keying material
+ @param integrity_algorithm - one of the supported algorithms
+ @param integrity_key - integrity keying material
+ @param flags - SA flags (see ipsec_sad_flags above)
+ @param tunnel - tunnel description (see vnet/tunnel/tunnel_types.api)
+ @param salt - for use with counter mode ciphers
+ @param udp_src_port - If using UDP Encapsulation, use this source port for
+ TX. It is ignored for RX.
+ @param udp_dst_port - If using UDP Encapsulation, use this destination port
+ for TX. Expect traffic on this port for RX.
+ @param anti_replay_window_size - AR window size to use. The supplied value is round up to the nearest power of 2.
+ */
+typedef ipsec_sad_entry_v4
+{
+ u32 sad_id;
+ u32 spi;
+
+ vl_api_ipsec_proto_t protocol;
+
+ vl_api_ipsec_crypto_alg_t crypto_algorithm;
+ vl_api_key_t crypto_key;
+
+ vl_api_ipsec_integ_alg_t integrity_algorithm;
+ vl_api_key_t integrity_key;
+
+ vl_api_ipsec_sad_flags_t flags;
+
+ vl_api_tunnel_t tunnel;
+
+ u32 salt;
+ u16 udp_src_port [default=4500];
+ u16 udp_dst_port [default=4500];
+
+ u32 anti_replay_window_size [default=64];
+};
/*
* Local Variables:
diff --git a/src/vnet/l2/feat_bitmap.c b/src/vnet/l2/feat_bitmap.c
index 349ec67462b..507fe365f07 100644
--- a/src/vnet/l2/feat_bitmap.c
+++ b/src/vnet/l2/feat_bitmap.c
@@ -155,7 +155,6 @@ feat_bitmap_drop_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (feat_bitmap_drop_init);
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (feat_bitmap_drop_node,static) = {
.function = feat_bitmap_drop_node_fn,
.name = "feature-bitmap-drop",
@@ -173,7 +172,6 @@ VLIB_REGISTER_NODE (feat_bitmap_drop_node,static) = {
[FEAT_BITMAP_DROP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api
index b0ac23f705a..ccba9aa3df1 100644
--- a/src/vnet/l2/l2.api
+++ b/src/vnet/l2/l2.api
@@ -1,6 +1,7 @@
/* Hey Emacs use -*- mode: C -*- */
/*
* Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2022 Nordix Foundation.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -14,7 +15,7 @@
* limitations under the License.
*/
-option version = "3.1.0";
+option version = "3.2.0";
import "vnet/ip/ip_types.api";
import "vnet/ethernet/ethernet_types.api";
@@ -304,7 +305,7 @@ autoreply define bridge_domain_set_learn_limit
u32 learn_limit;
};
-/** \brief L2 bridge domain add or delete request
+/** \brief L2 bridge domain add or delete request - will be deprecated
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param bd_id - the bridge domain to create
@@ -319,6 +320,7 @@ autoreply define bridge_domain_set_learn_limit
*/
autoreply define bridge_domain_add_del
{
+ option deprecated;
u32 client_index;
u32 context;
u32 bd_id;
@@ -333,6 +335,49 @@ autoreply define bridge_domain_add_del
bool is_add [default=true];
};
+/** \brief L2 bridge domain add delete request version 2
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bd_id - if the id == ~0 creates a bridge domain with an unused id
+ if the id != ~0 the id of the bridge domain to create/delete
+ @param flood - enable/disable bcast/mcast flooding in the bd
+ @param uu_flood - enable/disable unknown unicast flood in the bd
+ @param forward - enable/disable forwarding on all interfaces in the bd
+ @param learn - enable/disable learning on all interfaces in the bd
+ @param arp_term - enable/disable arp termination in the bd
+ @param arp_ufwd - enable/disable arp unicast forwarding in the bd
+ @param mac_age - mac aging time in min, 0 for disabled
+ @param is_add - add or delete flag
+*/
+define bridge_domain_add_del_v2
+{
+ u32 client_index;
+ u32 context;
+ u32 bd_id;
+ bool flood;
+ bool uu_flood;
+ bool forward;
+ bool learn;
+ bool arp_term;
+ bool arp_ufwd;
+ u8 mac_age;
+ string bd_tag[64];
+ bool is_add [default=true];
+};
+
+/** \brief L2 bridge domain add delete version 2 response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the set bridge flags request
+ @param resulting_id - the id for the new bridge domain
+*/
+define bridge_domain_add_del_v2_reply
+{
+ u32 context;
+ i32 retval;
+ u32 bd_id;
+};
+
+
/** \brief L2 bridge domain request operational state details
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c
index c555a17d5ea..035542d298d 100644
--- a/src/vnet/l2/l2_api.c
+++ b/src/vnet/l2/l2_api.c
@@ -3,6 +3,7 @@
* l2_api.c - layer 2 forwarding api
*
* Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2022 Nordix Foundation.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -67,7 +68,6 @@ vl_api_l2_xconnect_dump_t_handler (vl_api_l2_xconnect_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
vec_foreach_index (sw_if_index, l2im->configs)
{
config = vec_elt_at_index (l2im->configs, sw_if_index);
@@ -75,7 +75,6 @@ vl_api_l2_xconnect_dump_t_handler (vl_api_l2_xconnect_dump_t * mp)
send_l2_xconnect_details (reg, mp->context, sw_if_index,
config->output_sw_if_index);
}
- /* *INDENT-ON* */
}
static void
@@ -413,12 +412,10 @@ vl_api_l2_flags_t_handler (vl_api_l2_flags_t * mp)
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_L2_FLAGS_REPLY,
({
rmp->resulting_feature_bitmap = ntohl(rbm);
}));
- /* *INDENT-ON* */
}
static void
@@ -511,6 +508,37 @@ vl_api_bridge_domain_add_del_t_handler (vl_api_bridge_domain_add_del_t * mp)
}
static void
+vl_api_bridge_domain_add_del_v2_t_handler (
+ vl_api_bridge_domain_add_del_v2_t *mp)
+{
+ vl_api_bridge_domain_add_del_v2_reply_t *rmp;
+ u32 bd_id = ntohl (mp->bd_id);
+ int rv = 0;
+
+ if ((~0 == bd_id) && (mp->is_add))
+ bd_id = bd_get_unused_id ();
+
+ if ((~0 == bd_id) && (mp->is_add))
+ rv = VNET_API_ERROR_EAGAIN;
+ else
+ {
+ l2_bridge_domain_add_del_args_t a = { .is_add = mp->is_add,
+ .flood = mp->flood,
+ .uu_flood = mp->uu_flood,
+ .forward = mp->forward,
+ .learn = mp->learn,
+ .arp_term = mp->arp_term,
+ .arp_ufwd = mp->arp_ufwd,
+ .mac_age = mp->mac_age,
+ .bd_id = bd_id,
+ .bd_tag = mp->bd_tag };
+ rv = bd_add_del (&a);
+ }
+ REPLY_MACRO2 (VL_API_BRIDGE_DOMAIN_ADD_DEL_V2_REPLY,
+ ({ rmp->bd_id = htonl (bd_id); }));
+}
+
+static void
send_bridge_domain_details (l2input_main_t * l2im,
vl_api_registration_t * reg,
l2_bridge_domain_t * bd_config,
@@ -651,12 +679,10 @@ vl_api_bridge_flags_t_handler (vl_api_bridge_flags_t * mp)
bitmap = bd_set_flags (vm, bd_index, flags, mp->is_set);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_BRIDGE_FLAGS_REPLY,
({
rmp->resulting_feature_bitmap = ntohl(bitmap);
}));
- /* *INDENT-ON* */
}
static void
@@ -918,7 +944,6 @@ vl_api_bd_ip_mac_dump_t_handler (vl_api_bd_ip_mac_dump_t * mp)
u64 mac64;
bd_id = bd_config->bd_id;
- /* *INDENT-OFF* */
hash_foreach (ip4_addr.as_u32, mac64, bd_config->mac_by_ip4,
({
ip46_address_t ip = {
@@ -940,7 +965,6 @@ vl_api_bd_ip_mac_dump_t_handler (vl_api_bd_ip_mac_dump_t * mp)
send_bd_ip_mac_entry (am, reg, bd_id, &ip, IP46_TYPE_IP6,
&mac, mp->context);
}));
- /* *INDENT-ON* */
}
}
}
@@ -1094,12 +1118,10 @@ vl_api_bvi_create_t_handler (vl_api_bvi_create_t * mp)
rv = l2_bvi_create (ntohl (mp->user_instance), &mac, &sw_if_index);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_BVI_CREATE_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -1193,13 +1215,11 @@ l2_arp_term_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_arp_term_process_node) = {
.function = l2_arp_term_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "l2-arp-term-publisher",
};
-/* *INDENT-ON* */
static void
vl_api_want_l2_arp_term_events_t_handler (vl_api_want_l2_arp_term_events_t *
@@ -1280,14 +1300,15 @@ l2_api_hookup (vlib_main_t * vm)
{
api_main_t *am = vlibapi_get_main ();
- /* Mark VL_API_BRIDGE_DOMAIN_DUMP as mp safe */
- vl_api_set_msg_thread_safe (am, VL_API_BRIDGE_DOMAIN_DUMP, 1);
-
/*
* Set up the (msg_name, crc, message-id) table
*/
REPLY_MSG_ID_BASE = setup_message_id_table ();
+ /* Mark VL_API_BRIDGE_DOMAIN_DUMP as mp safe */
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_BRIDGE_DOMAIN_DUMP, 1);
+
return 0;
}
diff --git a/src/vnet/l2/l2_arp_term.c b/src/vnet/l2/l2_arp_term.c
index 594ee8e3622..eed9b7af7c3 100644
--- a/src/vnet/l2/l2_arp_term.c
+++ b/src/vnet/l2/l2_arp_term.c
@@ -449,7 +449,6 @@ arp_term_l2bd (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
.function = arp_term_l2bd,
.name = "arp-term-l2bd",
@@ -464,7 +463,6 @@ VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
.format_buffer = format_ethernet_arp_header,
.format_trace = format_arp_term_input_trace,
};
-/* *INDENT-ON* */
clib_error_t *
arp_term_init (vlib_main_t * vm)
diff --git a/src/vnet/l2/l2_bd.c b/src/vnet/l2/l2_bd.c
index 7e6ea60b440..c7392c03b58 100644
--- a/src/vnet/l2/l2_bd.c
+++ b/src/vnet/l2/l2_bd.c
@@ -102,12 +102,10 @@ bd_free_ip_mac_tables (l2_bridge_domain_t * bd)
ip6_address_t *ip6_addr_key;
hash_free (bd->mac_by_ip4);
- /* *INDENT-OFF* */
hash_foreach_mem (ip6_addr_key, mac_addr, bd->mac_by_ip6,
({
clib_mem_free (ip6_addr_key); /* free memory used for ip6 addr key */
}));
- /* *INDENT-ON* */
hash_free (bd->mac_by_ip6);
}
@@ -454,13 +452,11 @@ done:
* Example of how to disable learning (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain learn 200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_learn_cli, static) = {
.path = "set bridge-domain learn",
.short_help = "set bridge-domain learn <bridge-domain-id> [disable]",
.function = bd_learn,
};
-/* *INDENT-ON* */
static clib_error_t *
bd_default_learn_limit (vlib_main_t *vm, unformat_input_t *input,
@@ -547,13 +543,11 @@ done:
* Example of how to disable forwarding (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain forward 200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_fwd_cli, static) = {
.path = "set bridge-domain forward",
.short_help = "set bridge-domain forward <bridge-domain-id> [disable]",
.function = bd_fwd,
};
-/* *INDENT-ON* */
/**
Set bridge-domain flood enable/disable.
@@ -612,13 +606,11 @@ done:
* Example of how to disable flooding (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain flood 200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_flood_cli, static) = {
.path = "set bridge-domain flood",
.short_help = "set bridge-domain flood <bridge-domain-id> [disable]",
.function = bd_flood,
};
-/* *INDENT-ON* */
/**
Set bridge-domain unknown-unicast flood enable/disable.
@@ -677,13 +669,11 @@ done:
* Example of how to disable unknown-unicast flooding (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain uu-flood 200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_uu_flood_cli, static) = {
.path = "set bridge-domain uu-flood",
.short_help = "set bridge-domain uu-flood <bridge-domain-id> [disable]",
.function = bd_uu_flood,
};
-/* *INDENT-ON* */
/**
Set bridge-domain arp-unicast forward enable/disable.
@@ -742,13 +732,11 @@ done:
* Example of how to disable arp-unicast forwarding (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain arp-ufwd 200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_arp_ufwd_cli, static) = {
.path = "set bridge-domain arp-ufwd",
.short_help = "set bridge-domain arp-ufwd <bridge-domain-id> [disable]",
.function = bd_arp_ufwd,
};
-/* *INDENT-ON* */
/**
Set bridge-domain arp term enable/disable.
@@ -854,13 +842,11 @@ done:
* Example of how to disable mac aging (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain flood 200 0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_mac_age_cli, static) = {
.path = "set bridge-domain mac-age",
.short_help = "set bridge-domain mac-age <bridge-domain-id> <mins>",
.function = bd_mac_age,
};
-/* *INDENT-ON* */
static clib_error_t *
bd_learn_limit (vlib_main_t *vm, unformat_input_t *input,
@@ -921,13 +907,11 @@ VLIB_CLI_COMMAND (bd_learn_limit_cli, static) = {
* Example of how to disable ARP termination (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain arp term 200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_arp_term_cli, static) = {
.path = "set bridge-domain arp term",
.short_help = "set bridge-domain arp term <bridge-domain-id> [disable]",
.function = bd_arp_term,
};
-/* *INDENT-ON* */
/**
@@ -1119,13 +1103,11 @@ done:
* Example of how to delete an ARP entry (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain arp entry 200 192.168.72.45 52:54:00:3b:83:1a del}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_arp_entry_cli, static) = {
.path = "set bridge-domain arp entry",
.short_help = "set bridge-domain arp entry <bridge-domain-id> [<ip-addr> <mac-addr> [del] | del-all]",
.function = bd_arp_entry,
};
-/* *INDENT-ON* */
static u8 *
format_uu_cfg (u8 * s, va_list * args)
@@ -1289,7 +1271,6 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
vlib_cli_output (vm,
"\n IP4/IP6 to MAC table for ARP Termination");
- /* *INDENT-OFF* */
hash_foreach (ip4_addr, mac_addr, bd_config->mac_by_ip4,
({
vlib_cli_output (vm, "%=40U => %=20U",
@@ -1303,7 +1284,6 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
format_ip6_address, ip6_addr,
format_ethernet_address, &mac_addr);
}));
- /* *INDENT-ON* */
}
if ((detail || bd_tag) && (bd_config->bd_tag))
@@ -1349,13 +1329,11 @@ done:
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_show_cli, static) = {
.path = "show bridge-domain",
.short_help = "show bridge-domain [bridge-domain-id [detail|int|arp|bd-tag]]",
.function = bd_show,
};
-/* *INDENT-ON* */
int
bd_add_del (l2_bridge_domain_add_del_args_t * a)
@@ -1493,8 +1471,15 @@ bd_add_del_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (bd_id == ~0)
{
- error = clib_error_return (0, "bridge-domain-id not specified");
- goto done;
+ if (is_add)
+ {
+ bd_id = bd_get_unused_id ();
+ }
+ else
+ {
+ error = clib_error_return (0, "bridge-domain-id not specified");
+ goto done;
+ }
}
if (bd_id == 0)
@@ -1587,7 +1572,6 @@ done:
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_create_cli, static) = {
.path = "create bridge-domain",
.short_help = "create bridge-domain <bridge-domain-id>"
@@ -1595,9 +1579,38 @@ VLIB_CLI_COMMAND (bd_create_cli, static) = {
" [arp-ufwd <0|1>] [mac-age <nn>] [bd-tag <tag>] [del]",
.function = bd_add_del_command_fn,
};
-/* *INDENT-ON* */
+/*
+ * Returns an unused bridge domain id, and ~0 if it can't find one.
+ */
+u32
+bd_get_unused_id (void)
+{
+ bd_main_t *bdm = &bd_main;
+ int i, j;
+ static u32 seed = 0;
+ /* limit to 1M tries */
+ for (j = 0; j < 1 << 10; j++)
+ {
+ seed = random_u32 (&seed);
+ for (i = 0; i < 1 << 10; i++)
+ {
+ /*
+ * iterate seed+0, seed+1, seed-1, seed+2, seed-2, ... to generate id
+ */
+ seed += (2 * (i % 2) - 1) * i;
+ /* bd_id must be (1 <= bd_id <= L2_BD_ID_MAX) */
+ seed &= L2_BD_ID_MAX;
+ if (seed == 0)
+ continue;
+ if (bd_find_index (bdm, seed) == ~0)
+ return seed;
+ }
+ }
+
+ return ~0;
+}
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_bd.h b/src/vnet/l2/l2_bd.h
index 0d77292519d..082d210b972 100644
--- a/src/vnet/l2/l2_bd.h
+++ b/src/vnet/l2/l2_bd.h
@@ -2,6 +2,7 @@
* l2_bd.h : layer 2 bridge domain
*
* Copyright (c) 2013 Cisco and/or its affiliates.
+ * Copyright (c) 2022 Nordix Foundation.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -166,7 +167,7 @@ u32 bd_set_flags (vlib_main_t * vm, u32 bd_index, bd_flags_t flags,
void bd_set_mac_age (vlib_main_t * vm, u32 bd_index, u8 age);
void bd_set_learn_limit (vlib_main_t *vm, u32 bd_index, u32 learn_limit);
int bd_add_del (l2_bridge_domain_add_del_args_t * args);
-
+u32 bd_get_unused_id (void);
/**
* \brief Get a bridge domain.
*
diff --git a/src/vnet/l2/l2_bvi.c b/src/vnet/l2/l2_bvi.c
index 9cfff55fb45..e39c4aae39d 100644
--- a/src/vnet/l2/l2_bvi.c
+++ b/src/vnet/l2/l2_bvi.c
@@ -58,14 +58,12 @@ bvi_mac_change (vnet_hw_interface_t * hi,
return (NULL);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (bvi_device_class) = {
.name = "BVI",
.format_device_name = format_bvi_name,
.admin_up_down_function = bvi_admin_up_down,
.mac_addr_change_function = bvi_mac_change,
};
-/* *INDENT-ON* */
/*
* Maintain a bitmap of allocated bvi instance numbers.
@@ -273,13 +271,11 @@ l2_bvi_create_cli (vlib_main_t * vm,
* Example of how to create a bvi interface:
* @cliexcmd{bvi create}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_bvi_create_command, static) = {
.path = "bvi create",
.short_help = "bvi create [mac <mac-addr>] [instance <instance>]",
.function = l2_bvi_create_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
l2_bvi_delete_cli (vlib_main_t * vm,
@@ -324,13 +320,11 @@ l2_bvi_delete_cli (vlib_main_t * vm,
* Example of how to create a bvi interface:
* @cliexcmd{bvi delete bvi0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_bvi_delete_command, static) = {
.path = "bvi delete",
.short_help = "bvi delete <interface>",
.function = l2_bvi_delete_cli,
};
-/* *INDENT-ON* */
/*
diff --git a/src/vnet/l2/l2_classify.h b/src/vnet/l2/l2_classify.h
index 68a2bb98e64..3c86fb5ca86 100644
--- a/src/vnet/l2/l2_classify.h
+++ b/src/vnet/l2/l2_classify.h
@@ -39,7 +39,6 @@ typedef enum
L2_INPUT_CLASSIFY_NEXT_ETHERNET_INPUT,
L2_INPUT_CLASSIFY_NEXT_IP4_INPUT,
L2_INPUT_CLASSIFY_NEXT_IP6_INPUT,
- L2_INPUT_CLASSIFY_NEXT_LI,
L2_INPUT_CLASSIFY_N_NEXT,
} l2_input_classify_next_t;
diff --git a/src/vnet/l2/l2_efp_filter.c b/src/vnet/l2/l2_efp_filter.c
index ad325b83df2..47256ffa5d3 100644
--- a/src/vnet/l2/l2_efp_filter.c
+++ b/src/vnet/l2/l2_efp_filter.c
@@ -461,7 +461,6 @@ VLIB_NODE_FN (l2_efp_filter_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_efp_filter_node) = {
.name = "l2-efp-filter",
.vector_size = sizeof (u32),
@@ -478,7 +477,6 @@ VLIB_REGISTER_NODE (l2_efp_filter_node) = {
[L2_EFP_FILTER_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
clib_error_t *
@@ -559,13 +557,11 @@ done:
* Example of how to disable a Layer 2 efp-filter on a sub-interface:
* @cliexcmd{set interface l2 efp-filter GigabitEthernet0/8/0.200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_efp_filter_cli, static) = {
.path = "set interface l2 efp-filter",
.short_help = "set interface l2 efp-filter <interface> [disable]",
.function = int_l2_efp_filter,
};
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c
index d9d6710fd15..3dcd1e7ae26 100644
--- a/src/vnet/l2/l2_fib.c
+++ b/src/vnet/l2/l2_fib.c
@@ -95,8 +95,7 @@ format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args)
if (!swif)
return format (s, "Stale");
- return format (s, "%U", format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface_or_null (vnm, sw_if_index));
+ return format (s, "%U", format_vnet_sw_if_index_name, vnm, sw_if_index);
}
typedef struct l2fib_dump_walk_ctx_t_
@@ -353,13 +352,11 @@ show_l2fib (vlib_main_t * vm,
* 3 l2fib entries
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_l2fib_cli, static) = {
.path = "show l2fib",
.short_help = "show l2fib [all] | [bd_id <nn> | bd_index <nn>] [learn | add] | [raw]",
.function = show_l2fib,
};
-/* *INDENT-ON* */
void
l2fib_table_init (void)
@@ -416,13 +413,11 @@ clear_l2fib (vlib_main_t * vm,
* no l2fib entries
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_l2fib_cli, static) = {
.path = "clear l2fib",
.short_help = "clear l2fib",
.function = clear_l2fib,
};
-/* *INDENT-ON* */
static l2fib_seq_num_t
l2fib_cur_seq_num (u32 bd_index, u32 sw_if_index)
@@ -593,13 +588,11 @@ done:
* 3 l2fib entries
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2fib_add_cli, static) = {
.path = "l2fib add",
.short_help = "l2fib add <mac> <bridge-domain-id> filter | <intf> [static | bvi]",
.function = l2fib_add,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -724,13 +717,11 @@ l2fib_test_command_fn (vlib_main_t * vm,
* @cliexcmd{test l2fib del mac 52:54:00:53:00:00 count 4}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2fib_test_command, static) = {
.path = "test l2fib",
.short_help = "test l2fib [add|del|check] mac <base-addr> count <nn>",
.function = l2fib_test_command_fn,
};
-/* *INDENT-ON* */
/**
@@ -833,13 +824,11 @@ done:
* Example of how to delete a MAC Address entry from the L2 FIB table of a bridge-domain (where 200 is the bridge-domain-id):
* @cliexcmd{l2fib del 52:54:00:53:18:33 200}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2fib_del_cli, static) = {
.path = "l2fib del",
.short_help = "l2fib del <mac> <bridge-domain-id> []",
.function = l2fib_del,
};
-/* *INDENT-ON* */
static clib_error_t *
l2fib_set_scan_delay (vlib_main_t *vm, unformat_input_t *input,
@@ -977,13 +966,11 @@ l2fib_flush_mac_all (vlib_main_t * vm,
* Example of how to flush MAC Address entries learned on an interface from the L2 FIB table:
* @cliexcmd{l2fib flush-mac interface GigabitEthernet2/1/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2fib_flush_mac_all_cli, static) = {
.path = "l2fib flush-mac all",
.short_help = "l2fib flush-mac all",
.function = l2fib_flush_mac_all,
};
-/* *INDENT-ON* */
/*?
* This command kick off ager to delete all existing MAC Address entries,
@@ -993,13 +980,11 @@ VLIB_CLI_COMMAND (l2fib_flush_mac_all_cli, static) = {
* Example of how to flush MAC Address entries learned on an interface from the L2 FIB table:
* @cliexcmd{l2fib flush-mac interface GigabitEthernet2/1/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2fib_flush_mac_int_cli, static) = {
.path = "l2fib flush-mac interface",
.short_help = "l2fib flush-mac interface <if-name>",
.function = l2fib_flush_mac_int,
};
-/* *INDENT-ON* */
/**
Flush bridge-domain MACs except static ones.
@@ -1042,13 +1027,11 @@ done:
* Example of how to flush MAC Address entries learned in a bridge domain from the L2 FIB table:
* @cliexcmd{l2fib flush-mac bridge-domain 1000}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2fib_flush_mac_bd_cli, static) = {
.path = "l2fib flush-mac bridge-domain",
.short_help = "l2fib flush-mac bridge-domain <bd-id>",
.function = l2fib_flush_mac_bd,
};
-/* *INDENT-ON* */
clib_error_t *
l2fib_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
@@ -1149,7 +1132,7 @@ l2fib_scan (vlib_main_t * vm, f64 start_time, u8 event_only)
{
for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
{
- if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL)
+ if (BV (clib_bihash_is_free) (&v->kvp[k]))
continue;
l2fib_entry_key_t key = {.raw = v->kvp[k].key };
@@ -1366,13 +1349,11 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2fib_mac_age_scanner_process_node) = {
.function = l2fib_mac_age_scanner_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "l2fib-mac-age-scanner-process",
};
-/* *INDENT-ON* */
clib_error_t *
l2fib_init (vlib_main_t * vm)
diff --git a/src/vnet/l2/l2_flood.c b/src/vnet/l2/l2_flood.c
index c0d7bf8dfab..f8cb3cb5687 100644
--- a/src/vnet/l2/l2_flood.c
+++ b/src/vnet/l2/l2_flood.c
@@ -362,7 +362,6 @@ VLIB_NODE_FN (l2flood_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2flood_node) = {
.name = "l2-flood",
.vector_size = sizeof (u32),
@@ -380,7 +379,6 @@ VLIB_REGISTER_NODE (l2flood_node) = {
[L2FLOOD_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
clib_error_t *
@@ -468,13 +466,11 @@ done:
* Example of how to disable flooding:
* @cliexcmd{set interface l2 flood GigabitEthernet0/8/0 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_flood_cli, static) = {
.path = "set interface l2 flood",
.short_help = "set interface l2 flood <interface> [disable]",
.function = int_flood,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_fwd.c b/src/vnet/l2/l2_fwd.c
index 1ee3a534cd7..503dfc27957 100644
--- a/src/vnet/l2/l2_fwd.c
+++ b/src/vnet/l2/l2_fwd.c
@@ -288,7 +288,6 @@ l2fwd_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
#ifdef COUNTERS
em->counters[node_counter_base_index + L2FWD_ERROR_L2FWD] += 4;
#endif
- /* *INDENT-OFF* */
l2fib_lookup_4 (msm->mac_table, &cached_key, &cached_result,
h0->dst_address, h1->dst_address,
h2->dst_address, h3->dst_address,
@@ -304,7 +303,6 @@ l2fwd_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
&result1,
&result2,
&result3);
- /* *INDENT-ON* */
l2fwd_process (vm, node, msm, em, b[0], sw_if_index0, &result0, next);
l2fwd_process (vm, node, msm, em, b[1], sw_if_index1, &result1,
next + 1);
@@ -414,7 +412,6 @@ VLIB_NODE_FN (l2fwd_node) (vlib_main_t * vm,
return l2fwd_node_inline (vm, node, frame, 0 /* do_trace */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2fwd_node) = {
.name = "l2-fwd",
.vector_size = sizeof (u32),
@@ -432,7 +429,6 @@ VLIB_REGISTER_NODE (l2fwd_node) = {
[L2FWD_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
clib_error_t *
@@ -527,13 +523,11 @@ done:
* Example of how to disable forwarding:
* @cliexcmd{set interface l2 forward GigabitEthernet0/8/0 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_fwd_cli, static) = {
.path = "set interface l2 forward",
.short_help = "set interface l2 forward <interface> [disable]",
.function = int_fwd,
};
-/* *INDENT-ON* */
#endif
diff --git a/src/vnet/l2/l2_in_out_acl.c b/src/vnet/l2/l2_in_out_acl.c
index 7307a6802a2..2e2cb1e7f36 100644
--- a/src/vnet/l2/l2_in_out_acl.c
+++ b/src/vnet/l2/l2_in_out_acl.c
@@ -464,7 +464,6 @@ VLIB_NODE_FN (l2_outacl_node) (vlib_main_t * vm,
IN_OUT_ACL_OUTPUT_TABLE_GROUP);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_inacl_node) = {
.name = "l2-input-acl",
.vector_size = sizeof (u32),
@@ -498,7 +497,6 @@ VLIB_REGISTER_NODE (l2_outacl_node) = {
[ACL_NEXT_INDEX_DENY] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vnet/l2/l2_in_out_feat_arc.c b/src/vnet/l2/l2_in_out_feat_arc.c
index 94c4c1bb713..26fbd3eb776 100644
--- a/src/vnet/l2/l2_in_out_feat_arc.c
+++ b/src/vnet/l2/l2_in_out_feat_arc.c
@@ -396,7 +396,6 @@ vnet_l2_in_out_feat_arc_enable_disable (u32 sw_if_index, int is_output,
}
#endif /* CLIB_MARCH_VARIANT */
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (l2_in_ip4_arc, static) =
{
.arc_name = "l2-input-ip4",
@@ -438,10 +437,8 @@ VNET_FEATURE_ARC_INIT (l2_in_nonip_arc, static) =
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_in_feat_arc_node) = {
.name = "l2-input-feat-arc",
.vector_size = sizeof (u32),
@@ -521,7 +518,6 @@ VNET_FEATURE_INIT (l2_out_nonip_arc_end, static) =
.node_name = "l2-output-feat-arc-end",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c
index b09555aa6ed..23bd5cc9958 100644
--- a/src/vnet/l2/l2_input.c
+++ b/src/vnet/l2/l2_input.c
@@ -646,13 +646,11 @@ done:
* Example of how to remove an interface from a Layer2 bridge-domain:
* @cliexcmd{set interface l3 GigabitEthernet0/a/0.200}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_bridge_cli, static) = {
.path = "set interface l2 bridge",
.short_help = "set interface l2 bridge <interface> <bridge-domain-id> [bvi|uu-fwd] [shg]",
.function = int_l2_bridge,
};
-/* *INDENT-ON* */
/**
* Set subinterface in xconnect mode with another interface.
@@ -712,13 +710,11 @@ done:
* @cliexcmd{set interface l3 GigabitEthernet0/8/0.300}
* @cliexcmd{set interface l3 GigabitEthernet0/9/0.300}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_xc_cli, static) = {
.path = "set interface l2 xconnect",
.short_help = "set interface l2 xconnect <interface> <peer interface>",
.function = int_l2_xc,
};
-/* *INDENT-ON* */
/**
* Set subinterface in L3 mode.
@@ -762,13 +758,11 @@ done:
* Example of how to set the mode of an interface to Layer 3:
* @cliexcmd{set interface l3 GigabitEthernet0/8/0.200}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l3_cli, static) = {
.path = "set interface l3",
.short_help = "set interface l3 <interface>",
.function = int_l3,
};
-/* *INDENT-ON* */
/**
* Show interface mode.
@@ -810,9 +804,7 @@ show_int_mode (vlib_main_t * vm,
/* Gather interfaces. */
sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
vec_set_len (sis, 0);
- /* *INDENT-OFF* */
pool_foreach (si, im->sw_interfaces) { vec_add1 (sis, si[0]); }
- /* *INDENT-ON* */
}
vec_foreach (si, sis)
@@ -878,13 +870,11 @@ done:
* l2 bridge GigabitEthernet0/8/0.200 bd_id 200 shg 0
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_l2_mode, static) = {
.path = "show mode",
.short_help = "show mode [<if-name1> <if-name2> ...]",
.function = show_int_mode,
};
-/* *INDENT-ON* */
#define foreach_l2_init_function \
_(feat_bitmap_drop_init) \
diff --git a/src/vnet/l2/l2_input.h b/src/vnet/l2/l2_input.h
index 7d1dc9c1d05..3de1537b45e 100644
--- a/src/vnet/l2/l2_input.h
+++ b/src/vnet/l2/l2_input.h
@@ -27,6 +27,7 @@
#include <vnet/ethernet/packet.h>
#include <vnet/ip/ip4_inlines.h>
#include <vnet/ip/ip6_inlines.h>
+#include <vnet/mpls/mpls_lookup.h>
/* l2 connection type */
typedef enum l2_input_flags_t_
@@ -327,7 +328,7 @@ vnet_update_l2_len (vlib_buffer_t *b)
/*
* Compute flow hash of an ethernet packet, use 5-tuple hash if L3 packet
- * is ip4 or ip6. Otherwise hash on smac/dmac/etype.
+ * is ip4, ip6, or mpls. Otherwise hash on smac/dmac/etype.
* The vlib buffer current pointer is expected to be at ethernet header
* and vnet l2.l2_len is expected to be setup already.
*/
@@ -342,6 +343,9 @@ vnet_l2_compute_flow_hash (vlib_buffer_t * b)
return ip4_compute_flow_hash ((ip4_header_t *) l3h, IP_FLOW_HASH_DEFAULT);
else if (ethertype == ETHERNET_TYPE_IP6)
return ip6_compute_flow_hash ((ip6_header_t *) l3h, IP_FLOW_HASH_DEFAULT);
+ else if (ethertype == ETHERNET_TYPE_MPLS)
+ return mpls_compute_flow_hash ((mpls_unicast_header_t *) l3h,
+ IP_FLOW_HASH_DEFAULT);
else
{
u32 a, b, c;
diff --git a/src/vnet/l2/l2_input_classify.c b/src/vnet/l2/l2_input_classify.c
index d33a0810d28..cc031bd46a5 100644
--- a/src/vnet/l2/l2_input_classify.c
+++ b/src/vnet/l2/l2_input_classify.c
@@ -442,7 +442,6 @@ VLIB_NODE_FN (l2_input_classify_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_input_classify_node) = {
.name = "l2-input-classify",
.vector_size = sizeof (u32),
@@ -462,10 +461,8 @@ VLIB_REGISTER_NODE (l2_input_classify_node) = {
[L2_INPUT_CLASSIFY_NEXT_ETHERNET_INPUT] = "ethernet-input-not-l2",
[L2_INPUT_CLASSIFY_NEXT_IP4_INPUT] = "ip4-input",
[L2_INPUT_CLASSIFY_NEXT_IP6_INPUT] = "ip6-input",
- [L2_INPUT_CLASSIFY_NEXT_LI] = "li-hit",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
/** l2 input classsifier feature initialization. */
@@ -642,7 +639,6 @@ int_l2_input_classify_command_fn (vlib_main_t * vm,
* @todo This is incomplete. This needs a detailed description and a
* practical example.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_input_classify_cli, static) = {
.path = "set interface l2 input classify",
.short_help =
@@ -650,7 +646,6 @@ VLIB_CLI_COMMAND (int_l2_input_classify_cli, static) = {
" [ip6-table <n>] [other-table <n>]",
.function = int_l2_input_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_input_node.c b/src/vnet/l2/l2_input_node.c
index f8dfa3641b3..76b94809eb3 100644
--- a/src/vnet/l2/l2_input_node.c
+++ b/src/vnet/l2/l2_input_node.c
@@ -251,11 +251,11 @@ l2input_node_inline (vlib_main_t * vm,
/* Prefetch next iteration. */
{
- /* Prefetch the buffer header and packet for the N+2 loop iteration */
- clib_prefetch_store (b + 4);
- clib_prefetch_store (b + 5);
- clib_prefetch_store (b + 6);
- clib_prefetch_store (b + 7);
+ /* Prefetch the buffer header for the N+2 loop iteration */
+ clib_prefetch_store (b[4]);
+ clib_prefetch_store (b[5]);
+ clib_prefetch_store (b[6]);
+ clib_prefetch_store (b[7]);
clib_prefetch_store (b[4]->data);
clib_prefetch_store (b[5]->data);
@@ -365,7 +365,6 @@ VLIB_NODE_FN (l2input_node) (vlib_main_t * vm,
return l2input_node_inline (vm, node, frame, 0 /* do_trace */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2input_node) = {
.name = "l2-input",
.vector_size = sizeof (u32),
@@ -385,7 +384,6 @@ VLIB_REGISTER_NODE (l2input_node) = {
[L2INPUT_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_input_vtr.c b/src/vnet/l2/l2_input_vtr.c
index 3c1235bfa32..ccf3efa2390 100644
--- a/src/vnet/l2/l2_input_vtr.c
+++ b/src/vnet/l2/l2_input_vtr.c
@@ -319,7 +319,6 @@ VLIB_NODE_FN (l2_invtr_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_invtr_node) = {
.name = "l2-input-vtr",
.vector_size = sizeof (u32),
@@ -336,7 +335,6 @@ VLIB_REGISTER_NODE (l2_invtr_node) = {
[L2_INVTR_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
clib_error_t *
diff --git a/src/vnet/l2/l2_learn.c b/src/vnet/l2/l2_learn.c
index 6d90cee62a7..24b5389e55a 100644
--- a/src/vnet/l2/l2_learn.c
+++ b/src/vnet/l2/l2_learn.c
@@ -439,7 +439,6 @@ VLIB_NODE_FN (l2learn_node) (vlib_main_t * vm,
return l2learn_node_inline (vm, node, frame, 0 /* do_trace */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2learn_node) = {
.name = "l2-learn",
.vector_size = sizeof (u32),
@@ -457,7 +456,6 @@ VLIB_REGISTER_NODE (l2learn_node) = {
[L2LEARN_NEXT_L2FWD] = "l2-fwd",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
clib_error_t *
@@ -540,13 +538,11 @@ done:
* Example of how to disable learning:
* @cliexcmd{set interface l2 learn GigabitEthernet0/8/0 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_learn_cli, static) = {
.path = "set interface l2 learn",
.short_help = "set interface l2 learn <interface> [disable]",
.function = int_learn,
};
-/* *INDENT-ON* */
static clib_error_t *
diff --git a/src/vnet/l2/l2_output.c b/src/vnet/l2/l2_output.c
index 74ca868e535..7c70cf9f4c7 100644
--- a/src/vnet/l2/l2_output.c
+++ b/src/vnet/l2/l2_output.c
@@ -443,7 +443,6 @@ VLIB_NODE_FN (l2output_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2output_node) = {
.name = "l2-output",
.vector_size = sizeof (u32),
@@ -461,7 +460,6 @@ VLIB_REGISTER_NODE (l2output_node) = {
[L2OUTPUT_NEXT_BAD_INTF] = "l2-output-bad-intf",
},
};
-/* *INDENT-ON* */
#define foreach_l2output_bad_intf_error \
@@ -549,7 +547,6 @@ VLIB_NODE_FN (l2output_bad_intf_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2output_bad_intf_node) = {
.name = "l2-output-bad-intf",
.vector_size = sizeof (u32),
@@ -565,7 +562,6 @@ VLIB_REGISTER_NODE (l2output_bad_intf_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
static clib_error_t *
l2output_init (vlib_main_t * vm)
diff --git a/src/vnet/l2/l2_output_classify.c b/src/vnet/l2/l2_output_classify.c
index 97beb37f351..33a7c927386 100644
--- a/src/vnet/l2/l2_output_classify.c
+++ b/src/vnet/l2/l2_output_classify.c
@@ -435,7 +435,6 @@ VLIB_NODE_FN (l2_output_classify_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_output_classify_node) = {
.name = "l2-output-classify",
.vector_size = sizeof (u32),
@@ -454,7 +453,6 @@ VLIB_REGISTER_NODE (l2_output_classify_node) = {
[L2_OUTPUT_CLASSIFY_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
/** l2 output classsifier feature initialization. */
@@ -634,7 +632,6 @@ int_l2_output_classify_command_fn (vlib_main_t * vm,
* @todo This is incomplete. This needs a detailed description and a
* practical example.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_output_classify_cli, static) = {
.path = "set interface l2 output classify",
.short_help =
@@ -642,7 +639,6 @@ VLIB_CLI_COMMAND (int_l2_output_classify_cli, static) = {
" [ip6-table <n>] [other-table <n>]",
.function = int_l2_output_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_patch.c b/src/vnet/l2/l2_patch.c
index 6de4e50a298..f85938ed799 100644
--- a/src/vnet/l2/l2_patch.c
+++ b/src/vnet/l2/l2_patch.c
@@ -206,7 +206,6 @@ VLIB_NODE_FN (l2_patch_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_patch_node) = {
.name = "l2-patch",
.vector_size = sizeof (u32),
@@ -223,7 +222,6 @@ VLIB_REGISTER_NODE (l2_patch_node) = {
[L2_PATCH_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
extern int
vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add);
@@ -270,6 +268,8 @@ vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add)
vnet_feature_enable_disable ("device-input", "l2-patch",
rxhi->sw_if_index, 1, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth", "l2-patch",
+ rxhi->sw_if_index, 1, 0, 0);
}
else
{
@@ -278,6 +278,8 @@ vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add)
vnet_feature_enable_disable ("device-input", "l2-patch",
rxhi->sw_if_index, 0, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth", "l2-patch",
+ rxhi->sw_if_index, 0, 0, 0);
if (vec_len (l2pm->tx_next_by_rx_sw_if_index) > rx_sw_if_index)
{
l2pm->tx_next_by_rx_sw_if_index[rx_sw_if_index] = ~0;
@@ -369,13 +371,11 @@ done:
* @todo This is incomplete. This needs a detailed description and a
* practical example.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_patch_command, static) = {
.path = "test l2patch",
.short_help = "test l2patch rx <intfc> tx <intfc> [del]",
.function = test_patch_command_fn,
};
-/* *INDENT-ON* */
/** Display the contents of the l2patch table. */
static clib_error_t *
@@ -421,13 +421,11 @@ show_l2patch (vlib_main_t * vm,
* @todo This is incomplete. This needs a detailed description and a
* practical example.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_l2patch_cli, static) = {
.path = "show l2patch",
.short_help = "Show l2 interface cross-connect entries",
.function = show_l2patch,
};
-/* *INDENT-ON* */
static clib_error_t *
l2_patch_init (vlib_main_t * vm)
diff --git a/src/vnet/l2/l2_rw.c b/src/vnet/l2/l2_rw.c
index 2c008794c1b..c0e8ec489fc 100644
--- a/src/vnet/l2/l2_rw.c
+++ b/src/vnet/l2/l2_rw.c
@@ -109,6 +109,7 @@ l2_rw_rewrite (l2_rw_entry_t * rwe, u8 * h)
/* FALLTHROUGH */
case 1:
d[0] = (d[0] & ~rwe->mask[0]) | rwe->value[0];
+ rwe->hit_count++;
break;
default:
abort ();
@@ -332,6 +333,7 @@ l2_rw_mod_entry (u32 * index,
return 0;
}
+ e->hit_count = 0;
e->skip_n_vectors = skip / sizeof (u32x4);
skip -= e->skip_n_vectors * sizeof (u32x4);
e->rewrite_n_vectors = (skip + len - 1) / sizeof (u32x4) + 1;
@@ -398,17 +400,19 @@ l2_rw_entry_cli_fn (vlib_main_t * vm,
* the provisioned mask and value, modifies the packet header.
*
* @cliexpar
- * @todo This is incomplete. This needs a detailed description and a
- * practical example.
+ * Example of how to add an l2 rewrite entry to change the destination mac of
+ * the packet to 00:8a:00:0d:0e:02 (where parameter mask is Ethernet header's
+mask,
+ * parameter value is Ethernet header's value):
+ * @cliexcmd{l2 rewrite entry mask ffffffffffff00000000000000000000 value
+008a000d0e0200000000000000000000}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_entry_cli, static) = {
.path = "l2 rewrite entry",
.short_help =
"l2 rewrite entry [index <index>] [mask <hex-mask>] [value <hex-value>] [skip <n_bytes>] [del]",
.function = l2_rw_entry_cli_fn,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
int
@@ -468,21 +472,36 @@ l2_rw_interface_cli_fn (vlib_main_t * vm,
}
/*?
- * Layer 2-Rewrite node uses classify tables to match packets. Then, using
- * the provisioned mask and value, modifies the packet header.
+ * Apply the rule to the interface. The following example shows how to use
+classify
+ * entry and Layer 2-Rewrite entry to modify the packet ethernet header on the
+ * interface.
*
* @cliexpar
- * @todo This is incomplete. This needs a detailed description and a
- * practical example.
+ * Example use the classify to filter packets that do not need to be modified
+(where
+ * 192.168.68.34 is the destination ip of the data packet, 8080 is the
+destination port
+ * of the packet):
+ * @cliexcmd{classify table mask l3 ip4 dst l4 dst_port}
+ * @cliexcmd{classify session acl-hit-next permit table-index 0 match l3 ip4
+dst 192.168.68.34 l4 dst_port 8080}
+ *
+ * @cliexpar
+ * Example apply classify and l2 rewrite rules to the interface (where
+YusurK2Eth6/0/1/3
+ * is interface, \"table 0\" means Table Id is 0, \"miss 0\" means the packet
+that matches
+ * the classify. miss will be modified according to the l2 rewrite entry with
+index 0):
+ * @cliexcmd{set interface l2 rewrite YusurK2Eth6/0/1/3 table 0 miss-index 0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_interface_cli, static) = {
.path = "set interface l2 rewrite",
.short_help =
"set interface l2 rewrite <interface> [table <table index>] [miss-index <entry-index>]",
.function = l2_rw_interface_cli_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
l2_rw_show_interfaces_cli_fn (vlib_main_t * vm,
@@ -494,30 +513,27 @@ l2_rw_show_interfaces_cli_fn (vlib_main_t * vm,
vlib_cli_output (vm, "No interface is currently using l2 rewrite\n");
uword i;
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, rw->configs_bitmap) {
vlib_cli_output (vm, "sw_if_index:%d %U\n", i, format_l2_rw_config, &rw->configs[i]);
}
- /* *INDENT-ON* */
return 0;
}
/*?
- * Layer 2-Rewrite node uses classify tables to match packets. Then, using
- * the provisioned mask and value, modifies the packet header.
+ * This command displays the l2 rewrite entries of the interfaces.
*
* @cliexpar
- * @todo This is incomplete. This needs a detailed description and a
- * practical example.
+ * Example of how to display the l2 rewrite rules on the interface:
+ * @cliexstart{show l2 rewrite interfaces}
+ * sw_if_index:4 table-index:0 miss-index:0
+ * @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_show_interfaces_cli, static) = {
.path = "show l2 rewrite interfaces",
.short_help =
"show l2 rewrite interfaces",
.function = l2_rw_show_interfaces_cli_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
l2_rw_show_entries_cli_fn (vlib_main_t * vm,
@@ -528,30 +544,29 @@ l2_rw_show_entries_cli_fn (vlib_main_t * vm,
if (pool_elts (rw->entries) == 0)
vlib_cli_output (vm, "No entries\n");
- /* *INDENT-OFF* */
pool_foreach (e, rw->entries) {
vlib_cli_output (vm, "%U\n", format_l2_rw_entry, e);
}
- /* *INDENT-ON* */
return 0;
}
/*?
- * Layer 2-Rewrite node uses classify tables to match packets. Then, using
- * the provisioned mask and value, modifies the packet header.
+ * This command displays all l2 rewrite entries.
*
* @cliexpar
- * @todo This is incomplete. This needs a detailed description and a
- * practical example.
+ * Example of how to display all l2 rewrite entries:
+ * @cliexstart{show l2 rewrite entries}
+ * 0 - mask:ffffffffffff00000000000000000000
+value:aabbccddeeff00000000000000000000
+ * hits:0 skip_bytes:0
+ * @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_show_entries_cli, static) = {
.path = "show l2 rewrite entries",
.short_help =
"show l2 rewrite entries",
.function = l2_rw_show_entries_cli_fn,
};
-/* *INDENT-ON* */
static int
l2_rw_enable_disable (u32 bridge_domain, u8 disable)
@@ -587,21 +602,22 @@ l2_rw_set_cli_fn (vlib_main_t * vm,
}
/*?
- * Layer 2-Rewrite node uses classify tables to match packets. Then, using
- * the provisioned mask and value, modifies the packet header.
+ * Layer 2 rewrite can be enabled and disabled on each interface and on each
+bridge-domain.
+ * Use this command to manage l2 rewrite on bridge-domain.
*
* @cliexpar
- * @todo This is incomplete. This needs a detailed description and a
- * practical example.
+ * Example of how to enable rewrite (where 100 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain rewrite 100}
+ * Example of how to disable rewrite (where 100 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain rewrite 100 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_set_cli, static) = {
.path = "set bridge-domain rewrite",
.short_help =
"set bridge-domain rewrite <bridge-domain> [disable]",
.function = l2_rw_set_cli_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
l2_rw_init (vlib_main_t * vm)
@@ -643,7 +659,6 @@ static char *l2_rw_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_rw_node) = {
.name = "l2-rw",
.vector_size = sizeof (u32),
@@ -655,7 +670,6 @@ VLIB_REGISTER_NODE (l2_rw_node) = {
.n_next_nodes = L2_RW_N_NEXT,
.next_nodes = { [L2_RW_NEXT_DROP] = "error-drop"},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_rw.h b/src/vnet/l2/l2_rw.h
index f9b10333f43..6d12a21fe55 100644
--- a/src/vnet/l2/l2_rw.h
+++ b/src/vnet/l2/l2_rw.h
@@ -27,7 +27,6 @@
#include <vnet/l2/l2_input.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct _l2_rw_entry {
u16 skip_n_vectors;
u16 rewrite_n_vectors;
@@ -35,15 +34,12 @@ typedef CLIB_PACKED(struct _l2_rw_entry {
u32x4 *mask;
u32x4 *value;
}) l2_rw_entry_t;
-/* *INDENT-ON* */
/* l2_rw configuration for one interface */
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct _l2_rw_config {
u32 table_index; /* Which classify table to use */
u32 miss_index; /* Rewrite entry to use if table does not match */
}) l2_rw_config_t;
-/* *INDENT-ON* */
typedef struct
{
diff --git a/src/vnet/l2/l2_test.c b/src/vnet/l2/l2_test.c
index 3be4a46223d..b78e388a9f1 100644
--- a/src/vnet/l2/l2_test.c
+++ b/src/vnet/l2/l2_test.c
@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright(c) 2021 Cisco Systems, Inc.
+ * Copyright(c) 2022 Nordix Foundation.
*/
#include <vat/vat.h>
@@ -634,6 +635,18 @@ done:
return ret;
}
+static int
+api_bridge_domain_add_del_v2 (vat_main_t *vam)
+{
+ return -1;
+}
+
+static void
+vl_api_bridge_domain_add_del_v2_reply_t_handler (
+ vl_api_bridge_domain_add_del_v2_reply_t *mp)
+{
+}
+
#define foreach_pbb_vtr_op \
_ ("disable", L2_VTR_DISABLED) \
_ ("pop", L2_VTR_POP_2) \
diff --git a/src/vnet/l2/l2_uu_fwd.c b/src/vnet/l2/l2_uu_fwd.c
index fb3571d159c..4a510b658d7 100644
--- a/src/vnet/l2/l2_uu_fwd.c
+++ b/src/vnet/l2/l2_uu_fwd.c
@@ -211,7 +211,6 @@ VLIB_NODE_FN (l2_uu_fwd_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_uu_fwd_node) = {
.name = "l2-uu-fwd",
.vector_size = sizeof (u32),
@@ -228,7 +227,6 @@ VLIB_REGISTER_NODE (l2_uu_fwd_node) = {
[L2_UU_FWD_NEXT_L2_OUTPUT] = "l2-output",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_vtr.c b/src/vnet/l2/l2_vtr.c
index bfd1dcb9280..4053c0fc1cb 100644
--- a/src/vnet/l2/l2_vtr.c
+++ b/src/vnet/l2/l2_vtr.c
@@ -670,13 +670,11 @@ done:
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_vtr_cli, static) = {
.path = "set interface l2 tag-rewrite",
.short_help = "set interface l2 tag-rewrite <interface> [disable | pop {1|2} | push {dot1q|dot1ad} <tag> <tag>]",
.function = int_l2_vtr,
};
-/* *INDENT-ON* */
/**
* Get pbb tag rewrite on the given interface.
@@ -816,13 +814,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_pbb_vtr_cli, static) = {
.path = "set interface l2 pbb-tag-rewrite",
.short_help = "set interface l2 pbb-tag-rewrite <interface> [disable | pop | push | translate_pbb_stag <outer_tag> dmac <address> smac <address> s_id <nn> [b_vlanid <nn>]]",
.function = int_l2_pbb_vtr,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_xcrw.c b/src/vnet/l2/l2_xcrw.c
index d848fac6b72..9edd8b6ba57 100644
--- a/src/vnet/l2/l2_xcrw.c
+++ b/src/vnet/l2/l2_xcrw.c
@@ -238,7 +238,6 @@ VLIB_NODE_FN (l2_xcrw_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_xcrw_node) = {
.name = "l2-xcrw",
.vector_size = sizeof (u32),
@@ -255,7 +254,6 @@ VLIB_REGISTER_NODE (l2_xcrw_node) = {
[L2_XCRW_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
clib_error_t *
@@ -279,12 +277,10 @@ format_xcrw_name (u8 * s, va_list * args)
return format (s, "xcrw%d", dev_instance);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (xcrw_device_class,static) = {
.name = "Xcrw",
.format_device_name = format_xcrw_name,
};
-/* *INDENT-ON* */
/* Create a sham tunnel interface and return its sw_if_index */
static u32
@@ -496,7 +492,6 @@ done:
* @todo This is incomplete. This needs a detailed description and a
* practical example.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_l2_xcrw_command, static) = {
.path = "set interface l2 xcrw",
.short_help =
@@ -504,7 +499,6 @@ VLIB_CLI_COMMAND (set_l2_xcrw_command, static) = {
" [del] [tx-fib-id <id>] [ipv6] rw <hex-bytes>",
.function = set_l2_xcrw_command_fn,
};
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
@@ -568,12 +562,10 @@ show_l2xcrw_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%U", format_l2xcrw, 0, 0);
- /* *INDENT-OFF* */
pool_foreach (t, xcm->tunnels)
{
vlib_cli_output (vm, "%U", format_l2xcrw, vnm, t);
}
- /* *INDENT-ON* */
return 0;
}
@@ -585,13 +577,11 @@ show_l2xcrw_command_fn (vlib_main_t * vm,
* @todo This is incomplete. This needs a detailed description and a
* practical example.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_l2xcrw_command, static) = {
.path = "show l2xcrw",
.short_help = "show l2xcrw",
.function = show_l2xcrw_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/lawful-intercept/lawful_intercept.c b/src/vnet/lawful-intercept/lawful_intercept.c
deleted file mode 100644
index fff44fc3a67..00000000000
--- a/src/vnet/lawful-intercept/lawful_intercept.c
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vnet/lawful-intercept/lawful_intercept.h>
-
-li_main_t li_main;
-
-static clib_error_t *
-set_li_command_fn (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- li_main_t *lm = &li_main;
- ip4_address_t collector;
- u8 collector_set = 0;
- ip4_address_t src;
- u8 src_set = 0;
- u32 tmp;
- u16 udp_port = 0;
- u8 is_add = 1;
- int i;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "collector %U", unformat_ip4_address, &collector))
- collector_set = 1;
- if (unformat (input, "src %U", unformat_ip4_address, &src))
- src_set = 1;
- else if (unformat (input, "udp-port %d", &tmp))
- udp_port = tmp;
- else if (unformat (input, "del"))
- is_add = 0;
- else
- break;
- }
-
- if (collector_set == 0)
- return clib_error_return (0, "collector must be set...");
- if (src_set == 0)
- return clib_error_return (0, "src must be set...");
- if (udp_port == 0)
- return clib_error_return (0, "udp-port must be set...");
-
- if (is_add == 1)
- {
- for (i = 0; i < vec_len (lm->collectors); i++)
- {
- if (lm->collectors[i].as_u32 == collector.as_u32)
- {
- if (lm->ports[i] == udp_port)
- return clib_error_return (
- 0, "collector %U:%d already configured", format_ip4_address,
- &collector, udp_port);
- else
- return clib_error_return (
- 0, "collector %U already configured with port %d",
- format_ip4_address, &collector, (int) (lm->ports[i]));
- }
- }
- vec_add1 (lm->collectors, collector);
- vec_add1 (lm->ports, udp_port);
- vec_add1 (lm->src_addrs, src);
- return 0;
- }
- else
- {
- for (i = 0; i < vec_len (lm->collectors); i++)
- {
- if ((lm->collectors[i].as_u32 == collector.as_u32)
- && lm->ports[i] == udp_port)
- {
- vec_delete (lm->collectors, 1, i);
- vec_delete (lm->ports, 1, i);
- vec_delete (lm->src_addrs, 1, i);
- return 0;
- }
- }
- return clib_error_return (0, "collector %U:%d not configured",
- &collector, udp_port);
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_li_command, static) = {
- .path = "set li",
- .short_help =
- "set li src <ip4-address> collector <ip4-address> udp-port <nnnn>",
- .function = set_li_command_fn,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-li_init (vlib_main_t * vm)
-{
- li_main_t *lm = &li_main;
-
- lm->vlib_main = vm;
- lm->vnet_main = vnet_get_main ();
- lm->hit_node_index = li_hit_node.index;
- return 0;
-}
-
-VLIB_INIT_FUNCTION (li_init);
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/lawful-intercept/lawful_intercept.h b/src/vnet/lawful-intercept/lawful_intercept.h
deleted file mode 100644
index e39fa0d0752..00000000000
--- a/src/vnet/lawful-intercept/lawful_intercept.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __lawful_intercept_h__
-#define __lawful_intercept_h__
-
-#include <vnet/vnet.h>
-#include <vnet/ip/ip.h>
-
-typedef struct
-{
- /* LI collector info */
- ip4_address_t *src_addrs;
- ip4_address_t *collectors;
- u16 *ports;
-
- /* Hit node index */
- u32 hit_node_index;
-
- /* convenience */
- vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
-} li_main_t;
-
-extern li_main_t li_main;
-
-/* *INDENT-OFF* */
-typedef CLIB_PACKED(struct {
- ip4_header_t ip4;
- udp_header_t udp;
-}) ip4_udp_header_t;
-/* *INDENT-ON* */
-
-extern vlib_node_registration_t li_hit_node;
-
-#endif /* __lawful_intercept_h__ */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/lawful-intercept/node.c b/src/vnet/lawful-intercept/node.c
deleted file mode 100644
index c5328e672d0..00000000000
--- a/src/vnet/lawful-intercept/node.c
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vlib/vlib.h>
-#include <vnet/vnet.h>
-#include <vppinfra/error.h>
-
-#include <vnet/lawful-intercept/lawful_intercept.h>
-
-#include <vppinfra/error.h>
-#include <vppinfra/elog.h>
-
-extern vlib_node_registration_t li_hit_node;
-
-typedef struct
-{
- u32 next_index;
-} li_hit_trace_t;
-
-/* packet trace format function */
-static u8 *
-format_li_hit_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- li_hit_trace_t *t = va_arg (*args, li_hit_trace_t *);
-
- s = format (s, "LI_HIT: next index %d", t->next_index);
-
- return s;
-}
-
-#define foreach_li_hit_error \
-_(HITS, "LI packets processed") \
-_(NO_COLLECTOR, "No collector configured") \
-_(BUFFER_ALLOCATION_FAILURE, "Buffer allocation failure")
-
-typedef enum
-{
-#define _(sym,str) LI_HIT_ERROR_##sym,
- foreach_li_hit_error
-#undef _
- LI_HIT_N_ERROR,
-} li_hit_error_t;
-
-static char *li_hit_error_strings[] = {
-#define _(sym,string) string,
- foreach_li_hit_error
-#undef _
-};
-
-typedef enum
-{
- LI_HIT_NEXT_ETHERNET,
- LI_HIT_N_NEXT,
-} li_hit_next_t;
-
-VLIB_NODE_FN (li_hit_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
-{
- u32 n_left_from, *from, *to_next;
- li_hit_next_t next_index;
- vlib_frame_t *int_frame = 0;
- u32 *to_int_next = 0;
- li_main_t *lm = &li_main;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
-
- if (PREDICT_FALSE (vec_len (lm->collectors) == 0))
- {
- vlib_node_increment_counter (vm, li_hit_node.index,
- LI_HIT_ERROR_NO_COLLECTOR, n_left_from);
- }
- else
- {
- /* The intercept frame... */
- int_frame = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
- to_int_next = vlib_frame_vector_args (int_frame);
- }
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-#if 0
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- u32 next0 = LI_HIT_NEXT_INTERFACE_OUTPUT;
- u32 next1 = LI_HIT_NEXT_INTERFACE_OUTPUT;
- u32 sw_if_index0, sw_if_index1;
- u8 tmp0[6], tmp1[6];
- ethernet_header_t *en0, *en1;
- u32 bi0, bi1;
- vlib_buffer_t *b0, *b1;
-
- /* Prefetch next iteration. */
- {
- vlib_buffer_t *p2, *p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
-
- clib_prefetch_store (p2->data);
- clib_prefetch_store (p3->data);
- }
-
- /* speculatively enqueue b0 and b1 to the current next frame */
- to_next[0] = bi0 = from[0];
- to_next[1] = bi1 = from[1];
- from += 2;
- to_next += 2;
- n_left_from -= 2;
- n_left_to_next -= 2;
-
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
-
- /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
- ASSERT (b0->current_data == 0);
- ASSERT (b1->current_data == 0);
-
- en0 = vlib_buffer_get_current (b0);
- en1 = vlib_buffer_get_current (b1);
-
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
-
- /* Send pkt back out the RX interface */
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
- vnet_buffer (b1)->sw_if_index[VLIB_TX] = sw_if_index1;
-
- /* $$$$$ End of processing 2 x packets $$$$$ */
-
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
- {
- if (b0->flags & VLIB_BUFFER_IS_TRACED)
- {
- li_hit_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- t->sw_if_index = sw_if_index0;
- t->next_index = next0;
- }
- if (b1->flags & VLIB_BUFFER_IS_TRACED)
- {
- li_hit_trace_t *t =
- vlib_add_trace (vm, node, b1, sizeof (*t));
- t->sw_if_index = sw_if_index1;
- t->next_index = next1;
- }
- }
-
- /* verify speculative enqueues, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-#endif /* $$$ dual-loop off */
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0;
- vlib_buffer_t *b0;
- vlib_buffer_t *c0;
- ip4_udp_header_t *iu0;
- ip4_header_t *ip0;
- udp_header_t *udp0;
- u32 next0 = LI_HIT_NEXT_ETHERNET;
-
- /* speculatively enqueue b0 to the current next frame */
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- if (PREDICT_TRUE (to_int_next != 0))
- {
- /* Make an intercept copy. This can fail. */
- c0 = vlib_buffer_copy (vm, b0);
-
- if (PREDICT_FALSE (c0 == 0))
- {
- vlib_node_increment_counter
- (vm, node->node_index,
- LI_HIT_ERROR_BUFFER_ALLOCATION_FAILURE, 1);
- goto skip;
- }
-
- vlib_buffer_advance (c0, -sizeof (*iu0));
-
- iu0 = vlib_buffer_get_current (c0);
- ip0 = &iu0->ip4;
-
- ip0->ip_version_and_header_length = 0x45;
- ip0->ttl = 254;
- ip0->protocol = IP_PROTOCOL_UDP;
-
- ip0->src_address.as_u32 = lm->src_addrs[0].as_u32;
- ip0->dst_address.as_u32 = lm->collectors[0].as_u32;
- ip0->length = vlib_buffer_length_in_chain (vm, c0);
- ip0->checksum = ip4_header_checksum (ip0);
-
- udp0 = &iu0->udp;
- udp0->src_port = udp0->dst_port =
- clib_host_to_net_u16 (lm->ports[0]);
- udp0->checksum = 0;
- udp0->length =
- clib_net_to_host_u16 (vlib_buffer_length_in_chain (vm, b0));
-
- to_int_next[0] = vlib_get_buffer_index (vm, c0);
- to_int_next++;
- }
-
- skip:
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- li_hit_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
- t->next_index = next0;
- }
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- if (int_frame)
- {
- int_frame->n_vectors = frame->n_vectors;
- vlib_put_frame_to_node (vm, ip4_lookup_node.index, int_frame);
- }
-
- vlib_node_increment_counter (vm, li_hit_node.index,
- LI_HIT_ERROR_HITS, frame->n_vectors);
- return frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (li_hit_node) = {
- .name = "li-hit",
- .vector_size = sizeof (u32),
- .format_trace = format_li_hit_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(li_hit_error_strings),
- .error_strings = li_hit_error_strings,
-
- .n_next_nodes = LI_HIT_N_NEXT,
-
- /* edit / add dispositions here */
- .next_nodes = {
- [LI_HIT_NEXT_ETHERNET] = "ethernet-input-not-l2",
- },
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/llc/llc.c b/src/vnet/llc/llc.c
index 4a7fdf9d9ba..4cbf17d48df 100644
--- a/src/vnet/llc/llc.c
+++ b/src/vnet/llc/llc.c
@@ -181,14 +181,12 @@ llc_build_rewrite (vnet_main_t * vnm,
return (rewrite);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (llc_hw_interface_class) = {
.name = "LLC",
.format_header = format_llc_header_with_length,
.unformat_header = unformat_llc_header,
.build_rewrite = llc_build_rewrite,
};
-/* *INDENT-ON* */
static void
add_protocol (llc_main_t * pm, llc_protocol_t protocol, char *protocol_name)
diff --git a/src/vnet/llc/node.c b/src/vnet/llc/node.c
index 086925bd305..d1ee6948269 100644
--- a/src/vnet/llc/node.c
+++ b/src/vnet/llc/node.c
@@ -246,7 +246,6 @@ static char *llc_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (llc_input_node) = {
.function = llc_input,
.name = "llc-input",
@@ -267,7 +266,6 @@ VLIB_REGISTER_NODE (llc_input_node) = {
.format_trace = format_llc_input_trace,
.unformat_buffer = unformat_llc_header,
};
-/* *INDENT-ON* */
static void
llc_setup_node (vlib_main_t *vm, u32 node_index)
diff --git a/src/vnet/mfib/mfib_forward.c b/src/vnet/mfib/mfib_forward.c
index affedb0ef00..3befce041bb 100644
--- a/src/vnet/mfib/mfib_forward.c
+++ b/src/vnet/mfib/mfib_forward.c
@@ -74,7 +74,7 @@ mfib_forward_lookup_trace (vlib_main_t * vm,
t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
t0->entry_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
t0->fib_index = vec_elt (im->mfib_index_by_sw_if_index,
- vnet_buffer(b1)->sw_if_index[VLIB_RX]);
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
}
if (b1->flags & VLIB_BUFFER_IS_TRACED)
{
diff --git a/src/vnet/mfib/mfib_itf.c b/src/vnet/mfib/mfib_itf.c
index b323d3e4a96..e65a6d733cf 100644
--- a/src/vnet/mfib/mfib_itf.c
+++ b/src/vnet/mfib/mfib_itf.c
@@ -206,10 +206,8 @@ format_mfib_itf (u8 * s, va_list * args)
if (~0 != mfib_itf->mfi_sw_if_index)
{
return (format(s, " %U: %U",
- format_vnet_sw_interface_name,
- vnm,
- vnet_get_sw_interface(vnm,
- mfib_itf->mfi_sw_if_index),
+ format_vnet_sw_if_index_name,
+ vnm, mfib_itf->mfi_sw_if_index,
format_mfib_itf_flags, mfib_itf->mfi_flags));
}
else
diff --git a/src/vnet/mfib/mfib_types.c b/src/vnet/mfib/mfib_types.c
index 19583ea18f4..755f656a7b2 100644
--- a/src/vnet/mfib/mfib_types.c
+++ b/src/vnet/mfib/mfib_types.c
@@ -253,7 +253,6 @@ mfib_show_route_flags (vlib_main_t * vm,
/*?
* This command displays the set of supported flags applicable to an MFIB route
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (mfib_route_flags_command, static) =
{
.path = "show mfib route flags",
@@ -261,7 +260,6 @@ VLIB_CLI_COMMAND (mfib_route_flags_command, static) =
.function = mfib_show_route_flags,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
clib_error_t *
mfib_show_itf_flags (vlib_main_t * vm,
@@ -282,7 +280,6 @@ mfib_show_itf_flags (vlib_main_t * vm,
/*?
* This command displays the set of supported flags applicable to an MFIB interface
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (mfib_itf_flags_command, static) =
{
.path = "show mfib itf flags",
@@ -290,4 +287,3 @@ VLIB_CLI_COMMAND (mfib_itf_flags_command, static) =
.function = mfib_show_itf_flags,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
diff --git a/src/vnet/misc.c b/src/vnet/misc.c
index 18d4651cff3..ea816615a50 100644
--- a/src/vnet/misc.c
+++ b/src/vnet/misc.c
@@ -56,18 +56,14 @@ vnet_local_interface_tx (vlib_main_t * vm,
return f->n_vectors;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (vnet_local_interface_device_class) = {
.name = "local",
.tx_function = vnet_local_interface_tx,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (vnet_local_interface_hw_class,static) = {
.name = "local",
};
-/* *INDENT-ON* */
clib_error_t *
vnet_main_init (vlib_main_t * vm)
@@ -86,10 +82,12 @@ vnet_main_init (vlib_main_t * vm)
vnm->local_interface_hw_if_index = hw_if_index;
vnm->local_interface_sw_if_index = hw->sw_if_index;
+ vnm->pcap.current_filter_function =
+ vlib_is_packet_traced_default_function ();
+
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (vnet_main_init)=
{
.init_order = VLIB_INITS("vnet_interface_init",
@@ -102,7 +100,6 @@ VLIB_INIT_FUNCTION (vnet_main_init)=
"mpls_init",
"vnet_main_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/mpls/interface.c b/src/vnet/mpls/interface.c
index 5e80b9d0532..fd654dca891 100644
--- a/src/vnet/mpls/interface.c
+++ b/src/vnet/mpls/interface.c
@@ -22,6 +22,14 @@
#include <vnet/adj/adj_midchain.h>
#include <vnet/dpo/classify_dpo.h>
+typedef struct
+{
+ mpls_interface_state_change_function_t *function;
+ uword function_opaque;
+} mpls_interface_state_change_callback_t;
+
+/** Functions to call when interface becomes MPLS enabled/disabled. */
+static mpls_interface_state_change_callback_t *state_change_callbacks;
u8
mpls_sw_interface_is_enabled (u32 sw_if_index)
@@ -34,6 +42,17 @@ mpls_sw_interface_is_enabled (u32 sw_if_index)
return (mm->mpls_enabled_by_sw_if_index[sw_if_index]);
}
+void
+mpls_interface_state_change_add_callback (
+ mpls_interface_state_change_function_t *function, uword opaque)
+{
+ mpls_interface_state_change_callback_t cb = {
+ .function = function,
+ .function_opaque = opaque,
+ };
+ vec_add1 (state_change_callbacks, cb);
+}
+
int
mpls_sw_interface_enable_disable (mpls_main_t *mm, u32 sw_if_index,
u8 is_enable)
@@ -81,6 +100,12 @@ mpls_sw_interface_enable_disable (mpls_main_t *mm, u32 sw_if_index,
else if (hi->l3_if_count)
hi->l3_if_count--;
+ {
+ mpls_interface_state_change_callback_t *cb;
+ vec_foreach (cb, state_change_callbacks)
+ cb->function (mm, cb->function_opaque, sw_if_index, is_enable);
+ }
+
return (0);
}
diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api
index 4c164bb2bf2..5d775dafdfc 100644
--- a/src/vnet/mpls/mpls.api
+++ b/src/vnet/mpls/mpls.api
@@ -92,6 +92,26 @@ define mpls_tunnel_details
vl_api_mpls_tunnel_t mt_tunnel;
};
+/** \brief Dump mpls enabled interface(s)
+ @param client_index - opaque cookie to identify the sender
+ @param sw_if_index - sw_if_index of a specific interface, or -1 (default)
+ to return all MPLS enabled interfaces
+*/
+define mpls_interface_dump
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index [default=0xffffffff];
+};
+
+/** \brief mpls enabled interface details
+*/
+define mpls_interface_details
+{
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+};
+
/** \brief MPLS Route Add / del route
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c
index 0d01010feea..7d922b003cc 100644
--- a/src/vnet/mpls/mpls.c
+++ b/src/vnet/mpls/mpls.c
@@ -370,7 +370,13 @@ done:
VLIB_CLI_COMMAND (mpls_local_label_command, static) = {
.path = "mpls local-label",
.function = vnet_mpls_local_label,
- .short_help = "mpls local-label [add|del] <label-value> [eos|non-eos] via [next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-attached] [rx-ip4 <interface>] [out-labels <value value value>]",
+ .short_help =
+ "mpls local-label [add|del] <label-value> [eos|non-eos] via "
+ "[next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight "
+ "<value>] [preference <value>] [udp-encap-id <value>] "
+ "[ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] "
+ "[mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-attached] "
+ "[rx-ip4|rx-ip6 <interface>] [out-labels <value value value>]",
};
clib_error_t *
@@ -430,13 +436,11 @@ vnet_mpls_table_cmd (vlib_main_t * vm,
return error;
}
-/* *INDENT-ON* */
/*?
* This command is used to add or delete MPLS Tables. All
* Tables must be explicitly added before that can be used,
* Including the default table.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (mpls_table_command, static) = {
.path = "mpls table",
.short_help = "mpls table [add|del] <table-id>",
diff --git a/src/vnet/mpls/mpls.h b/src/vnet/mpls/mpls.h
index 885901f89a4..6baaaad95ba 100644
--- a/src/vnet/mpls/mpls.h
+++ b/src/vnet/mpls/mpls.h
@@ -23,14 +23,18 @@
#include <vnet/fib/fib_node.h>
#include <vnet/adj/adj.h>
+struct mpls_main_t;
+
/**
* @brief Definition of a callback for receiving MPLS interface state change
* notifications
*/
-typedef void (*mpls_interface_state_change_callback_t) (u32 sw_if_index,
- u32 is_enable);
+typedef void (mpls_interface_state_change_function_t) (struct mpls_main_t *mm,
+ uword opaque,
+ u32 sw_if_index,
+ u32 is_enable);
-typedef struct
+typedef struct mpls_main_t
{
/* MPLS FIB index for each software interface */
u32 *fib_index_by_sw_if_index;
@@ -77,11 +81,14 @@ unformat_function_t unformat_mpls_unicast_label;
unformat_function_t unformat_mpls_header;
unformat_function_t unformat_pg_mpls_header;
+u8 mpls_sw_interface_is_enabled (u32 sw_if_index);
+
+void mpls_interface_state_change_add_callback (
+ mpls_interface_state_change_function_t *function, uword opaque);
+
int mpls_sw_interface_enable_disable (mpls_main_t *mm, u32 sw_if_index,
u8 is_enable);
-u8 mpls_sw_interface_is_enabled (u32 sw_if_index);
-
int mpls_dest_cmp (void *a1, void *a2);
int mpls_fib_index_cmp (void *a1, void *a2);
diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c
index fac52827e1d..58998a6576c 100644
--- a/src/vnet/mpls/mpls_api.c
+++ b/src/vnet/mpls/mpls_api.c
@@ -199,12 +199,10 @@ vl_api_mpls_route_add_del_t_handler (vl_api_mpls_route_add_del_t * mp)
rv = mpls_route_add_del_t_handler (vnm, mp, &stats_index);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_MPLS_ROUTE_ADD_DEL_REPLY,
({
rmp->stats_index = htonl (stats_index);
}));
- /* *INDENT-ON* */
}
void
@@ -270,13 +268,11 @@ vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp)
vec_free (rpaths);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_MPLS_TUNNEL_ADD_DEL_REPLY,
({
rmp->sw_if_index = ntohl(tunnel_sw_if_index);
rmp->tunnel_index = ntohl(tunnel_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -401,12 +397,58 @@ vl_api_mpls_table_dump_t_handler (vl_api_mpls_table_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (fib_table, mm->fibs)
{
send_mpls_table_details(am, reg, mp->context, fib_table);
}
- /* *INDENT-ON* */
+}
+
+static void
+send_mpls_interface_details (vpe_api_main_t *am, vl_api_registration_t *reg,
+ u32 context, const u32 sw_if_index)
+{
+ vl_api_mpls_interface_details_t *mp;
+
+ mp = vl_msg_api_alloc_zero (sizeof (*mp));
+ mp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_MPLS_INTERFACE_DETAILS);
+ mp->context = context;
+
+ mp->sw_if_index = htonl (sw_if_index);
+ vl_api_send_msg (reg, (u8 *) mp);
+}
+
+static void
+vl_api_mpls_interface_dump_t_handler (vl_api_mpls_interface_dump_t *mp)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ vl_api_registration_t *reg;
+ vnet_interface_main_t *im = &vnet_main.interface_main;
+ vnet_sw_interface_t *si;
+ u32 sw_if_index = ~0;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (sw_if_index == ~0)
+ {
+ pool_foreach (si, im->sw_interfaces)
+ {
+ if (mpls_sw_interface_is_enabled (si->sw_if_index))
+ {
+ send_mpls_interface_details (am, reg, mp->context,
+ si->sw_if_index);
+ }
+ }
+ }
+ else
+ {
+ if (mpls_sw_interface_is_enabled (sw_if_index))
+ {
+ send_mpls_interface_details (am, reg, mp->context, sw_if_index);
+ }
+ }
}
static void
diff --git a/src/vnet/mpls/mpls_input.c b/src/vnet/mpls/mpls_input.c
index c18cbda6315..0505d9a1829 100644
--- a/src/vnet/mpls/mpls_input.c
+++ b/src/vnet/mpls/mpls_input.c
@@ -278,10 +278,8 @@ static clib_error_t * mpls_input_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (mpls_input_init) =
{
.runs_after = VLIB_INITS("mpls_init"),
};
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c
index db423392c03..a5ac56534a5 100644
--- a/src/vnet/mpls/mpls_lookup.c
+++ b/src/vnet/mpls/mpls_lookup.c
@@ -44,13 +44,13 @@ format_mpls_lookup_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
mpls_lookup_trace_t * t = va_arg (*args, mpls_lookup_trace_t *);
- s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d hash %x "
- "label %d eos %d",
- t->next_index, t->lfib_index, t->lb_index, t->hash,
- vnet_mpls_uc_get_label(
- clib_net_to_host_u32(t->label_net_byte_order)),
- vnet_mpls_uc_get_s(
- clib_net_to_host_u32(t->label_net_byte_order)));
+ s = format (
+ s,
+ "MPLS: next [%d], lookup fib index %d, LB index %d hash 0x%08x "
+ "label %d eos %d",
+ t->next_index, t->lfib_index, t->lb_index, t->hash,
+ vnet_mpls_uc_get_label (clib_net_to_host_u32 (t->label_net_byte_order)),
+ vnet_mpls_uc_get_s (clib_net_to_host_u32 (t->label_net_byte_order)));
return s;
}
@@ -482,8 +482,8 @@ format_mpls_load_balance_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
mpls_load_balance_trace_t * t = va_arg (*args, mpls_load_balance_trace_t *);
- s = format (s, "MPLS: next [%d], LB index %d hash %d",
- t->next_index, t->lb_index, t->hash);
+ s = format (s, "MPLS: next [%d], LB index %d hash 0x%08x", t->next_index,
+ t->lb_index, t->hash);
return s;
}
@@ -553,75 +553,77 @@ VLIB_NODE_FN (mpls_load_balance_node) (vlib_main_t * vm,
* We don't want to use the same hash value at each level in the recursion
* graph as that would lead to polarisation
*/
- hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
- hc1 = vnet_buffer (p1)->ip.flow_hash = 0;
-
- if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
- {
- if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
- {
- hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
- }
- else
- {
- hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0);
- }
- dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1));
- }
- else
- {
- dpo0 = load_balance_get_bucket_i (lb0, 0);
- }
- if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
- {
- if (PREDICT_TRUE (vnet_buffer(p1)->ip.flow_hash))
- {
- hc1 = vnet_buffer(p1)->ip.flow_hash = vnet_buffer(p1)->ip.flow_hash >> 1;
- }
- else
- {
- hc1 = vnet_buffer(p1)->ip.flow_hash = mpls_compute_flow_hash(mpls1, hc1);
- }
- dpo1 = load_balance_get_fwd_bucket(lb1, (hc1 & lb1->lb_n_buckets_minus_1));
- }
- else
- {
- dpo1 = load_balance_get_bucket_i (lb1, 0);
- }
-
- next0 = dpo0->dpoi_next_node;
- next1 = dpo1->dpoi_next_node;
-
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
- vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
-
- vlib_increment_combined_counter
- (cm, thread_index, lbi0, 1,
- vlib_buffer_length_in_chain (vm, p0));
- vlib_increment_combined_counter
- (cm, thread_index, lbi1, 1,
- vlib_buffer_length_in_chain (vm, p1));
-
- if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
- {
- mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node,
- p0, sizeof (*tr));
- tr->next_index = next0;
- tr->lb_index = lbi0;
- tr->hash = hc0;
- }
- if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED))
- {
- mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node,
- p1, sizeof (*tr));
- tr->next_index = next1;
- tr->lb_index = lbi1;
- tr->hash = hc1;
- }
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next,
- to_next, n_left_to_next,
- pi0, pi1, next0, next1);
+ hc0 = hc1 = 0;
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ mpls_compute_flow_hash (mpls0, lb0->lb_hash_config);
+ }
+ dpo0 = load_balance_get_fwd_bucket (
+ lb0, (hc0 & lb0->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ vnet_buffer (p1)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ mpls_compute_flow_hash (mpls1, lb1->lb_hash_config);
+ }
+ dpo1 = load_balance_get_fwd_bucket (
+ lb1, (hc1 & lb1->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo1 = load_balance_get_bucket_i (lb1, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ vlib_increment_combined_counter (
+ cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+ vlib_increment_combined_counter (
+ cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_load_balance_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->lb_index = lbi0;
+ tr->hash = hc0;
+ }
+ if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_load_balance_trace_t *tr =
+ vlib_add_trace (vm, node, p1, sizeof (*tr));
+ tr->next_index = next1;
+ tr->lb_index = lbi1;
+ tr->hash = hc1;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (
+ vm, node, next, to_next, n_left_to_next, pi0, pi1, next0, next1);
}
while (n_left_from > 0 && n_left_to_next > 0)
@@ -646,44 +648,45 @@ VLIB_NODE_FN (mpls_load_balance_node) (vlib_main_t * vm,
lb0 = load_balance_get(lbi0);
- hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
- if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
- {
- if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
- {
- hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
- }
- else
- {
- hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0);
- }
- dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1));
- }
- else
- {
- dpo0 = load_balance_get_bucket_i (lb0, 0);
- }
-
- next0 = dpo0->dpoi_next_node;
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
-
- if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
- {
- mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node,
- p0, sizeof (*tr));
- tr->next_index = next0;
- tr->lb_index = lbi0;
- tr->hash = hc0;
- }
-
- vlib_increment_combined_counter
- (cm, thread_index, lbi0, 1,
- vlib_buffer_length_in_chain (vm, p0));
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next,
- to_next, n_left_to_next,
- pi0, next0);
- }
+ hc0 = 0;
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ mpls_compute_flow_hash (mpls0, lb0->lb_hash_config);
+ }
+ dpo0 = load_balance_get_fwd_bucket (
+ lb0, (hc0 & lb0->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_load_balance_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->lb_index = lbi0;
+ tr->hash = hc0;
+ }
+
+ vlib_increment_combined_counter (
+ cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next,
+ n_left_to_next, pi0, next0);
+ }
vlib_put_next_frame (vm, node, next, n_left_to_next);
}
diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c
index 5f7bf8c3b25..b03a4a57f68 100644
--- a/src/vnet/mpls/mpls_tunnel.c
+++ b/src/vnet/mpls/mpls_tunnel.c
@@ -932,7 +932,12 @@ done:
VLIB_CLI_COMMAND (create_mpls_tunnel_command, static) = {
.path = "mpls tunnel",
.short_help =
- "mpls tunnel [multicast] [l2-only] via [next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4 <interface>] [out-labels <value value value>]",
+ "mpls tunnel [multicast] [l2-only] via [next-hop-address] "
+ "[next-hop-interface] [next-hop-table <value>] [weight <value>] "
+ "[preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table "
+ "<value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] "
+ "[resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 <interface>] "
+ "[out-labels <value value value>]",
.function = vnet_create_mpls_tunnel_command_fn,
};
diff --git a/src/vnet/osi/node.c b/src/vnet/osi/node.c
index 4eb3e461139..9edc354cda7 100644
--- a/src/vnet/osi/node.c
+++ b/src/vnet/osi/node.c
@@ -239,7 +239,6 @@ static char *osi_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (osi_input_node) = {
.function = osi_input,
.name = "osi-input",
@@ -260,7 +259,6 @@ VLIB_REGISTER_NODE (osi_input_node) = {
.format_trace = format_osi_input_trace,
.unformat_buffer = unformat_osi_header,
};
-/* *INDENT-ON* */
static void
osi_setup_node (vlib_main_t *vm, u32 node_index)
diff --git a/src/vnet/pg/cli.c b/src/vnet/pg/cli.c
index ac225094391..3f2de2604b2 100644
--- a/src/vnet/pg/cli.c
+++ b/src/vnet/pg/cli.c
@@ -47,12 +47,10 @@
/* Root of all packet generator cli commands. */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_pg_command, static) = {
.path = "packet-generator",
.short_help = "Packet generator commands",
};
-/* *INDENT-ON* */
void
pg_enable_disable (u32 stream_index, int is_enable)
@@ -63,11 +61,9 @@ pg_enable_disable (u32 stream_index, int is_enable)
if (stream_index == ~0)
{
/* No stream specified: enable/disable all streams. */
- /* *INDENT-OFF* */
pool_foreach (s, pg->streams) {
pg_stream_enable_disable (pg, s, is_enable);
}
- /* *INDENT-ON* */
}
else
{
@@ -138,23 +134,19 @@ doit:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (enable_streams_cli, static) = {
.path = "packet-generator enable-stream",
.short_help = "Enable packet generator streams",
.function = enable_disable_stream,
.function_arg = 1, /* is_enable */
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (disable_streams_cli, static) = {
.path = "packet-generator disable-stream",
.short_help = "Disable packet generator streams",
.function = enable_disable_stream,
.function_arg = 0, /* is_enable */
};
-/* *INDENT-ON* */
static u8 *
format_pg_edit_group (u8 * s, va_list * va)
@@ -210,12 +202,10 @@ format_pg_stream (u8 * s, va_list * va)
if (verbose)
{
pg_edit_group_t *g;
- /* *INDENT-OFF* */
vec_foreach (g, t->edit_groups)
{
s = format (s, "\n%U%U", format_white_space, indent, format_pg_edit_group, g);
}
- /* *INDENT-ON* */
}
return s;
@@ -244,23 +234,19 @@ show_streams (vlib_main_t * vm,
}
vlib_cli_output (vm, "%U", format_pg_stream, 0, 0);
- /* *INDENT-OFF* */
pool_foreach (s, pg->streams) {
vlib_cli_output (vm, "%U", format_pg_stream, s, verbose);
}
- /* *INDENT-ON* */
done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_streams_cli, static) = {
.path = "show packet-generator ",
.short_help = "show packet-generator [verbose]",
.function = show_streams,
};
-/* *INDENT-ON* */
static clib_error_t *
pg_pcap_read (pg_stream_t * s, char *file_name)
@@ -505,7 +491,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (new_stream_cli, static) = {
.path = "packet-generator new",
.function = new_stream,
@@ -523,7 +508,6 @@ VLIB_CLI_COMMAND (new_stream_cli, static) = {
"rate PPS rate to transfer packet data\n"
"maxframe NPKTS maximum number of packets per frame\n",
};
-/* *INDENT-ON* */
static clib_error_t *
del_stream (vlib_main_t * vm,
@@ -541,13 +525,11 @@ del_stream (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (del_stream_cli, static) = {
.path = "packet-generator delete",
.function = del_stream,
.short_help = "Delete stream with given name",
};
-/* *INDENT-ON* */
static clib_error_t *
change_stream_parameters (vlib_main_t * vm,
@@ -588,13 +570,11 @@ change_stream_parameters (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (change_stream_parameters_cli, static) = {
.path = "packet-generator configure",
.short_help = "Change packet generator stream parameters",
.function = change_stream_parameters,
};
-/* *INDENT-ON* */
static clib_error_t *
pg_capture_cmd_fn (vlib_main_t * vm,
@@ -671,13 +651,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (pg_capture_cmd, static) = {
.path = "packet-generator capture",
.short_help = "packet-generator capture <interface name> pcap <filename> [count <n>]",
.function = pg_capture_cmd_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
create_pg_if_cmd_fn (vlib_main_t * vm,
@@ -685,7 +663,7 @@ create_pg_if_cmd_fn (vlib_main_t * vm,
{
pg_main_t *pg = &pg_main;
unformat_input_t _line_input, *line_input = &_line_input;
- u32 if_id, gso_enabled = 0, gso_size = 0, coalesce_enabled = 0;
+ u32 if_id = ~0, gso_enabled = 0, gso_size = 0, coalesce_enabled = 0;
clib_error_t *error = NULL;
pg_interface_mode_t mode = PG_MODE_ETHERNET;
@@ -730,7 +708,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_pg_if_cmd, static) = {
.path = "create packet-generator",
.short_help = "create packet-generator interface <interface name>"
@@ -738,7 +715,6 @@ VLIB_CLI_COMMAND (create_pg_if_cmd, static) = {
" [mode <ethernet | ip4 | ip6>]",
.function = create_pg_if_cmd_fn,
};
-/* *INDENT-ON* */
/* Dummy init function so that we can be linked in. */
static clib_error_t *
diff --git a/src/vnet/pg/input.c b/src/vnet/pg/input.c
index 6f38ed0869a..321472c4d85 100644
--- a/src/vnet/pg/input.c
+++ b/src/vnet/pg/input.c
@@ -1578,7 +1578,7 @@ fill_buffer_offload_flags (vlib_main_t *vm, u32 *buffers, u32 n_buffers,
(VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
- if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
+ if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM || gso_enabled)
oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
}
else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6))
@@ -1596,7 +1596,7 @@ fill_buffer_offload_flags (vlib_main_t *vm, u32 *buffers, u32 n_buffers,
if (l4_proto == IP_PROTOCOL_TCP)
{
- if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
+ if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM || gso_enabled)
oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
/* only set GSO flag for chained buffers */
@@ -1639,8 +1639,8 @@ pg_generate_packets (vlib_node_runtime_t * node,
pg_interface_t *pi;
int i;
- pi = pool_elt_at_index (pg->interfaces,
- pg->if_id_by_sw_if_index[s->sw_if_index[VLIB_RX]]);
+ pi = pool_elt_at_index (
+ pg->interfaces, pg->if_index_by_sw_if_index[s->sw_if_index[VLIB_RX]]);
bi0 = s->buffer_indices;
n_packets_in_fifo = pg_stream_fill (pg, s, n_packets_to_generate);
@@ -1816,17 +1816,14 @@ pg_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
if (vlib_num_workers ())
worker_index = vlib_get_current_worker_index ();
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, pg->enabled_streams[worker_index]) {
pg_stream_t *s = vec_elt_at_index (pg->streams, i);
n_packets += pg_input_stream (node, pg, s);
}
- /* *INDENT-ON* */
return n_packets;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (pg_input_node) = {
.function = pg_input,
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
@@ -1839,7 +1836,6 @@ VLIB_REGISTER_NODE (pg_input_node) = {
/* Input node will be left disabled until a stream is active. */
.state = VLIB_NODE_STATE_DISABLED,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (pg_input_mac_filter) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1864,9 +1860,9 @@ VLIB_NODE_FN (pg_input_mac_filter) (vlib_main_t * vm,
pg_interface_t *pi;
mac_address_t in;
- pi = pool_elt_at_index
- (pg->interfaces,
- pg->if_id_by_sw_if_index[vnet_buffer (b[0])->sw_if_index[VLIB_RX]]);
+ pi = pool_elt_at_index (
+ pg->interfaces,
+ pg->if_index_by_sw_if_index[vnet_buffer (b[0])->sw_if_index[VLIB_RX]]);
eth = vlib_buffer_get_current (b[0]);
mac_address_from_bytes (&in, eth->dst_address);
@@ -1898,7 +1894,6 @@ VLIB_NODE_FN (pg_input_mac_filter) (vlib_main_t * vm,
return (frame->n_vectors);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (pg_input_mac_filter) = {
.name = "pg-input-mac-filter",
.vector_size = sizeof (u32),
@@ -1912,7 +1907,6 @@ VNET_FEATURE_INIT (pg_input_mac_filter_feat, static) = {
.arc_name = "device-input",
.node_name = "pg-input-mac-filter",
};
-/* *INDENT-ON* */
static clib_error_t *
pg_input_mac_filter_cfg (vlib_main_t * vm,
@@ -1950,13 +1944,11 @@ pg_input_mac_filter_cfg (vlib_main_t * vm,
return NULL;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (enable_streams_cli, static) = {
.path = "packet-generator mac-filter",
.short_help = "packet-generator mac-filter <INTERFACE> <on|off>",
.function = pg_input_mac_filter_cfg,
};
-/* *INDENT-ON* */
/*
diff --git a/src/vnet/pg/pg.api b/src/vnet/pg/pg.api
index 3630e0c2f0d..4f531fb1f5e 100644
--- a/src/vnet/pg/pg.api
+++ b/src/vnet/pg/pg.api
@@ -38,6 +38,8 @@ enum pg_interface_mode : u8
*/
define pg_create_interface
{
+ option deprecated;
+
u32 client_index;
u32 context;
vl_api_interface_index_t interface_id;
@@ -60,6 +62,8 @@ define pg_create_interface_v2
*/
define pg_create_interface_reply
{
+ option deprecated;
+
u32 context;
i32 retval;
vl_api_interface_index_t sw_if_index;
diff --git a/src/vnet/pg/pg.h b/src/vnet/pg/pg.h
index e69ee6458e7..6d5b25ba25a 100644
--- a/src/vnet/pg/pg.h
+++ b/src/vnet/pg/pg.h
@@ -349,7 +349,7 @@ typedef struct pg_main_t
/* Pool of interfaces. */
pg_interface_t *interfaces;
uword *if_index_by_if_id;
- uword *if_id_by_sw_if_index;
+ uword *if_index_by_sw_if_index;
/* Vector of buffer indices for use in pg_stream_fill_replay, per thread */
u32 **replay_buffers_by_thread;
@@ -383,7 +383,7 @@ void pg_interface_enable_disable_coalesce (pg_interface_t * pi, u8 enable,
u32 tx_node_index);
/* Find/create free packet-generator interface index. */
-u32 pg_interface_add_or_get (pg_main_t *pg, uword stream_index, u8 gso_enabled,
+u32 pg_interface_add_or_get (pg_main_t *pg, u32 stream_index, u8 gso_enabled,
u32 gso_size, u8 coalesce_enabled,
pg_interface_mode_t mode);
diff --git a/src/vnet/pg/pg_api.c b/src/vnet/pg/pg_api.c
index 468c88ee8bb..e5d0a08a527 100644
--- a/src/vnet/pg/pg_api.c
+++ b/src/vnet/pg/pg_api.c
@@ -40,12 +40,10 @@ vl_api_pg_create_interface_t_handler (vl_api_pg_create_interface_t * mp)
ntohl (mp->gso_size), 0, PG_MODE_ETHERNET);
pg_interface_t *pi = pool_elt_at_index (pg->interfaces, pg_if_id);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_PG_CREATE_INTERFACE_REPLY,
({
rmp->sw_if_index = ntohl(pi->sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
diff --git a/src/vnet/pg/stream.c b/src/vnet/pg/stream.c
index 112cc09ae97..cf3d37d5e9e 100644
--- a/src/vnet/pg/stream.c
+++ b/src/vnet/pg/stream.c
@@ -171,7 +171,6 @@ pg_add_del_mac_address (vnet_hw_interface_t * hi,
return (NULL);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (pg_dev_class) = {
.name = "pg",
.tx_function = pg_output,
@@ -180,7 +179,6 @@ VNET_DEVICE_CLASS (pg_dev_class) = {
.admin_up_down_function = pg_interface_admin_up_down,
.mac_addr_add_del_function = pg_add_del_mac_address,
};
-/* *INDENT-ON* */
static u8 *
pg_build_rewrite (vnet_main_t * vnm,
@@ -197,12 +195,10 @@ pg_build_rewrite (vnet_main_t * vnm,
return (rewrite);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (pg_interface_class,static) = {
.name = "Packet generator",
.build_rewrite = pg_build_rewrite,
};
-/* *INDENT-ON* */
static u32
pg_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags)
@@ -249,7 +245,7 @@ VNET_HW_INTERFACE_CLASS (pg_tun_hw_interface_class) = {
};
u32
-pg_interface_add_or_get (pg_main_t *pg, uword if_id, u8 gso_enabled,
+pg_interface_add_or_get (pg_main_t *pg, u32 if_id, u8 gso_enabled,
u32 gso_size, u8 coalesce_enabled,
pg_interface_mode_t mode)
{
@@ -315,8 +311,8 @@ pg_interface_add_or_get (pg_main_t *pg, uword if_id, u8 gso_enabled,
hash_set (pg->if_index_by_if_id, if_id, i);
- vec_validate (pg->if_id_by_sw_if_index, hi->sw_if_index);
- pg->if_id_by_sw_if_index[hi->sw_if_index] = i;
+ vec_validate (pg->if_index_by_sw_if_index, hi->sw_if_index);
+ pg->if_index_by_sw_if_index[hi->sw_if_index] = i;
if (vlib_num_workers ())
{
@@ -560,6 +556,11 @@ pg_stream_add (pg_main_t * pg, pg_stream_t * s_init)
*/
s->sw_if_index[VLIB_RX] = pi->sw_if_index;
}
+ else if (vec_len (pg->if_index_by_sw_if_index) <= s->sw_if_index[VLIB_RX])
+ {
+ vec_validate (pg->if_index_by_sw_if_index, s->sw_if_index[VLIB_RX]);
+ pg->if_index_by_sw_if_index[s->sw_if_index[VLIB_RX]] = s->pg_if_index;
+ }
/* Connect the graph. */
s->next_index = vlib_node_add_next (vm, device_input_node.index,
diff --git a/src/vnet/policer/node_funcs.c b/src/vnet/policer/node_funcs.c
index efa2f830f8c..2d2252d247a 100644
--- a/src/vnet/policer/node_funcs.c
+++ b/src/vnet/policer/node_funcs.c
@@ -670,7 +670,6 @@ VLIB_NODE_FN (ip4_policer_classify_node) (vlib_main_t * vm,
POLICER_CLASSIFY_TABLE_IP4);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_policer_classify_node) = {
.name = "ip4-policer-classify",
.vector_size = sizeof (u32),
@@ -682,7 +681,6 @@ VLIB_REGISTER_NODE (ip4_policer_classify_node) = {
[POLICER_CLASSIFY_NEXT_INDEX_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_policer_classify_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -692,7 +690,6 @@ VLIB_NODE_FN (ip6_policer_classify_node) (vlib_main_t * vm,
POLICER_CLASSIFY_TABLE_IP6);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_policer_classify_node) = {
.name = "ip6-policer-classify",
.vector_size = sizeof (u32),
@@ -704,7 +701,6 @@ VLIB_REGISTER_NODE (ip6_policer_classify_node) = {
[POLICER_CLASSIFY_NEXT_INDEX_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (l2_policer_classify_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -713,7 +709,6 @@ VLIB_NODE_FN (l2_policer_classify_node) (vlib_main_t * vm,
return policer_classify_inline (vm, node, frame, POLICER_CLASSIFY_TABLE_L2);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_policer_classify_node) = {
.name = "l2-policer-classify",
.vector_size = sizeof (u32),
@@ -725,7 +720,6 @@ VLIB_REGISTER_NODE (l2_policer_classify_node) = {
[POLICER_CLASSIFY_NEXT_INDEX_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
static clib_error_t *
diff --git a/src/vnet/policer/police.h b/src/vnet/policer/police.h
index 5ad249ef40e..8f126e22175 100644
--- a/src/vnet/policer/police.h
+++ b/src/vnet/policer/police.h
@@ -73,8 +73,6 @@ typedef enum
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- u32 lock; // for exclusive access to the struct
-
u32 single_rate; // 1 = single rate policer, 0 = two rate policer
u32 color_aware; // for hierarchical policing
u32 scale; // power-of-2 shift amount for lower rates
@@ -93,11 +91,9 @@ typedef struct
u32 current_bucket; // MOD
u32 extended_limit;
u32 extended_bucket; // MOD
-
- u64 last_update_time; // MOD
u32 thread_index; // Tie policer to a thread, rather than lock
- u32 pad32;
-
+ u64 last_update_time; // MOD
+ u8 *name;
} policer_t;
STATIC_ASSERT_SIZEOF (policer_t, CLIB_CACHE_LINE_BYTES);
diff --git a/src/vnet/policer/police_inlines.h b/src/vnet/policer/police_inlines.h
index 6b0c0ecf725..08000b9a303 100644
--- a/src/vnet/policer/police_inlines.h
+++ b/src/vnet/policer/police_inlines.h
@@ -123,7 +123,7 @@ policer_handoff (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 n_enq, n_left_from, *from;
vnet_policer_main_t *pm;
policer_t *policer;
- u32 this_thread, policer_thread;
+ u32 this_thread, policer_thread = 0;
bool single_policer_node = (policer_index != ~0);
pm = &vnet_policer_main;
diff --git a/src/vnet/policer/policer.api b/src/vnet/policer/policer.api
index f4bf9384f10..a5a60b35c6b 100644
--- a/src/vnet/policer/policer.api
+++ b/src/vnet/policer/policer.api
@@ -13,7 +13,7 @@
* limitations under the License.
*/
-option version = "2.0.0";
+option version = "3.0.0";
import "vnet/interface_types.api";
import "vnet/policer/policer_types.api";
@@ -35,6 +35,16 @@ autoreply define policer_bind
bool bind_enable;
};
+autoreply define policer_bind_v2
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+ u32 worker_index;
+ bool bind_enable;
+};
+
/** \brief policer input: Apply policer as an input feature.
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -52,6 +62,16 @@ autoreply define policer_input
bool apply;
};
+autoreply define policer_input_v2
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+ vl_api_interface_index_t sw_if_index;
+ bool apply;
+};
+
/** \brief policer output: Apply policer as an output feature.
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -69,6 +89,16 @@ autoreply define policer_output
bool apply;
};
+autoreply define policer_output_v2
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+ vl_api_interface_index_t sw_if_index;
+ bool apply;
+};
+
/** \brief Add/del policer
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -106,6 +136,40 @@ define policer_add_del
vl_api_sse2_qos_action_t violate_action;
};
+define policer_add
+{
+ u32 client_index;
+ u32 context;
+
+ string name[64];
+ vl_api_policer_config_t infos;
+};
+
+autoreply define policer_del
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+};
+
+autoreply define policer_update
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+ vl_api_policer_config_t infos;
+};
+
+autoreply define policer_reset
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+};
+
/** \brief Add/del policer response
@param context - sender context, to match reply w/ request
@param retval - return value for request
@@ -118,6 +182,13 @@ define policer_add_del_reply
u32 policer_index;
};
+define policer_add_reply
+{
+ u32 context;
+ i32 retval;
+ u32 policer_index;
+};
+
/** \brief Get list of policers
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -133,6 +204,23 @@ define policer_dump
string match_name[64];
};
+/** \brief Get list of policers
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param policer_index - index of policer in the pool, ~0 to request all
+*/
+define policer_dump_v2
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+};
+
+service {
+ rpc policer_dump_v2 returns stream policer_details;
+};
+
/** \brief Policer operational state response.
@param context - sender context, to match reply w/ request
@param name - policer name
diff --git a/src/vnet/policer/policer.c b/src/vnet/policer/policer.c
index 0513563e1ec..eb7d40a340a 100644
--- a/src/vnet/policer/policer.c
+++ b/src/vnet/policer/policer.c
@@ -49,105 +49,161 @@ vlib_combined_counter_main_t policer_counters[] = {
},
};
-clib_error_t *
-policer_add_del (vlib_main_t *vm, u8 *name, qos_pol_cfg_params_st *cfg,
- u32 *policer_index, u8 is_add)
+int
+policer_add (vlib_main_t *vm, const u8 *name, const qos_pol_cfg_params_st *cfg,
+ u32 *policer_index)
{
vnet_policer_main_t *pm = &vnet_policer_main;
policer_t test_policer;
policer_t *policer;
+ policer_t *pp;
+ qos_pol_cfg_params_st *cp;
uword *p;
u32 pi;
int rv;
+ int i;
p = hash_get_mem (pm->policer_config_by_name, name);
- if (is_add == 0)
- {
- /* free policer config and template */
- if (p == 0)
- {
- vec_free (name);
- return clib_error_return (0, "No such policer configuration");
- }
- pool_put_index (pm->configs, p[0]);
- pool_put_index (pm->policer_templates, p[0]);
- hash_unset_mem (pm->policer_config_by_name, name);
+ if (p != NULL)
+ return VNET_API_ERROR_VALUE_EXIST;
- /* free policer */
- p = hash_get_mem (pm->policer_index_by_name, name);
- if (p == 0)
- {
- vec_free (name);
- return clib_error_return (0, "No such policer");
- }
- pool_put_index (pm->policers, p[0]);
- hash_unset_mem (pm->policer_index_by_name, name);
+ /* Vet the configuration before adding it to the table */
+ rv = pol_logical_2_physical (cfg, &test_policer);
- vec_free (name);
- return 0;
- }
+ if (rv != 0)
+ return VNET_API_ERROR_INVALID_VALUE;
- if (p != 0)
+ pool_get (pm->configs, cp);
+ pool_get_aligned (pm->policers, policer, CLIB_CACHE_LINE_BYTES);
+
+ clib_memcpy (cp, cfg, sizeof (*cp));
+ clib_memcpy (policer, &test_policer, sizeof (*pp));
+
+ policer->name = format (0, "%s%c", name, 0);
+ pi = policer - pm->policers;
+
+ hash_set_mem (pm->policer_config_by_name, policer->name, cp - pm->configs);
+ hash_set_mem (pm->policer_index_by_name, policer->name, pi);
+ *policer_index = pi;
+ policer->thread_index = ~0;
+
+ for (i = 0; i < NUM_POLICE_RESULTS; i++)
{
- vec_free (name);
- return clib_error_return (0, "Policer already exists");
+ vlib_validate_combined_counter (&policer_counters[i], pi);
+ vlib_zero_combined_counter (&policer_counters[i], pi);
}
- /* Vet the configuration before adding it to the table */
- rv = pol_logical_2_physical (cfg, &test_policer);
+ return 0;
+}
+
+int
+policer_del (vlib_main_t *vm, u32 policer_index)
+{
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ policer_t *policer;
+ uword *p;
+
+ if (pool_is_free_index (pm->policers, policer_index))
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ policer = &pm->policers[policer_index];
+
+ p = hash_get_mem (pm->policer_config_by_name, policer->name);
- if (rv == 0)
+ /* free policer config */
+ if (p != NULL)
{
- policer_t *pp;
- qos_pol_cfg_params_st *cp;
- int i;
+ pool_put_index (pm->configs, p[0]);
+ hash_unset_mem (pm->policer_config_by_name, policer->name);
+ }
- pool_get (pm->configs, cp);
- pool_get (pm->policer_templates, pp);
+ /* free policer */
+ hash_unset_mem (pm->policer_index_by_name, policer->name);
+ vec_free (policer->name);
+ pool_put_index (pm->policers, policer_index);
+
+ return 0;
+}
+
+int
+policer_update (vlib_main_t *vm, u32 policer_index,
+ const qos_pol_cfg_params_st *cfg)
+{
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ policer_t test_policer;
+ policer_t *policer;
+ qos_pol_cfg_params_st *cp;
+ uword *p;
+ u8 *name;
+ int rv;
+ int i;
- ASSERT (cp - pm->configs == pp - pm->policer_templates);
+ if (pool_is_free_index (pm->policers, policer_index))
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
- clib_memcpy (cp, cfg, sizeof (*cp));
- clib_memcpy (pp, &test_policer, sizeof (*pp));
+ policer = &pm->policers[policer_index];
- hash_set_mem (pm->policer_config_by_name, name, cp - pm->configs);
- pool_get_aligned (pm->policers, policer, CLIB_CACHE_LINE_BYTES);
- policer[0] = pp[0];
- pi = policer - pm->policers;
- hash_set_mem (pm->policer_index_by_name, name, pi);
- *policer_index = pi;
- policer->thread_index = ~0;
+ /* Vet the configuration before adding it to the table */
+ rv = pol_logical_2_physical (cfg, &test_policer);
+ if (rv != 0)
+ return VNET_API_ERROR_INVALID_VALUE;
- for (i = 0; i < NUM_POLICE_RESULTS; i++)
- {
- vlib_validate_combined_counter (&policer_counters[i], pi);
- vlib_zero_combined_counter (&policer_counters[i], pi);
- }
+ p = hash_get_mem (pm->policer_config_by_name, policer->name);
+
+ if (PREDICT_TRUE (p != NULL))
+ {
+ cp = &pm->configs[p[0]];
}
else
{
- vec_free (name);
- return clib_error_return (0, "Config failed sanity check");
+ /* recover from a missing configuration */
+ pool_get (pm->configs, cp);
+ hash_set_mem (pm->policer_config_by_name, policer->name,
+ cp - pm->configs);
}
+ name = policer->name;
+
+ clib_memcpy (cp, cfg, sizeof (*cp));
+ clib_memcpy (policer, &test_policer, sizeof (*policer));
+
+ policer->name = name;
+ policer->thread_index = ~0;
+
+ for (i = 0; i < NUM_POLICE_RESULTS; i++)
+ vlib_zero_combined_counter (&policer_counters[i], policer_index);
+
return 0;
}
int
-policer_bind_worker (u8 *name, u32 worker, bool bind)
+policer_reset (vlib_main_t *vm, u32 policer_index)
{
vnet_policer_main_t *pm = &vnet_policer_main;
policer_t *policer;
- uword *p;
- p = hash_get_mem (pm->policer_index_by_name, name);
- if (p == 0)
- {
- return VNET_API_ERROR_NO_SUCH_ENTRY;
- }
+ if (pool_is_free_index (pm->policers, policer_index))
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ policer = &pm->policers[policer_index];
+
+ policer->current_bucket = policer->current_limit;
+ policer->extended_bucket = policer->extended_limit;
+
+ return 0;
+}
+
+int
+policer_bind_worker (u32 policer_index, u32 worker, bool bind)
+{
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ policer_t *policer;
+
+ if (pool_is_free_index (pm->policers, policer_index))
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
- policer = &pm->policers[p[0]];
+ policer = &pm->policers[policer_index];
if (bind)
{
@@ -166,21 +222,9 @@ policer_bind_worker (u8 *name, u32 worker, bool bind)
}
int
-policer_input (u8 *name, u32 sw_if_index, vlib_dir_t dir, bool apply)
+policer_input (u32 policer_index, u32 sw_if_index, vlib_dir_t dir, bool apply)
{
vnet_policer_main_t *pm = &vnet_policer_main;
- policer_t *policer;
- u32 policer_index;
- uword *p;
-
- p = hash_get_mem (pm->policer_index_by_name, name);
- if (p == 0)
- {
- return VNET_API_ERROR_NO_SUCH_ENTRY;
- }
-
- policer = &pm->policers[p[0]];
- policer_index = policer - pm->policers;
if (apply)
{
@@ -210,20 +254,21 @@ policer_input (u8 *name, u32 sw_if_index, vlib_dir_t dir, bool apply)
u8 *
format_policer_instance (u8 * s, va_list * va)
{
+ vnet_policer_main_t *pm = &vnet_policer_main;
policer_t *i = va_arg (*va, policer_t *);
- uword pi = va_arg (*va, uword);
+ u32 policer_index = i - pm->policers;
int result;
vlib_counter_t counts[NUM_POLICE_RESULTS];
for (result = 0; result < NUM_POLICE_RESULTS; result++)
{
- vlib_get_combined_counter (&policer_counters[result], pi,
+ vlib_get_combined_counter (&policer_counters[result], policer_index,
&counts[result]);
}
- s = format (s, "policer at %llx: %s rate, %s color-aware\n",
- i, i->single_rate ? "single" : "dual",
- i->color_aware ? "is" : "not");
+ s =
+ format (s, "Policer at index %d: %s rate, %s color-aware\n", policer_index,
+ i->single_rate ? "single" : "dual", i->color_aware ? "is" : "not");
s = format (s, "cir %u tok/period, pir %u tok/period, scale %u\n",
i->cir_tokens_per_period, i->pir_tokens_per_period, i->scale);
s = format (s, "cur lim %u, cur bkt %u, ext lim %u, ext bkt %u\n",
@@ -475,6 +520,7 @@ unformat_policer_classify_next_index (unformat_input_t * input, va_list * va)
return 0;
p = hash_get_mem (pm->policer_index_by_name, match_name);
+ vec_free (match_name);
if (p == 0)
return 0;
@@ -513,12 +559,16 @@ static clib_error_t *
policer_add_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
{
+ vnet_policer_main_t *pm = &vnet_policer_main;
qos_pol_cfg_params_st c;
unformat_input_t _line_input, *line_input = &_line_input;
- u8 is_add = 1;
u8 *name = 0;
+ uword *p;
u32 pi;
+ u32 policer_index = ~0;
+ int rv = 0;
clib_error_t *error = NULL;
+ u8 is_update = cmd->function_arg;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
@@ -528,9 +578,9 @@ policer_add_command_fn (vlib_main_t *vm, unformat_input_t *input,
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "del"))
- is_add = 0;
- else if (unformat (line_input, "name %s", &name))
+ if (unformat (line_input, "name %s", &name))
+ ;
+ else if (is_update && unformat (line_input, "index %u", &policer_index))
;
else if (unformat (line_input, "color-aware"))
c.color_aware = 1;
@@ -546,10 +596,41 @@ policer_add_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
}
- error = policer_add_del (vm, name, &c, &pi, is_add);
+ if (is_update)
+ {
+ if (~0 == policer_index && 0 != name)
+ {
+ p = hash_get_mem (pm->policer_index_by_name, name);
+ if (p != NULL)
+ policer_index = p[0];
+ }
+
+ if (~0 != policer_index)
+ {
+ rv = policer_update (vm, policer_index, &c);
+ }
+ }
+ else
+ {
+ rv = policer_add (vm, name, &c, &pi);
+ }
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "No such policer");
+ break;
+ case VNET_API_ERROR_VALUE_EXIST:
+ error = clib_error_return (0, "Policer already exists");
+ break;
+ case VNET_API_ERROR_INVALID_VALUE:
+ error = clib_error_return (0, "Config failed sanity check");
+ break;
+ }
done:
unformat_free (line_input);
+ vec_free (name);
return error;
}
@@ -560,6 +641,10 @@ policer_del_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
unformat_input_t _line_input, *line_input = &_line_input;
clib_error_t *error = NULL;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ int rv;
+ u32 policer_index = ~0;
+ uword *p;
u8 *name = 0;
/* Get a line of input. */
@@ -570,6 +655,8 @@ policer_del_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
if (unformat (line_input, "name %s", &name))
;
+ else if (unformat (line_input, "index %u", &policer_index))
+ ;
else
{
error = clib_error_return (0, "unknown input `%U'",
@@ -578,10 +665,30 @@ policer_del_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
}
- error = policer_add_del (vm, name, NULL, NULL, 0);
+ if (~0 == policer_index && 0 != name)
+ {
+ p = hash_get_mem (pm->policer_index_by_name, name);
+ if (p != NULL)
+ policer_index = p[0];
+ }
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (~0 != policer_index)
+ rv = policer_del (vm, policer_index);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_INVALID_VALUE:
+ error = clib_error_return (0, "No such policer configuration");
+ break;
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "No such policer");
+ break;
+ }
done:
unformat_free (line_input);
+ vec_free (name);
return error;
}
@@ -592,13 +699,14 @@ policer_bind_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
unformat_input_t _line_input, *line_input = &_line_input;
clib_error_t *error = NULL;
- u8 bind, *name = 0;
- u32 worker;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ u8 bind = 1;
+ u8 *name = 0;
+ u32 worker = ~0;
+ u32 policer_index = ~0;
+ uword *p;
int rv;
- bind = 1;
- worker = ~0;
-
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -607,6 +715,8 @@ policer_bind_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
if (unformat (line_input, "name %s", &name))
;
+ else if (unformat (line_input, "index %u", &policer_index))
+ ;
else if (unformat (line_input, "unbind"))
bind = 0;
else if (unformat (line_input, "%d", &worker))
@@ -626,7 +736,16 @@ policer_bind_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
else
{
- rv = policer_bind_worker (name, worker, bind);
+ if (~0 == policer_index && 0 != name)
+ {
+ p = hash_get_mem (pm->policer_index_by_name, name);
+ if (p != NULL)
+ policer_index = p[0];
+ }
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (~0 != policer_index)
+ rv = policer_bind_worker (policer_index, worker, bind);
if (rv)
error = clib_error_return (0, "failed: `%d'", rv);
@@ -634,6 +753,7 @@ policer_bind_command_fn (vlib_main_t *vm, unformat_input_t *input,
done:
unformat_free (line_input);
+ vec_free (name);
return error;
}
@@ -644,14 +764,15 @@ policer_input_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
unformat_input_t _line_input, *line_input = &_line_input;
clib_error_t *error = NULL;
- u8 apply, *name = 0;
- u32 sw_if_index;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ u8 apply = 1;
+ u8 *name = 0;
+ u32 sw_if_index = ~0;
+ u32 policer_index = ~0;
+ uword *p;
int rv;
vlib_dir_t dir = cmd->function_arg;
- apply = 1;
- sw_if_index = ~0;
-
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -660,6 +781,8 @@ policer_input_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
if (unformat (line_input, "name %s", &name))
;
+ else if (unformat (line_input, "index %u", &policer_index))
+ ;
else if (unformat (line_input, "unapply"))
apply = 0;
else if (unformat (line_input, "%U", unformat_vnet_sw_interface,
@@ -680,7 +803,16 @@ policer_input_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
else
{
- rv = policer_input (name, sw_if_index, dir, apply);
+ if (~0 == policer_index && 0 != name)
+ {
+ p = hash_get_mem (pm->policer_index_by_name, name);
+ if (p != NULL)
+ policer_index = p[0];
+ }
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (~0 != policer_index)
+ rv = policer_input (policer_index, sw_if_index, dir, apply);
if (rv)
error = clib_error_return (0, "failed: `%d'", rv);
@@ -688,101 +820,199 @@ policer_input_command_fn (vlib_main_t *vm, unformat_input_t *input,
done:
unformat_free (line_input);
+ vec_free (name);
+
+ return error;
+}
+
+static clib_error_t *
+policer_reset_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = NULL;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ int rv;
+ u32 policer_index = ~0;
+ uword *p;
+ u8 *name = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "name %s", &name))
+ ;
+ else if (unformat (line_input, "index %u", &policer_index))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (~0 == policer_index && 0 != name)
+ {
+ p = hash_get_mem (pm->policer_index_by_name, name);
+ if (p != NULL)
+ policer_index = p[0];
+ }
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (~0 != policer_index)
+ rv = policer_reset (vm, policer_index);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "No such policer");
+ break;
+ }
+
+done:
+ unformat_free (line_input);
+ vec_free (name);
return error;
}
VLIB_CLI_COMMAND (configure_policer_command, static) = {
.path = "configure policer",
- .short_help = "configure policer name <name> <params> ",
+ .short_help = "configure policer [name <name> | index <index>] [type 1r2c | "
+ "1r3c | 2r3c-2698 "
+ "| 2r3c-4115] [color-aware] [cir <cir>] [cb <cb>] [eir <eir>] "
+ "[eb <eb>] [rate kbps | pps] [round closest | up | down] "
+ "[conform-action drop | transmit | mark-and-transmit <dscp>] "
+ "[exceed-action drop | transmit | mark-and-transmit <dscp>] "
+ "[violate-action drop | transmit | mark-and-transmit <dscp>]",
.function = policer_add_command_fn,
+ .function_arg = 1
};
VLIB_CLI_COMMAND (policer_add_command, static) = {
.path = "policer add",
- .short_help = "policer name <name> <params> ",
+ .short_help = "policer add name <name> [type 1r2c | 1r3c | 2r3c-2698 | "
+ "2r3c-4115] [color-aware] [cir <cir>] [cb <cb>] [eir <eir>] "
+ "[eb <eb>] [rate kbps | pps] [round closest | up | down] "
+ "[conform-action drop | transmit | mark-and-transmit <dscp>] "
+ "[exceed-action drop | transmit | mark-and-transmit <dscp>] "
+ "[violate-action drop | transmit | mark-and-transmit <dscp>]",
.function = policer_add_command_fn,
+ .function_arg = 0
};
VLIB_CLI_COMMAND (policer_del_command, static) = {
.path = "policer del",
- .short_help = "policer del name <name> ",
+ .short_help = "policer del [name <name> | index <index>]",
.function = policer_del_command_fn,
};
VLIB_CLI_COMMAND (policer_bind_command, static) = {
.path = "policer bind",
- .short_help = "policer bind [unbind] name <name> <worker>",
+ .short_help = "policer bind [unbind] [name <name> | index <index>] <worker>",
.function = policer_bind_command_fn,
};
VLIB_CLI_COMMAND (policer_input_command, static) = {
.path = "policer input",
- .short_help = "policer input [unapply] name <name> <interfac>",
+ .short_help =
+ "policer input [unapply] [name <name> | index <index>] <interface>",
.function = policer_input_command_fn,
.function_arg = VLIB_RX,
};
VLIB_CLI_COMMAND (policer_output_command, static) = {
.path = "policer output",
- .short_help = "policer output [unapply] name <name> <interfac>",
+ .short_help =
+ "policer output [unapply] [name <name> | index <index>] <interface>",
.function = policer_input_command_fn,
.function_arg = VLIB_TX,
};
+VLIB_CLI_COMMAND (policer_reset_command, static) = {
+ .path = "policer reset",
+ .short_help = "policer reset [name <name> | index <index>]",
+ .function = policer_reset_command_fn
+};
+
static clib_error_t *
show_policer_command_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
vnet_policer_main_t *pm = &vnet_policer_main;
- hash_pair_t *p;
- u32 pool_index;
- u8 *match_name = 0;
- u8 *name;
- uword *pi;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ policer_t *policer;
+ u32 policer_index = ~0;
+ u8 *name = 0;
+ uword *ci, *pi;
qos_pol_cfg_params_st *config;
- policer_t *templ;
-
- (void) unformat (input, "name %s", &match_name);
-
- /* *INDENT-OFF* */
- hash_foreach_pair (p, pm->policer_config_by_name,
- ({
- name = (u8 *) p->key;
- if (match_name == 0 || !strcmp((char *) name, (char *) match_name))
- {
- pi = hash_get_mem (pm->policer_index_by_name, name);
-
- pool_index = p->value[0];
- config = pool_elt_at_index (pm->configs, pool_index);
- templ = pool_elt_at_index (pm->policer_templates, pool_index);
- vlib_cli_output (vm, "Name \"%s\" %U ", name, format_policer_config,
- config);
- if (pi)
- {
- vlib_cli_output (vm, "Template %U", format_policer_instance, templ,
- pi[0]);
- }
- else
- {
- vlib_cli_output (
- vm, "Cannot print template - policer index hash lookup failed");
- }
- vlib_cli_output (vm, "-----------");
- }
- }));
- /* *INDENT-ON* */
- return 0;
+ clib_error_t *error = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ {
+ pool_foreach (policer, pm->policers)
+ {
+ ci = hash_get_mem (pm->policer_config_by_name, policer->name);
+ config = pool_elt_at_index (pm->configs, ci[0]);
+
+ vlib_cli_output (vm, "Name \"%s\" %U ", policer->name,
+ format_policer_config, config);
+ vlib_cli_output (vm, "%U", format_policer_instance, policer);
+ vlib_cli_output (vm, "-----------");
+ }
+ return 0;
+ }
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "name %s", &name))
+ ;
+ else if (unformat (line_input, "index %u", &policer_index))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (~0 == policer_index && 0 != name)
+ {
+ pi = hash_get_mem (pm->policer_index_by_name, name);
+ if (pi != NULL)
+ policer_index = pi[0];
+ }
+
+ if (~0 == policer_index || pool_is_free_index (pm->policers, policer_index))
+ goto done;
+
+ policer = &pm->policers[policer_index];
+ ci = hash_get_mem (pm->policer_config_by_name, policer->name);
+ config = pool_elt_at_index (pm->configs, ci[0]);
+ vlib_cli_output (vm, "Name \"%s\" %U ", policer->name, format_policer_config,
+ config);
+ vlib_cli_output (vm, "%U", format_policer_instance, policer);
+ vlib_cli_output (vm, "-----------");
+
+done:
+ unformat_free (line_input);
+ vec_free (name);
+
+ return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_policer_command, static) = {
- .path = "show policer",
- .short_help = "show policer [name]",
- .function = show_policer_command_fn,
+ .path = "show policer",
+ .short_help = "show policer [name <name> | index <index>]",
+ .function = show_policer_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_policer_pools_command_fn (vlib_main_t * vm,
@@ -791,19 +1021,15 @@ show_policer_pools_command_fn (vlib_main_t * vm,
{
vnet_policer_main_t *pm = &vnet_policer_main;
- vlib_cli_output (vm, "pool sizes: configs=%d templates=%d policers=%d",
- pool_elts (pm->configs),
- pool_elts (pm->policer_templates),
- pool_elts (pm->policers));
+ vlib_cli_output (vm, "pool sizes: configs=%d policers=%d",
+ pool_elts (pm->configs), pool_elts (pm->policers));
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_policer_pools_command, static) = {
.path = "show policer pools",
.short_help = "show policer pools",
.function = show_policer_pools_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
policer_init (vlib_main_t * vm)
diff --git a/src/vnet/policer/policer.h b/src/vnet/policer/policer.h
index f5b6c0d3b31..7ce7fc79d47 100644
--- a/src/vnet/policer/policer.h
+++ b/src/vnet/policer/policer.h
@@ -32,7 +32,7 @@ typedef struct
qos_pol_cfg_params_st *configs;
policer_t *policer_templates;
- /* Config by name hash */
+ /* Config by policer name hash */
uword *policer_config_by_name;
/* Policer by name hash */
@@ -68,11 +68,16 @@ typedef enum
} vnet_policer_next_t;
u8 *format_policer_instance (u8 * s, va_list * va);
-clib_error_t *policer_add_del (vlib_main_t *vm, u8 *name,
- qos_pol_cfg_params_st *cfg, u32 *policer_index,
- u8 is_add);
-int policer_bind_worker (u8 *name, u32 worker, bool bind);
-int policer_input (u8 *name, u32 sw_if_index, vlib_dir_t dir, bool apply);
+int policer_add (vlib_main_t *vm, const u8 *name,
+ const qos_pol_cfg_params_st *cfg, u32 *policer_index);
+
+int policer_update (vlib_main_t *vm, u32 policer_index,
+ const qos_pol_cfg_params_st *cfg);
+int policer_del (vlib_main_t *vm, u32 policer_index);
+int policer_reset (vlib_main_t *vm, u32 policer_index);
+int policer_bind_worker (u32 policer_index, u32 worker, bool bind);
+int policer_input (u32 policer_index, u32 sw_if_index, vlib_dir_t dir,
+ bool apply);
#endif /* __included_policer_h__ */
diff --git a/src/vnet/policer/policer.rst b/src/vnet/policer/policer.rst
new file mode 100644
index 00000000000..0e7369e373b
--- /dev/null
+++ b/src/vnet/policer/policer.rst
@@ -0,0 +1,217 @@
+.. _policer:
+
+Policing
+========
+
+VPP implements several policer types that do not always conform
+to the related RFCs [#rfc2697]_ [#rfc2698]_ [#rfc4115]_.
+Only policers implemented in VPP will be presented, along with
+the differences they have compared to RFCs.
+
+.. contents:: :local:
+ :depth: 1
+
+
+1 rate 2 color (1r2c)
+---------------------
+
+This is the most straightforward policer. There is no RFC describing it,
+however its description can be found in many vendor documents [#juniper]_ [#cisco]_.
+
+A 1r2c policer is great to classify incoming packets into two categories:
+conforming packets (said green), and violating ones (said red).
+
+Parameters
+~~~~~~~~~~
+
+To set-up such a policer, only two parameters are needed:
+
+Committed Information Rate (CIR)
+ Given in bytes per second, this parameter is the average
+ throughput allowed by the policer.
+
+ It sets the limit between conforming arriving packets (those making the
+ traffic fall below the CIR), and violating arriving packets
+ (those making the traffic exceed the CIR).
+
+Committed Burst Size (CBS)
+ It represents the size (in bytes) of a token bucket used to allow
+ some burstiness from the incoming traffic.
+
+.. figure:: /_images/policer-1r2c-bucket.png
+ :align: center
+ :scale: 25%
+
+ Figure 1: 1r2c bucket filling logic
+
+The committed token bucket (C) is filling up at CIR tokens (bytes)
+per second, up to CBS tokens. All overflowing tokens are lost.
+
+Color-Blind algorithm
+~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: /_images/policer-1r2c-blind.png
+ :align: center
+ :scale: 75%
+
+|
+
+Color-Aware algorithm
+~~~~~~~~~~~~~~~~~~~~~
+
+In online documentation, there is no trace of a color-aware 1r2c policer.
+However, VPP implementation allows such a thing.
+
+.. image:: /_images/policer-1r2c-aware.png
+ :align: center
+ :scale: 75%
+
+|
+
+
+1 rate 3 color (1r3c) RFC 2697 [#rfc2697]_
+------------------------------------------
+
+As for the `1 rate 2 color (1r2c)`_ policer, only one rate parameter is required
+to setup a 1r3c policer. However, such a policer adds another kind of packet category:
+exceeding ones (said yellow).
+
+Parameters
+~~~~~~~~~~
+
+To set-up such a policer, three parameters are needed:
+
+Committed Information Rate (CIR)
+ As in the `1 rate 2 color (1r2c)`_ policer.
+
+Committed Burst Size (CBS)
+ As in the `1 rate 2 color (1r2c)`_ policer.
+
+Excess Burst Size (EBS)
+ It represents the size (in bytes) of a second token bucket used
+ to allow an additional burstiness from the incoming traffic, when
+ traffic has been below the CIR for some time.
+
+.. figure:: /_images/policer-1r3c-bucket.png
+ :align: center
+ :scale: 25%
+
+ Figure 2: 1r3c buckets filling logic
+
+The committed token bucket (C) is filling up at CIR tokens (bytes)
+per second, up to CBS tokens. When C is full, tokens are overflowing
+into the excess token bucket (E), up to EBS tokens. Only overflowing
+tokens from E are lost.
+
+Color-Blind algorithm
+~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: /_images/policer-1r3c-blind.png
+ :align: center
+ :scale: 75%
+
+|
+
+Color-Aware algorithm
+~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: /_images/policer-1r3c-aware.png
+ :align: center
+ :scale: 75%
+
+|
+
+Notes
+~~~~~
+
+In the RFC 2697 [#rfc2697]_ describing the 1r3c policer, conforming (green) packets
+only consume tokens from the token bucket C. In VPP, however, they also consume tokens from E.
+
+One way to stick to the RFC is then to set the EBS parameter to be superior to CBS, so that
+EBS - CBS corresponds to the EBS from the RFC.
+
+However, VPP does not enforce setting EBS > CBS, which could result in undesired behavior.
+
+2 rate 3 color (2r3c) RFC 2698 [#rfc2698]_
+------------------------------------------
+
+Instead of setting the limit between yellow and red packets in terms of bursts,
+as it is done by `1 rate 3 color (1r3c) RFC 2697`_ policers, two rate policers introduce
+another rate parameter to discriminate between those two kinds of packets.
+
+Parameters
+~~~~~~~~~~
+
+To set-up such a policer, four parameters are needed:
+
+Committed Information Rate (CIR)
+ As in the `1 rate 2 color (1r2c)`_ policer.
+
+Committed Burst Size (CBS)
+ As in the `1 rate 2 color (1r2c)`_ policer.
+
+Peak Information Rate (PIR)
+ Given in bytes per second, this parameter is the average
+ throughput allowed by the policer when there is a peak in
+ traffic.
+
+ It sets a second limit between exceeding arriving packets
+ (those making the traffic fall below the PIR, but above CIR),
+ and violating arriving packets (those making the traffic exceed the PIR).
+
+Peak Burst Size (PBS)
+ It represents the size (in bytes) of a second token bucket used
+ to allow an additional peak traffic.
+
+.. figure:: /_images/policer-2r3c-bucket.png
+ :align: center
+ :scale: 25%
+
+ Figure 3: 2r3c-rfc2698 buckets filling logic
+
+The committed token bucket (C) is filling up at CIR tokens (bytes)
+per second, up to CBS tokens. In the meantime, the peak token bucket (P)
+is filling up at PIR tokens per second, up to PBS. All overflowing tokens
+from C and P are lost.
+
+Color-Blind algorithm
+~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: /_images/policer-2r3c-blind.png
+ :align: center
+ :scale: 75%
+
+|
+
+Color-Aware algorithm
+~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: /_images/policer-2r3c-aware.png
+ :align: center
+ :scale: 50%
+
+|
+
+Notes
+~~~~~
+
+To have a working policer, the condition PIR >= CIR needs to hold.
+Indeed, we assume that peak traffic should have a greater
+rate than committed traffic.
+
+
+2 rate 3 color (2r3c) RFC 4115 [#rfc4115]_
+------------------------------------------
+
+The 2r3c-RFC4115 is an allowed choice by VPP. However, there is currently
+no implementation of such a policer. Hence, the only two rate policer VPP
+implements is the `2 rate 3 color (2r3c) RFC 2698`_ policer.
+
+
+.. rubric:: References:
+
+.. [#juniper] https://www.juniper.net/documentation/us/en/software/junos/traffic-mgmt-nfx/routing-policy/topics/concept/tcm-overview-cos-qfx-series-understanding.html
+.. [#cisco] https://www.cisco.com/c/en/us/td/docs/ios-xml/ios/qos_mqc/configuration/xe-16-8/qos-mqc-xe-16-8-book/qos-pkt-policing.html
+.. [#rfc2697] https://www.rfc-editor.org/rfc/rfc2697.html
+.. [#rfc2698] https://www.rfc-editor.org/rfc/rfc2698.html
+.. [#rfc4115] https://www.rfc-editor.org/rfc/rfc4115.html
diff --git a/src/vnet/policer/policer_api.c b/src/vnet/policer/policer_api.c
index 4f9baa09feb..df35b472a89 100644
--- a/src/vnet/policer/policer_api.c
+++ b/src/vnet/policer/policer_api.c
@@ -35,126 +35,293 @@ static void
vl_api_policer_add_del_t_handler (vl_api_policer_add_del_t * mp)
{
vlib_main_t *vm = vlib_get_main ();
+ vnet_policer_main_t *pm = &vnet_policer_main;
vl_api_policer_add_del_reply_t *rmp;
int rv = 0;
- u8 *name = NULL;
+ uword *p;
+ char name[sizeof (mp->name) + 1];
qos_pol_cfg_params_st cfg;
- clib_error_t *error;
u32 policer_index;
- name = format (0, "%s", mp->name);
- vec_terminate_c_string (name);
-
- clib_memset (&cfg, 0, sizeof (cfg));
- cfg.rfc = (qos_policer_type_en) mp->type;
- cfg.rnd_type = (qos_round_type_en) mp->round_type;
- cfg.rate_type = (qos_rate_type_en) mp->rate_type;
- cfg.rb.kbps.cir_kbps = ntohl (mp->cir);
- cfg.rb.kbps.eir_kbps = ntohl (mp->eir);
- cfg.rb.kbps.cb_bytes = clib_net_to_host_u64 (mp->cb);
- cfg.rb.kbps.eb_bytes = clib_net_to_host_u64 (mp->eb);
- cfg.conform_action.action_type =
- (qos_action_type_en) mp->conform_action.type;
- cfg.conform_action.dscp = mp->conform_action.dscp;
- cfg.exceed_action.action_type = (qos_action_type_en) mp->exceed_action.type;
- cfg.exceed_action.dscp = mp->exceed_action.dscp;
- cfg.violate_action.action_type =
- (qos_action_type_en) mp->violate_action.type;
- cfg.violate_action.dscp = mp->violate_action.dscp;
-
- cfg.color_aware = mp->color_aware;
-
- error = policer_add_del (vm, name, &cfg, &policer_index, mp->is_add);
-
- if (error)
+ snprintf (name, sizeof (name), "%s", mp->name);
+
+ if (mp->is_add)
{
- rv = VNET_API_ERROR_UNSPECIFIED;
- clib_error_free (error);
+ clib_memset (&cfg, 0, sizeof (cfg));
+ cfg.rfc = (qos_policer_type_en) mp->type;
+ cfg.rnd_type = (qos_round_type_en) mp->round_type;
+ cfg.rate_type = (qos_rate_type_en) mp->rate_type;
+ cfg.rb.kbps.cir_kbps = ntohl (mp->cir);
+ cfg.rb.kbps.eir_kbps = ntohl (mp->eir);
+ cfg.rb.kbps.cb_bytes = clib_net_to_host_u64 (mp->cb);
+ cfg.rb.kbps.eb_bytes = clib_net_to_host_u64 (mp->eb);
+ cfg.conform_action.action_type =
+ (qos_action_type_en) mp->conform_action.type;
+ cfg.conform_action.dscp = mp->conform_action.dscp;
+ cfg.exceed_action.action_type =
+ (qos_action_type_en) mp->exceed_action.type;
+ cfg.exceed_action.dscp = mp->exceed_action.dscp;
+ cfg.violate_action.action_type =
+ (qos_action_type_en) mp->violate_action.type;
+ cfg.violate_action.dscp = mp->violate_action.dscp;
+ cfg.color_aware = mp->color_aware;
+
+ rv = policer_add (vm, (u8 *) name, &cfg, &policer_index);
}
+ else
+ {
+ p = hash_get_mem (pm->policer_index_by_name, name);
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (p != NULL)
+ rv = policer_del (vm, p[0]);
+ }
+
+ REPLY_MACRO2 (VL_API_POLICER_ADD_DEL_REPLY, ({
+ if (rv == 0 && mp->is_add)
+ rmp->policer_index = htonl (policer_index);
+ else
+ rmp->policer_index = ~0;
+ }));
+}
+
+static_always_inline void
+policer_set_configuration (qos_pol_cfg_params_st *cfg,
+ vl_api_policer_config_t *infos)
+{
+ clib_memset (cfg, 0, sizeof (*cfg));
+ cfg->rfc = (qos_policer_type_en) infos->type;
+ cfg->rnd_type = (qos_round_type_en) infos->round_type;
+ cfg->rate_type = (qos_rate_type_en) infos->rate_type;
+ cfg->rb.kbps.cir_kbps = ntohl (infos->cir);
+ cfg->rb.kbps.eir_kbps = ntohl (infos->eir);
+ cfg->rb.kbps.cb_bytes = clib_net_to_host_u64 (infos->cb);
+ cfg->rb.kbps.eb_bytes = clib_net_to_host_u64 (infos->eb);
+ cfg->conform_action.action_type =
+ (qos_action_type_en) infos->conform_action.type;
+ cfg->conform_action.dscp = infos->conform_action.dscp;
+ cfg->exceed_action.action_type =
+ (qos_action_type_en) infos->exceed_action.type;
+ cfg->exceed_action.dscp = infos->exceed_action.dscp;
+ cfg->violate_action.action_type =
+ (qos_action_type_en) infos->violate_action.type;
+ cfg->violate_action.dscp = infos->violate_action.dscp;
+ cfg->color_aware = infos->color_aware;
+}
+
+static void
+vl_api_policer_add_t_handler (vl_api_policer_add_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_policer_add_reply_t *rmp;
+ int rv = 0;
+ char name[sizeof (mp->name) + 1];
+ qos_pol_cfg_params_st cfg;
+ u32 policer_index;
+
+ snprintf (name, sizeof (name), "%s", mp->name);
+
+ policer_set_configuration (&cfg, &mp->infos);
+
+ rv = policer_add (vm, (u8 *) name, &cfg, &policer_index);
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_POLICER_ADD_DEL_REPLY,
- ({
- if (rv == 0 && mp->is_add)
- rmp->policer_index = ntohl(policer_index);
- else
- rmp->policer_index = ~0;
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO2 (VL_API_POLICER_ADD_REPLY, ({
+ if (rv == 0)
+ rmp->policer_index = htonl (policer_index);
+ else
+ rmp->policer_index = ~0;
+ }));
+}
+
+static void
+vl_api_policer_del_t_handler (vl_api_policer_del_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_policer_del_reply_t *rmp;
+ u32 policer_index;
+ int rv = 0;
+
+ policer_index = ntohl (mp->policer_index);
+ rv = policer_del (vm, policer_index);
+
+ REPLY_MACRO (VL_API_POLICER_DEL_REPLY);
+}
+
+static void
+vl_api_policer_update_t_handler (vl_api_policer_update_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_policer_update_reply_t *rmp;
+ int rv = 0;
+ qos_pol_cfg_params_st cfg;
+ u32 policer_index;
+
+ policer_set_configuration (&cfg, &mp->infos);
+
+ policer_index = ntohl (mp->policer_index);
+ rv = policer_update (vm, policer_index, &cfg);
+
+ REPLY_MACRO (VL_API_POLICER_UPDATE_REPLY);
+}
+
+static void
+vl_api_policer_reset_t_handler (vl_api_policer_reset_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_policer_reset_reply_t *rmp;
+ u32 policer_index;
+ int rv = 0;
+
+ policer_index = ntohl (mp->policer_index);
+ rv = policer_reset (vm, policer_index);
+
+ REPLY_MACRO (VL_API_POLICER_RESET_REPLY);
}
static void
vl_api_policer_bind_t_handler (vl_api_policer_bind_t *mp)
{
vl_api_policer_bind_reply_t *rmp;
- u8 *name;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ char name[sizeof (mp->name) + 1];
+ uword *p;
u32 worker_index;
u8 bind_enable;
int rv;
- name = format (0, "%s", mp->name);
- vec_terminate_c_string (name);
+ snprintf (name, sizeof (name), "%s", mp->name);
worker_index = ntohl (mp->worker_index);
bind_enable = mp->bind_enable;
- rv = policer_bind_worker (name, worker_index, bind_enable);
- vec_free (name);
+ p = hash_get_mem (pm->policer_index_by_name, name);
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (p != NULL)
+ rv = policer_bind_worker (p[0], worker_index, bind_enable);
+
REPLY_MACRO (VL_API_POLICER_BIND_REPLY);
}
static void
+vl_api_policer_bind_v2_t_handler (vl_api_policer_bind_v2_t *mp)
+{
+ vl_api_policer_bind_v2_reply_t *rmp;
+ u32 policer_index;
+ u32 worker_index;
+ u8 bind_enable;
+ int rv;
+
+ policer_index = ntohl (mp->policer_index);
+ worker_index = ntohl (mp->worker_index);
+ bind_enable = mp->bind_enable;
+
+ rv = policer_bind_worker (policer_index, worker_index, bind_enable);
+
+ REPLY_MACRO (VL_API_POLICER_BIND_V2_REPLY);
+}
+
+static void
vl_api_policer_input_t_handler (vl_api_policer_input_t *mp)
{
- vl_api_policer_bind_reply_t *rmp;
- u8 *name;
+ vl_api_policer_input_reply_t *rmp;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ char name[sizeof (mp->name) + 1];
+ uword *p;
u32 sw_if_index;
u8 apply;
int rv;
VALIDATE_SW_IF_INDEX (mp);
- name = format (0, "%s", mp->name);
- vec_terminate_c_string (name);
+ snprintf (name, sizeof (name), "%s", mp->name);
sw_if_index = ntohl (mp->sw_if_index);
apply = mp->apply;
- rv = policer_input (name, sw_if_index, VLIB_RX, apply);
- vec_free (name);
+ p = hash_get_mem (pm->policer_index_by_name, name);
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (p != NULL)
+ rv = policer_input (p[0], sw_if_index, VLIB_RX, apply);
BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_POLICER_INPUT_REPLY);
}
static void
-vl_api_policer_output_t_handler (vl_api_policer_input_t *mp)
+vl_api_policer_input_v2_t_handler (vl_api_policer_input_v2_t *mp)
{
- vl_api_policer_bind_reply_t *rmp;
- u8 *name;
+ vl_api_policer_input_v2_reply_t *rmp;
+ u32 policer_index;
+ u32 sw_if_index;
+ u8 apply;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ policer_index = ntohl (mp->policer_index);
+ sw_if_index = ntohl (mp->sw_if_index);
+ apply = mp->apply;
+
+ rv = policer_input (policer_index, sw_if_index, VLIB_RX, apply);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_POLICER_INPUT_REPLY);
+}
+
+static void
+vl_api_policer_output_t_handler (vl_api_policer_output_t *mp)
+{
+ vl_api_policer_output_reply_t *rmp;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ char name[sizeof (mp->name) + 1];
+ uword *p;
u32 sw_if_index;
u8 apply;
int rv;
VALIDATE_SW_IF_INDEX (mp);
- name = format (0, "%s", mp->name);
- vec_terminate_c_string (name);
+ snprintf (name, sizeof (name), "%s", mp->name);
sw_if_index = ntohl (mp->sw_if_index);
apply = mp->apply;
- rv = policer_input (name, sw_if_index, VLIB_TX, apply);
- vec_free (name);
+ p = hash_get_mem (pm->policer_index_by_name, name);
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (p != NULL)
+ rv = policer_input (p[0], sw_if_index, VLIB_TX, apply);
BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_POLICER_OUTPUT_REPLY);
}
static void
-send_policer_details (u8 *name, qos_pol_cfg_params_st *config,
- policer_t *templ, vl_api_registration_t *reg,
- u32 context)
+vl_api_policer_output_v2_t_handler (vl_api_policer_output_v2_t *mp)
+{
+ vl_api_policer_output_reply_t *rmp;
+ u32 policer_index;
+ u32 sw_if_index;
+ u8 apply;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ policer_index = ntohl (mp->policer_index);
+ sw_if_index = ntohl (mp->sw_if_index);
+ apply = mp->apply;
+
+ rv = policer_input (policer_index, sw_if_index, VLIB_TX, apply);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_POLICER_OUTPUT_REPLY);
+}
+
+static void
+send_policer_details (qos_pol_cfg_params_st *config, policer_t *policer,
+ vl_api_registration_t *reg, u32 context)
{
vl_api_policer_details_t *mp;
@@ -170,26 +337,27 @@ send_policer_details (u8 *name, qos_pol_cfg_params_st *config,
mp->round_type = (vl_api_sse2_qos_round_type_t) config->rnd_type;
mp->type = (vl_api_sse2_qos_policer_type_t) config->rfc;
mp->conform_action.type =
- (vl_api_sse2_qos_action_type_t) config->conform_action.action_type;
- mp->conform_action.dscp = config->conform_action.dscp;
+ (vl_api_sse2_qos_action_type_t) policer->action[POLICE_CONFORM];
+ mp->conform_action.dscp = policer->mark_dscp[POLICE_CONFORM];
mp->exceed_action.type =
- (vl_api_sse2_qos_action_type_t) config->exceed_action.action_type;
- mp->exceed_action.dscp = config->exceed_action.dscp;
+ (vl_api_sse2_qos_action_type_t) policer->action[POLICE_EXCEED];
+ mp->exceed_action.dscp = policer->mark_dscp[POLICE_EXCEED];
mp->violate_action.type =
- (vl_api_sse2_qos_action_type_t) config->violate_action.action_type;
- mp->violate_action.dscp = config->violate_action.dscp;
- mp->single_rate = templ->single_rate ? 1 : 0;
- mp->color_aware = templ->color_aware ? 1 : 0;
- mp->scale = htonl (templ->scale);
- mp->cir_tokens_per_period = htonl (templ->cir_tokens_per_period);
- mp->pir_tokens_per_period = htonl (templ->pir_tokens_per_period);
- mp->current_limit = htonl (templ->current_limit);
- mp->current_bucket = htonl (templ->current_bucket);
- mp->extended_limit = htonl (templ->extended_limit);
- mp->extended_bucket = htonl (templ->extended_bucket);
- mp->last_update_time = clib_host_to_net_u64 (templ->last_update_time);
-
- strncpy ((char *) mp->name, (char *) name, ARRAY_LEN (mp->name) - 1);
+ (vl_api_sse2_qos_action_type_t) policer->action[POLICE_VIOLATE];
+ mp->violate_action.dscp = policer->mark_dscp[POLICE_VIOLATE];
+ mp->single_rate = policer->single_rate ? 1 : 0;
+ mp->color_aware = policer->color_aware ? 1 : 0;
+ mp->scale = htonl (policer->scale);
+ mp->cir_tokens_per_period = htonl (policer->cir_tokens_per_period);
+ mp->pir_tokens_per_period = htonl (policer->pir_tokens_per_period);
+ mp->current_limit = htonl (policer->current_limit);
+ mp->current_bucket = htonl (policer->current_bucket);
+ mp->extended_limit = htonl (policer->extended_limit);
+ mp->extended_bucket = htonl (policer->extended_bucket);
+ mp->last_update_time = clib_host_to_net_u64 (policer->last_update_time);
+
+ strncpy ((char *) mp->name, (char *) policer->name,
+ ARRAY_LEN (mp->name) - 1);
vl_api_send_msg (reg, (u8 *) mp);
}
@@ -199,13 +367,11 @@ vl_api_policer_dump_t_handler (vl_api_policer_dump_t * mp)
{
vl_api_registration_t *reg;
vnet_policer_main_t *pm = &vnet_policer_main;
- hash_pair_t *hp;
- uword *p;
- u32 pool_index;
+ uword *p, *pi;
+ u32 pool_index, policer_index;
u8 *match_name = 0;
- u8 *name;
qos_pol_cfg_params_st *config;
- policer_t *templ;
+ policer_t *policer;
reg = vl_api_client_index_to_registration (mp->client_index);
if (!reg)
@@ -220,26 +386,67 @@ vl_api_policer_dump_t_handler (vl_api_policer_dump_t * mp)
if (mp->match_name_valid)
{
p = hash_get_mem (pm->policer_config_by_name, match_name);
- if (p)
+ pi = hash_get_mem (pm->policer_index_by_name, match_name);
+ if (0 == p || 0 == pi)
+ return;
+
+ pool_index = p[0];
+ policer_index = pi[0];
+ config = pool_elt_at_index (pm->configs, pool_index);
+ policer = pool_elt_at_index (pm->policers, policer_index);
+ send_policer_details (config, policer, reg, mp->context);
+ }
+ else
+ {
+ pool_foreach (policer, pm->policers)
+ {
+ p = hash_get_mem (pm->policer_config_by_name, policer->name);
+ if (0 == p)
+ continue;
+
+ pool_index = p[0];
+ config = pool_elt_at_index (pm->configs, pool_index);
+ send_policer_details (config, policer, reg, mp->context);
+ };
+ }
+}
+
+static void
+vl_api_policer_dump_v2_t_handler (vl_api_policer_dump_v2_t *mp)
+{
+ vl_api_registration_t *reg;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ qos_pol_cfg_params_st *config;
+ u32 policer_index, pool_index;
+ policer_t *policer;
+ uword *p;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ policer_index = ntohl (mp->policer_index);
+
+ if (~0 == policer_index)
+ {
+ pool_foreach (policer, pm->policers)
{
+ p = hash_get_mem (pm->policer_config_by_name, policer->name);
pool_index = p[0];
config = pool_elt_at_index (pm->configs, pool_index);
- templ = pool_elt_at_index (pm->policer_templates, pool_index);
- send_policer_details (match_name, config, templ, reg, mp->context);
- }
+ send_policer_details (config, policer, reg, mp->context);
+ };
}
else
{
- /* *INDENT-OFF* */
- hash_foreach_pair (hp, pm->policer_config_by_name,
- ({
- name = (u8 *) hp->key;
- pool_index = hp->value[0];
- config = pool_elt_at_index (pm->configs, pool_index);
- templ = pool_elt_at_index (pm->policer_templates, pool_index);
- send_policer_details(name, config, templ, reg, mp->context);
- }));
- /* *INDENT-ON* */
+ if (pool_is_free_index (pm->policers, policer_index))
+ return;
+
+ policer = &pm->policers[policer_index];
+ p = hash_get_mem (pm->policer_config_by_name, policer->name);
+ pool_index = p[0];
+ config = pool_elt_at_index (pm->configs, pool_index);
+ send_policer_details (config, policer, reg, mp->context);
}
}
diff --git a/src/vnet/policer/policer_types.api b/src/vnet/policer/policer_types.api
index 3e21b7d707c..9d4c6447f69 100644
--- a/src/vnet/policer/policer_types.api
+++ b/src/vnet/policer/policer_types.api
@@ -56,6 +56,34 @@ typedef sse2_qos_action
u8 dscp;
};
+/** \brief Policer configuration
+ @param cir - CIR
+ @param eir - EIR
+ @param cb - Committed Burst
+ @param eb - Excess or Peak Burst
+ @param rate_type - rate type
+ @param round_type - rounding type
+ @param type - policer algorithm
+ @param color_aware - 0=color-blind, 1=color-aware
+ @param conform_action - conform action
+ @param exceed_action - exceed action type
+ @param violate_action - violate action type
+*/
+typedef policer_config
+{
+ u32 cir;
+ u32 eir;
+ u64 cb;
+ u64 eb;
+ vl_api_sse2_qos_rate_type_t rate_type;
+ vl_api_sse2_qos_round_type_t round_type;
+ vl_api_sse2_qos_policer_type_t type;
+ bool color_aware;
+ vl_api_sse2_qos_action_t conform_action;
+ vl_api_sse2_qos_action_t exceed_action;
+ vl_api_sse2_qos_action_t violate_action;
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/vnet/policer/xlate.c b/src/vnet/policer/xlate.c
index 9c4d76fd990..bffd208716d 100644
--- a/src/vnet/policer/xlate.c
+++ b/src/vnet/policer/xlate.c
@@ -1058,7 +1058,7 @@ x86_pol_compute_hw_params (qos_pol_cfg_params_st *cfg, policer_t *hw)
* Return: Status, success or failure code.
*/
int
-pol_logical_2_physical (qos_pol_cfg_params_st *cfg, policer_t *phys)
+pol_logical_2_physical (const qos_pol_cfg_params_st *cfg, policer_t *phys)
{
int rc;
qos_pol_cfg_params_st kbps_cfg;
diff --git a/src/vnet/policer/xlate.h b/src/vnet/policer/xlate.h
index 722ac2fb777..7f6ebe7b65d 100644
--- a/src/vnet/policer/xlate.h
+++ b/src/vnet/policer/xlate.h
@@ -158,7 +158,7 @@ typedef struct qos_pol_hw_params_st_
u32 extd_bkt;
} qos_pol_hw_params_st;
-int pol_logical_2_physical (qos_pol_cfg_params_st *cfg, policer_t *phys);
+int pol_logical_2_physical (const qos_pol_cfg_params_st *cfg, policer_t *phys);
#endif /* __included_xlate_h__ */
diff --git a/src/vnet/ppp/node.c b/src/vnet/ppp/node.c
index eead2b2f0c1..fa056bfb99f 100644
--- a/src/vnet/ppp/node.c
+++ b/src/vnet/ppp/node.c
@@ -265,7 +265,6 @@ static char *ppp_error_strings[] = {
#undef ppp_error
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ppp_input_node) = {
.function = ppp_input,
.name = "ppp-input",
@@ -288,7 +287,6 @@ VLIB_REGISTER_NODE (ppp_input_node) = {
.format_trace = format_ppp_input_trace,
.unformat_buffer = unformat_ppp_header,
};
-/* *INDENT-ON* */
static clib_error_t *
ppp_input_runtime_init (vlib_main_t * vm)
diff --git a/src/vnet/ppp/ppp.c b/src/vnet/ppp/ppp.c
index b1fafa13145..8aa8504fcdd 100644
--- a/src/vnet/ppp/ppp.c
+++ b/src/vnet/ppp/ppp.c
@@ -197,7 +197,6 @@ ppp_build_rewrite (vnet_main_t * vnm,
return (rewrite);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (ppp_hw_interface_class) = {
.name = "PPP",
.format_header = format_ppp_header_with_length,
@@ -205,7 +204,6 @@ VNET_HW_INTERFACE_CLASS (ppp_hw_interface_class) = {
.build_rewrite = ppp_build_rewrite,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
static void
add_protocol (ppp_main_t * pm, ppp_protocol_t protocol, char *protocol_name)
diff --git a/src/vnet/qos/qos_egress_map.c b/src/vnet/qos/qos_egress_map.c
index 7985579d3cf..43c0c55df07 100644
--- a/src/vnet/qos/qos_egress_map.c
+++ b/src/vnet/qos/qos_egress_map.c
@@ -47,13 +47,11 @@ qos_egress_map_get_id (index_t qemi)
qos_egress_map_id_t qid;
index_t qmi;
- /* *INDENT-OFF* */
hash_foreach(qid, qmi, qem_db,
({
if (qmi == qemi)
return (qid);
}));
- /* *INDENT-OFF* */
return (~0);
}
@@ -129,12 +127,10 @@ qos_egress_map_walk (qos_egress_map_walk_cb_t fn, void *c)
qos_egress_map_id_t qid;
index_t qmi;
- /* *INDENT-OFF* */
hash_foreach(qid, qmi, qem_db,
({
fn(qid, pool_elt_at_index(qem_pool, qmi), c);
}));
- /* *INDENT-OFF* */
}
static clib_error_t *
@@ -181,14 +177,12 @@ qos_egress_map_update_cli (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{qos egress map id 0 [ip][4]=4}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_egress_map_update_command, static) = {
.path = "qos egress map",
.short_help = "qos egress map id %d [delete] {[SOURCE][INPUT]=OUTPUT}",
.function = qos_egress_map_update_cli,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
u8 *format_qos_egress_map (u8 * s, va_list * args)
{
@@ -239,7 +233,6 @@ VLIB_CLI_COMMAND (qos_egress_map_update_command, static) = {
{
index_t qemi;
- /* *INDENT-OFF* */
hash_foreach(map_id, qemi, qem_db,
({
vlib_cli_output (vm, " Map-ID:%d\n%U",
@@ -247,7 +240,6 @@ VLIB_CLI_COMMAND (qos_egress_map_update_command, static) = {
format_qos_egress_map,
pool_elt_at_index(qem_pool, qemi), 2);
}));
- /* *INDENT-ON* */
}
else
{
@@ -274,14 +266,12 @@ VLIB_CLI_COMMAND (qos_egress_map_update_command, static) = {
* @cliexpar
* @cliexcmd{show qos egress map}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_egress_map_show_command, static) = {
.path = "show qos egress map",
.short_help = "show qos egress map id %d",
.function = qos_egress_map_show,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/qos/qos_mark.c b/src/vnet/qos/qos_mark.c
index 44bb34bd010..3817c89a009 100644
--- a/src/vnet/qos/qos_mark.c
+++ b/src/vnet/qos/qos_mark.c
@@ -187,14 +187,12 @@ qos_mark_cli (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{qos egress interface GigEthernet0/9/0 id 0 output ip}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_egress_map_interface_command, static) = {
.path = "qos mark",
.short_help = "qos mark <SOURCE> <INTERFACE> id <MAP>",
.function = qos_mark_cli,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static void
qos_mark_show_one_interface (vlib_main_t * vm, u32 sw_if_index)
@@ -271,14 +269,12 @@ qos_mark_show (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{show qos egress map}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_mark_show_command, static) = {
.path = "show qos mark",
.short_help = "show qos mark [interface]",
.function = qos_mark_show,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/qos/qos_mark_node.c b/src/vnet/qos/qos_mark_node.c
index f12e66b4fa0..16a487aede8 100644
--- a/src/vnet/qos/qos_mark_node.c
+++ b/src/vnet/qos/qos_mark_node.c
@@ -212,7 +212,6 @@ VLIB_NODE_FN (vlan_ip6_qos_mark_node) (vlib_main_t * vm,
return (qos_mark_inline (vm, node, frame, QOS_SOURCE_VLAN, 0));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_qos_mark_node) = {
.name = "ip4-qos-mark",
.vector_size = sizeof (u32),
@@ -330,7 +329,6 @@ VNET_FEATURE_INIT (vlan_mpls_qos_mark_node, static) = {
.runs_after = VNET_FEATURES ("mpls-qos-mark"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/qos/qos_record.c b/src/vnet/qos/qos_record.c
index d52c1442d8d..fdf79766471 100644
--- a/src/vnet/qos/qos_record.c
+++ b/src/vnet/qos/qos_record.c
@@ -203,14 +203,12 @@ qos_record_cli (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{qos record ip GigEthernet0/1/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_record_command, static) = {
.path = "qos record",
.short_help = "qos record <record-source> <INTERFACE> [disable]",
.function = qos_record_cli,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static void
qos_record_show_one_interface (vlib_main_t * vm, u32 sw_if_index)
@@ -285,14 +283,12 @@ qos_record_show (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{show qos egress map}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_record_show_command, static) = {
.path = "show qos record",
.short_help = "show qos record [interface]",
.function = qos_record_show,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/qos/qos_record_node.c b/src/vnet/qos/qos_record_node.c
index 75e1421dc08..1a34891f85d 100644
--- a/src/vnet/qos/qos_record_node.c
+++ b/src/vnet/qos/qos_record_node.c
@@ -222,7 +222,6 @@ VLIB_NODE_FN (l2_ip_qos_record_node) (vlib_main_t * vm,
return (qos_record_inline (vm, node, frame, QOS_SOURCE_VLAN, 0, 1));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_qos_record_node) = {
.name = "ip4-qos-record",
.vector_size = sizeof (u32),
@@ -372,7 +371,6 @@ VLIB_REGISTER_NODE (l2_ip_qos_record_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/qos/qos_store.c b/src/vnet/qos/qos_store.c
index 1e8a53bbdfc..3424a914e35 100644
--- a/src/vnet/qos/qos_store.c
+++ b/src/vnet/qos/qos_store.c
@@ -211,14 +211,12 @@ qos_store_cli (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{qos store ip GigEthernet0/1/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_store_command, static) = {
.path = "qos store",
.short_help = "qos store <store-source> <INTERFACE> [disable]",
.function = qos_store_cli,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static void
qos_store_show_one_interface (vlib_main_t * vm, u32 sw_if_index)
@@ -295,14 +293,12 @@ qos_store_show (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{show qos egress map}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_store_show_command, static) = {
.path = "show qos store",
.short_help = "show qos store [interface]",
.function = qos_store_show,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/qos/qos_store_node.c b/src/vnet/qos/qos_store_node.c
index 2273b2eac77..6a5ad24453d 100644
--- a/src/vnet/qos/qos_store_node.c
+++ b/src/vnet/qos/qos_store_node.c
@@ -121,7 +121,6 @@ VLIB_NODE_FN (ip6_qos_store_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_qos_store_node) = {
.name = "ip4-qos-store",
.vector_size = sizeof (u32),
@@ -168,7 +167,6 @@ VNET_FEATURE_INIT (ip6m_qos_store_node, static) = {
.node_name = "ip6-qos-store",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c
index 3b2c7cdb35a..c66548507e5 100644
--- a/src/vnet/session/application.c
+++ b/src/vnet/session/application.c
@@ -31,10 +31,12 @@ static app_main_t app_main;
static app_listener_t *
app_listener_alloc (application_t * app)
{
+ app_main_t *am = &app_main;
app_listener_t *app_listener;
- pool_get (app->listeners, app_listener);
+
+ pool_get (am->listeners, app_listener);
clib_memset (app_listener, 0, sizeof (*app_listener));
- app_listener->al_index = app_listener - app->listeners;
+ app_listener->al_index = app_listener - am->listeners;
app_listener->app_index = app->app_index;
app_listener->session_index = SESSION_INVALID_INDEX;
app_listener->local_index = SESSION_INVALID_INDEX;
@@ -43,18 +45,23 @@ app_listener_alloc (application_t * app)
}
app_listener_t *
-app_listener_get (application_t * app, u32 app_listener_index)
+app_listener_get (u32 app_listener_index)
{
- return pool_elt_at_index (app->listeners, app_listener_index);
+ app_main_t *am = &app_main;
+
+ return pool_elt_at_index (am->listeners, app_listener_index);
}
static void
app_listener_free (application_t * app, app_listener_t * app_listener)
{
+ app_main_t *am = &app_main;
+
clib_bitmap_free (app_listener->workers);
+ vec_free (app_listener->cl_listeners);
if (CLIB_DEBUG)
clib_memset (app_listener, 0xfa, sizeof (*app_listener));
- pool_put (app->listeners, app_listener);
+ pool_put (am->listeners, app_listener);
}
session_handle_t
@@ -63,24 +70,14 @@ app_listener_handle (app_listener_t * al)
return al->ls_handle;
}
-app_listener_t *
-app_listener_get_w_session (session_t * ls)
-{
- application_t *app;
-
- app = application_get_if_valid (ls->app_index);
- if (!app)
- return 0;
- return app_listener_get (app, ls->al_index);
-}
-
session_handle_t
app_listen_session_handle (session_t * ls)
{
app_listener_t *al;
- al = app_listener_get_w_session (ls);
- if (!al)
+ /* TODO(fcoras): quic session handles */
+ if (ls->al_index == SESSION_INVALID_INDEX)
return listen_session_get_handle (ls);
+ al = app_listener_get (ls->al_index);
return al->ls_handle;
}
@@ -91,7 +88,7 @@ app_listener_get_w_handle (session_handle_t handle)
ls = session_get_from_handle_if_valid (handle);
if (!ls)
return 0;
- return app_listener_get_w_session (ls);
+ return app_listener_get (ls->al_index);
}
app_listener_t *
@@ -112,7 +109,7 @@ app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep_ext)
if (handle != SESSION_INVALID_HANDLE)
{
ls = listen_session_get_from_handle (handle);
- return app_listener_get_w_session (ls);
+ return app_listener_get (ls->al_index);
}
}
@@ -122,7 +119,7 @@ app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep_ext)
if (handle != SESSION_INVALID_HANDLE)
{
ls = listen_session_get_from_handle (handle);
- return app_listener_get_w_session ((session_t *) ls);
+ return app_listener_get (ls->al_index);
}
/*
@@ -144,7 +141,7 @@ app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep_ext)
if (handle != SESSION_INVALID_HANDLE)
{
ls = listen_session_get_from_handle (handle);
- return app_listener_get_w_session ((session_t *) ls);
+ return app_listener_get (ls->al_index);
}
}
}
@@ -181,7 +178,6 @@ app_listener_alloc_and_init (application_t * app,
local_st = session_type_from_proto_and_ip (TRANSPORT_PROTO_NONE,
sep->is_ip4);
ls = listen_session_alloc (0, local_st);
- ls->app_index = app->app_index;
ls->app_wrk_index = sep->app_wrk_index;
lh = session_handle (ls);
@@ -189,11 +185,12 @@ app_listener_alloc_and_init (application_t * app,
{
ls = session_get_from_handle (lh);
session_free (ls);
+ app_listener_free (app, app_listener);
return rv;
}
ls = session_get_from_handle (lh);
- app_listener = app_listener_get (app, al_index);
+ app_listener = app_listener_get (al_index);
app_listener->local_index = ls->session_index;
app_listener->ls_handle = lh;
ls->al_index = al_index;
@@ -212,7 +209,6 @@ app_listener_alloc_and_init (application_t * app,
* build it's own specific listening connection.
*/
ls = listen_session_alloc (0, st);
- ls->app_index = app->app_index;
ls->app_wrk_index = sep->app_wrk_index;
/* Listen pool can be reallocated if the transport is
@@ -223,10 +219,11 @@ app_listener_alloc_and_init (application_t * app,
{
ls = listen_session_get_from_handle (lh);
session_free (ls);
+ app_listener_free (app, app_listener);
return rv;
}
ls = listen_session_get_from_handle (lh);
- app_listener = app_listener_get (app, al_index);
+ app_listener = app_listener_get (al_index);
app_listener->session_index = ls->session_index;
app_listener->ls_handle = lh;
ls->al_index = al_index;
@@ -288,8 +285,9 @@ app_listener_cleanup (app_listener_t * al)
}
static app_worker_t *
-app_listener_select_worker (application_t * app, app_listener_t * al)
+app_listener_select_worker (app_listener_t *al)
{
+ application_t *app;
u32 wrk_index;
app = application_get (al->app_index);
@@ -319,6 +317,13 @@ app_listener_get_local_session (app_listener_t * al)
return listen_session_get (al->local_index);
}
+session_t *
+app_listener_get_wrk_cl_session (app_listener_t *al, u32 wrk_map_index)
+{
+ u32 si = vec_elt (al->cl_listeners, wrk_map_index);
+ return session_get (si, 0 /* listener thread */);
+}
+
static app_worker_map_t *
app_worker_map_alloc (application_t * app)
{
@@ -723,6 +728,12 @@ application_get_if_valid (u32 app_index)
return pool_elt_at_index (app_main.app_pool, app_index);
}
+static int
+_null_app_tx_callback (session_t *s)
+{
+ return 0;
+}
+
static void
application_verify_cb_fns (session_cb_vft_t * cb_fns)
{
@@ -734,6 +745,8 @@ application_verify_cb_fns (session_cb_vft_t * cb_fns)
clib_warning ("No session disconnect callback function provided");
if (cb_fns->session_reset_callback == 0)
clib_warning ("No session reset callback function provided");
+ if (!cb_fns->builtin_app_tx_callback)
+ cb_fns->builtin_app_tx_callback = _null_app_tx_callback;
}
/**
@@ -763,8 +776,8 @@ application_verify_cfg (ssvm_segment_type_t st)
return 1;
}
-static int
-application_alloc_and_init (app_init_args_t * a)
+static session_error_t
+application_alloc_and_init (app_init_args_t *a)
{
ssvm_segment_type_t seg_type = SSVM_SEGMENT_MEMFD;
segment_manager_props_t *props;
@@ -785,15 +798,15 @@ application_alloc_and_init (app_init_args_t * a)
{
clib_warning ("mq eventfds can only be used if socket transport is "
"used for binary api");
- return VNET_API_ERROR_APP_UNSUPPORTED_CFG;
+ return SESSION_E_NOSUPPORT;
}
if (!application_verify_cfg (seg_type))
- return VNET_API_ERROR_APP_UNSUPPORTED_CFG;
+ return SESSION_E_NOSUPPORT;
if (opts[APP_OPTIONS_PREALLOC_FIFO_PAIRS] &&
opts[APP_OPTIONS_PREALLOC_FIFO_HDRS])
- return VNET_API_ERROR_APP_UNSUPPORTED_CFG;
+ return SESSION_E_NOSUPPORT;
/* Check that the obvious things are properly set up */
application_verify_cb_fns (a->session_cb_vft);
@@ -874,12 +887,10 @@ application_free (application_t * app)
* Free workers
*/
- /* *INDENT-OFF* */
pool_flush (wrk_map, app->worker_maps, ({
app_wrk = app_worker_get (wrk_map->wrk_index);
app_worker_free (app_wrk);
}));
- /* *INDENT-ON* */
pool_free (app->worker_maps);
/*
@@ -922,13 +933,11 @@ application_detach_process (application_t * app, u32 api_client_index)
APP_DBG ("Detaching for app %v index %u api client index %u", app->name,
app->app_index, api_client_index);
- /* *INDENT-OFF* */
pool_foreach (wrk_map, app->worker_maps) {
app_wrk = app_worker_get (wrk_map->wrk_index);
if (app_wrk->api_client_index == api_client_index)
vec_add1 (wrks, app_wrk->wrk_index);
}
- /* *INDENT-ON* */
if (!vec_len (wrks))
{
@@ -999,12 +1008,55 @@ application_n_workers (application_t * app)
app_worker_t *
application_listener_select_worker (session_t * ls)
{
- application_t *app;
app_listener_t *al;
- app = application_get (ls->app_index);
- al = app_listener_get (app, ls->al_index);
- return app_listener_select_worker (app, al);
+ al = app_listener_get (ls->al_index);
+ return app_listener_select_worker (al);
+}
+
+always_inline u32
+app_listener_cl_flow_hash (session_dgram_hdr_t *hdr)
+{
+ u32 hash = 0;
+
+ if (hdr->is_ip4)
+ {
+ hash = clib_crc32c_u32 (hash, hdr->rmt_ip.ip4.as_u32);
+ hash = clib_crc32c_u32 (hash, hdr->lcl_ip.ip4.as_u32);
+ hash = clib_crc32c_u16 (hash, hdr->rmt_port);
+ hash = clib_crc32c_u16 (hash, hdr->lcl_port);
+ }
+ else
+ {
+ hash = clib_crc32c_u64 (hash, hdr->rmt_ip.ip6.as_u64[0]);
+ hash = clib_crc32c_u64 (hash, hdr->rmt_ip.ip6.as_u64[1]);
+ hash = clib_crc32c_u64 (hash, hdr->lcl_ip.ip6.as_u64[0]);
+ hash = clib_crc32c_u64 (hash, hdr->lcl_ip.ip6.as_u64[1]);
+ hash = clib_crc32c_u16 (hash, hdr->rmt_port);
+ hash = clib_crc32c_u16 (hash, hdr->lcl_port);
+ }
+
+ return hash;
+}
+
+session_t *
+app_listener_select_wrk_cl_session (session_t *ls, session_dgram_hdr_t *hdr)
+{
+ u32 wrk_map_index = 0;
+ app_listener_t *al;
+
+ al = app_listener_get (ls->al_index);
+ /* Crude test to check if only worker 0 is set */
+ if (al->workers[0] != 1)
+ {
+ u32 hash = app_listener_cl_flow_hash (hdr);
+ hash %= vec_len (al->workers) * sizeof (uword);
+ wrk_map_index = clib_bitmap_next_set (al->workers, hash);
+ if (wrk_map_index == ~0)
+ wrk_map_index = clib_bitmap_first_set (al->workers);
+ }
+
+ return app_listener_get_wrk_cl_session (al, wrk_map_index);
}
int
@@ -1046,8 +1098,8 @@ application_alloc_worker_and_init (application_t * app, app_worker_t ** wrk)
return 0;
}
-int
-vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a)
+session_error_t
+vnet_app_worker_add_del (vnet_app_worker_add_del_args_t *a)
{
fifo_segment_t *fs;
app_worker_map_t *wrk_map;
@@ -1058,7 +1110,7 @@ vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a)
app = application_get (a->app_index);
if (!app)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if (a->is_add)
{
@@ -1081,11 +1133,11 @@ vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a)
{
wrk_map = app_worker_map_get (app, a->wrk_map_index);
if (!wrk_map)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
app_wrk = app_worker_get (wrk_map->wrk_index);
if (!app_wrk)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
application_api_table_del (app_wrk->api_client_index);
if (appns_sapi_enabled ())
@@ -1098,8 +1150,8 @@ vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a)
return 0;
}
-static int
-app_validate_namespace (u8 * namespace_id, u64 secret, u32 * app_ns_index)
+static session_error_t
+app_validate_namespace (u8 *namespace_id, u64 secret, u32 *app_ns_index)
{
app_namespace_t *app_ns;
if (vec_len (namespace_id) == 0)
@@ -1111,12 +1163,12 @@ app_validate_namespace (u8 * namespace_id, u64 secret, u32 * app_ns_index)
*app_ns_index = app_namespace_index_from_id (namespace_id);
if (*app_ns_index == APP_NAMESPACE_INVALID_INDEX)
- return VNET_API_ERROR_APP_INVALID_NS;
+ return SESSION_E_INVALID_NS;
app_ns = app_namespace_get (*app_ns_index);
if (!app_ns)
- return VNET_API_ERROR_APP_INVALID_NS;
+ return SESSION_E_INVALID_NS;
if (app_ns->ns_secret != secret)
- return VNET_API_ERROR_APP_WRONG_NS_SECRET;
+ return SESSION_E_WRONG_NS_SECRET;
return 0;
}
@@ -1140,8 +1192,8 @@ app_name_from_api_index (u32 api_client_index)
* to external app and a segment manager for shared memory fifo based
* communication with the external app.
*/
-int
-vnet_application_attach (vnet_app_attach_args_t * a)
+session_error_t
+vnet_application_attach (vnet_app_attach_args_t *a)
{
fifo_segment_t *fs;
application_t *app = 0;
@@ -1150,17 +1202,17 @@ vnet_application_attach (vnet_app_attach_args_t * a)
u32 app_ns_index = 0;
u8 *app_name = 0;
u64 secret;
- int rv;
+ session_error_t rv;
if (a->api_client_index != APP_INVALID_INDEX)
app = application_lookup (a->api_client_index);
else if (a->name)
app = application_lookup_name (a->name);
else
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if (app)
- return VNET_API_ERROR_APP_ALREADY_ATTACHED;
+ return SESSION_E_APP_ATTACHED;
/* Socket api sets the name and validates namespace prior to attach */
if (!a->use_sock_api)
@@ -1214,8 +1266,8 @@ vnet_application_attach (vnet_app_attach_args_t * a)
/**
* Detach application from vpp
*/
-int
-vnet_application_detach (vnet_app_detach_args_t * a)
+session_error_t
+vnet_application_detach (vnet_app_detach_args_t *a)
{
application_t *app;
@@ -1223,7 +1275,7 @@ vnet_application_detach (vnet_app_detach_args_t * a)
if (!app)
{
clib_warning ("app not attached");
- return VNET_API_ERROR_APPLICATION_NOT_ATTACHED;
+ return SESSION_E_NOAPP;
}
app_interface_check_thread_and_barrier (vnet_application_detach, a);
@@ -1297,8 +1349,8 @@ session_endpoint_update_for_app (session_endpoint_cfg_t * sep,
}
}
-int
-vnet_listen (vnet_listen_args_t * a)
+session_error_t
+vnet_listen (vnet_listen_args_t *a)
{
app_listener_t *app_listener;
app_worker_t *app_wrk;
@@ -1351,13 +1403,13 @@ vnet_listen (vnet_listen_args_t * a)
return 0;
}
-int
-vnet_connect (vnet_connect_args_t * a)
+session_error_t
+vnet_connect (vnet_connect_args_t *a)
{
app_worker_t *client_wrk;
application_t *client;
- ASSERT (vlib_thread_is_main_w_barrier ());
+ ASSERT (session_vlib_thread_is_cl_thread ());
if (session_endpoint_is_zero (&a->sep))
return SESSION_E_INVALID_RMT_IP;
@@ -1375,7 +1427,7 @@ vnet_connect (vnet_connect_args_t * a)
*/
if (application_has_local_scope (client))
{
- int rv;
+ session_error_t rv;
a->sep_ext.original_tp = a->sep_ext.transport_proto;
a->sep_ext.transport_proto = TRANSPORT_PROTO_NONE;
@@ -1390,8 +1442,8 @@ vnet_connect (vnet_connect_args_t * a)
return app_worker_connect_session (client_wrk, &a->sep_ext, &a->sh);
}
-int
-vnet_unlisten (vnet_unlisten_args_t * a)
+session_error_t
+vnet_unlisten (vnet_unlisten_args_t *a)
{
app_worker_t *app_wrk;
app_listener_t *al;
@@ -1421,7 +1473,7 @@ vnet_unlisten (vnet_unlisten_args_t * a)
return app_worker_stop_listen (app_wrk, al);
}
-int
+session_error_t
vnet_shutdown_session (vnet_shutdown_args_t *a)
{
app_worker_t *app_wrk;
@@ -1442,8 +1494,8 @@ vnet_shutdown_session (vnet_shutdown_args_t *a)
return 0;
}
-int
-vnet_disconnect_session (vnet_disconnect_args_t * a)
+session_error_t
+vnet_disconnect_session (vnet_disconnect_args_t *a)
{
app_worker_t *app_wrk;
session_t *s;
@@ -1483,7 +1535,7 @@ application_change_listener_owner (session_t * s, app_worker_t * app_wrk)
if (!app)
return SESSION_E_NOAPP;
- app_listener = app_listener_get (app, s->al_index);
+ app_listener = app_listener_get (s->al_index);
/* Only remove from lb for now */
app_listener->workers = clib_bitmap_set (app_listener->workers,
@@ -1527,6 +1579,12 @@ application_has_global_scope (application_t * app)
return app->flags & APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE;
}
+int
+application_original_dst_is_enabled (application_t *app)
+{
+ return app->flags & APP_OPTIONS_FLAGS_GET_ORIGINAL_DST;
+}
+
static clib_error_t *
application_start_stop_proxy_fib_proto (application_t * app, u8 fib_proto,
u8 transport_proto, u8 is_start)
@@ -1688,7 +1746,6 @@ application_format_listeners (application_t * app, int verbose)
return;
}
- /* *INDENT-OFF* */
pool_foreach (wrk_map, app->worker_maps) {
app_wrk = app_worker_get (wrk_map->wrk_index);
if (hash_elts (app_wrk->listeners_table) == 0)
@@ -1698,7 +1755,6 @@ application_format_listeners (application_t * app, int verbose)
handle, sm_index, verbose);
}));
}
- /* *INDENT-ON* */
}
static void
@@ -1713,12 +1769,10 @@ application_format_connects (application_t * app, int verbose)
return;
}
- /* *INDENT-OFF* */
pool_foreach (wrk_map, app->worker_maps) {
app_wrk = app_worker_get (wrk_map->wrk_index);
app_worker_format_connects (app_wrk, verbose);
}
- /* *INDENT-ON* */
}
u8 *
@@ -1819,12 +1873,10 @@ format_application (u8 * s, va_list * args)
format_memory_size, props->rx_fifo_size,
format_memory_size, props->tx_fifo_size);
- /* *INDENT-OFF* */
pool_foreach (wrk_map, app->worker_maps) {
app_wrk = app_worker_get (wrk_map->wrk_index);
s = format (s, "%U", format_app_worker, app_wrk);
}
- /* *INDENT-ON* */
return s;
}
@@ -1842,11 +1894,9 @@ application_format_all_listeners (vlib_main_t * vm, int verbose)
application_format_listeners (0, verbose);
- /* *INDENT-OFF* */
pool_foreach (app, app_main.app_pool) {
application_format_listeners (app, verbose);
}
- /* *INDENT-ON* */
}
void
@@ -1862,11 +1912,9 @@ application_format_all_clients (vlib_main_t * vm, int verbose)
application_format_connects (0, verbose);
- /* *INDENT-OFF* */
pool_foreach (app, app_main.app_pool) {
application_format_connects (app, verbose);
}
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -1876,11 +1924,9 @@ show_certificate_command_fn (vlib_main_t * vm, unformat_input_t * input,
app_cert_key_pair_t *ckpair;
session_cli_return_if_not_enabled ();
- /* *INDENT-OFF* */
pool_foreach (ckpair, app_main.cert_key_pair_store) {
vlib_cli_output (vm, "%U", format_cert_key_pair, ckpair);
}
- /* *INDENT-ON* */
return 0;
}
@@ -1891,14 +1937,12 @@ appliction_format_app_mq (vlib_main_t * vm, application_t * app)
app_worker_t *wrk;
int i;
- /* *INDENT-OFF* */
pool_foreach (map, app->worker_maps) {
wrk = app_worker_get (map->wrk_index);
vlib_cli_output (vm, "[A%d][%d]%U", app->app_index,
map->wrk_index, format_svm_msg_q,
wrk->event_queue);
}
- /* *INDENT-ON* */
for (i = 0; i < vec_len (app->rx_mqs); i++)
vlib_cli_output (vm, "[A%d][R%d]%U", app->app_index, i, format_svm_msg_q,
@@ -1919,11 +1963,9 @@ appliction_format_all_app_mq (vlib_main_t * vm)
session_main_get_vpp_event_queue (i));
}
- /* *INDENT-OFF* */
pool_foreach (app, app_main.app_pool) {
appliction_format_app_mq (vm, app);
}
- /* *INDENT-ON* */
return 0;
}
@@ -2074,7 +2116,7 @@ vnet_app_del_cert_key_pair (u32 index)
u32 *app_index;
if (!(ckpair = app_cert_key_pair_get_if_valid (index)))
- return (VNET_API_ERROR_INVALID_VALUE);
+ return SESSION_E_INVALID;
vec_foreach (app_index, ckpair->app_interests)
{
diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h
index 09737a6752d..c68a911230f 100644
--- a/src/vnet/session/application.h
+++ b/src/vnet/session/application.h
@@ -77,17 +77,17 @@ typedef struct app_worker_
/** Pool of half-open session handles. Tracked in case worker detaches */
session_handle_t *half_open_table;
+ /* Per vpp worker fifos of events for app worker */
+ session_event_t **wrk_evts;
+
+ /* Vector of vpp workers mq congestion flags */
+ u8 *wrk_mq_congested;
+
/** Protects detached seg managers */
clib_spinlock_t detached_seg_managers_lock;
/** Vector of detached listener segment managers */
u32 *detached_seg_managers;
-
- /** Fifo of messages postponed because of mq congestion */
- app_wrk_postponed_msg_t *postponed_mq_msgs;
-
- /** Lock to add/sub message from ref @postponed_mq_msgs */
- clib_spinlock_t postponed_mq_msgs_lock;
} app_worker_t;
typedef struct app_worker_map_
@@ -106,6 +106,8 @@ typedef struct app_listener_
session_handle_t ls_handle; /**< session handle of the local or global
listening session that also identifies
the app listener */
+ u32 *cl_listeners; /**< vector that maps app workers to their
+ cl sessions with fifos */
} app_listener_t;
typedef enum app_rx_mq_flags_
@@ -149,9 +151,6 @@ typedef struct application_
u16 proxied_transports;
- /** Pool of listeners for the app */
- app_listener_t *listeners;
-
/** Preferred tls engine */
u8 tls_engine;
@@ -198,6 +197,9 @@ typedef struct app_main_
*/
application_t *app_pool;
+ /** Pool of app listeners */
+ app_listener_t *listeners;
+
/**
* Hash table of apps by api client index
*/
@@ -246,7 +248,7 @@ typedef struct _vnet_app_worker_add_del_args
#define APP_NS_INVALID_INDEX ((u32)~0)
#define APP_INVALID_SEGMENT_MANAGER_INDEX ((u32) ~0)
-app_listener_t *app_listener_get (application_t * app, u32 al_index);
+app_listener_t *app_listener_get (u32 al_index);
int app_listener_alloc_and_init (application_t * app,
session_endpoint_cfg_t * sep,
app_listener_t ** listener);
@@ -254,6 +256,8 @@ void app_listener_cleanup (app_listener_t * app_listener);
session_handle_t app_listener_handle (app_listener_t * app_listener);
app_listener_t *app_listener_lookup (application_t * app,
session_endpoint_cfg_t * sep);
+session_t *app_listener_select_wrk_cl_session (session_t *ls,
+ session_dgram_hdr_t *hdr);
/**
* Get app listener handle for listening session
@@ -277,9 +281,9 @@ session_handle_t app_listen_session_handle (session_t * ls);
* @return pointer to app listener or 0
*/
app_listener_t *app_listener_get_w_handle (session_handle_t handle);
-app_listener_t *app_listener_get_w_session (session_t * ls);
session_t *app_listener_get_session (app_listener_t * al);
session_t *app_listener_get_local_session (app_listener_t * al);
+session_t *app_listener_get_wrk_cl_session (app_listener_t *al, u32 wrk_index);
application_t *application_get (u32 index);
application_t *application_get_if_valid (u32 index);
@@ -300,6 +304,7 @@ u8 application_has_global_scope (application_t * app);
void application_setup_proxy (application_t * app);
void application_remove_proxy (application_t * app);
void application_namespace_cleanup (app_namespace_t *app_ns);
+int application_original_dst_is_enabled (application_t *app);
segment_manager_props_t *application_get_segment_manager_properties (u32
app_index);
@@ -316,6 +321,12 @@ void application_enable_rx_mqs_nodes (u8 is_en);
* App worker
*/
+always_inline u8
+app_worker_mq_is_congested (app_worker_t *app_wrk)
+{
+ return app_wrk->mq_congested > 0;
+}
+
app_worker_t *app_worker_alloc (application_t * app);
int application_alloc_worker_and_init (application_t * app,
app_worker_t ** wrk);
@@ -326,9 +337,14 @@ int app_worker_own_session (app_worker_t * app_wrk, session_t * s);
void app_worker_free (app_worker_t * app_wrk);
int app_worker_connect_session (app_worker_t *app, session_endpoint_cfg_t *sep,
session_handle_t *rsh);
-int app_worker_start_listen (app_worker_t * app_wrk, app_listener_t * lstnr);
+session_error_t app_worker_start_listen (app_worker_t *app_wrk,
+ app_listener_t *lstnr);
int app_worker_stop_listen (app_worker_t * app_wrk, app_listener_t * al);
int app_worker_init_accepted (session_t * s);
+int app_worker_listened_notify (app_worker_t *app_wrk, session_handle_t alsh,
+ u32 opaque, session_error_t err);
+int app_worker_unlisten_reply (app_worker_t *app_wrk, session_handle_t sh,
+ u32 opaque, session_error_t err);
int app_worker_accept_notify (app_worker_t * app_wrk, session_t * s);
int app_worker_init_connected (app_worker_t * app_wrk, session_t * s);
int app_worker_connect_notify (app_worker_t * app_wrk, session_t * s,
@@ -341,13 +357,21 @@ int app_worker_transport_closed_notify (app_worker_t * app_wrk,
int app_worker_reset_notify (app_worker_t * app_wrk, session_t * s);
int app_worker_cleanup_notify (app_worker_t * app_wrk, session_t * s,
session_cleanup_ntf_t ntf);
+int app_worker_cleanup_notify_custom (app_worker_t *app_wrk, session_t *s,
+ session_cleanup_ntf_t ntf,
+ void (*cleanup_cb) (session_t *s));
int app_worker_migrate_notify (app_worker_t * app_wrk, session_t * s,
session_handle_t new_sh);
-int app_worker_builtin_rx (app_worker_t * app_wrk, session_t * s);
-int app_worker_builtin_tx (app_worker_t * app_wrk, session_t * s);
+int app_worker_rx_notify (app_worker_t *app_wrk, session_t *s);
int app_worker_session_fifo_tuning (app_worker_t * app_wrk, session_t * s,
svm_fifo_t * f,
session_ft_action_t act, u32 len);
+void app_worker_add_event (app_worker_t *app_wrk, session_t *s,
+ session_evt_type_t evt_type);
+void app_worker_add_event_custom (app_worker_t *app_wrk, u32 thread_index,
+ session_event_t *evt);
+int app_wrk_flush_wrk_events (app_worker_t *app_wrk, u32 thread_index);
+void app_worker_del_all_events (app_worker_t *app_wrk);
segment_manager_t *app_worker_get_listen_segment_manager (app_worker_t *,
session_t *);
segment_manager_t *app_worker_get_connect_segment_manager (app_worker_t *);
@@ -362,9 +386,10 @@ void app_wrk_send_ctrl_evt_fd (app_worker_t *app_wrk, u8 evt_type, void *msg,
u32 msg_len, int fd);
void app_wrk_send_ctrl_evt (app_worker_t *app_wrk, u8 evt_type, void *msg,
u32 msg_len);
-int app_worker_send_event (app_worker_t * app, session_t * s, u8 evt);
-int app_worker_lock_and_send_event (app_worker_t * app, session_t * s,
- u8 evt_type);
+u8 app_worker_mq_wrk_is_congested (app_worker_t *app_wrk, u32 thread_index);
+void app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, u32 thread_index);
+void app_worker_unset_wrk_mq_congested (app_worker_t *app_wrk,
+ u32 thread_index);
session_t *app_worker_proxy_listener (app_worker_t * app, u8 fib_proto,
u8 transport_proto);
void app_worker_del_detached_sm (app_worker_t * app_wrk, u32 sm_index);
@@ -373,7 +398,7 @@ u8 *format_app_worker_listener (u8 * s, va_list * args);
u8 *format_crypto_engine (u8 * s, va_list * args);
u8 *format_crypto_context (u8 * s, va_list * args);
void app_worker_format_connects (app_worker_t * app_wrk, int verbose);
-int vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a);
+session_error_t vnet_app_worker_add_del (vnet_app_worker_add_del_args_t *a);
uword unformat_application_proto (unformat_input_t * input, va_list * args);
@@ -381,18 +406,17 @@ app_cert_key_pair_t *app_cert_key_pair_get (u32 index);
app_cert_key_pair_t *app_cert_key_pair_get_if_valid (u32 index);
app_cert_key_pair_t *app_cert_key_pair_get_default ();
-/* Needed while we support both bapi and mq ctrl messages */
-int mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context,
- session_handle_t handle, int rv);
-int mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
- session_t * s, session_error_t err);
-void mq_send_unlisten_reply (app_worker_t * app_wrk, session_handle_t sh,
- u32 context, int rv);
void sapi_socket_close_w_handle (u32 api_handle);
crypto_engine_type_t app_crypto_engine_type_add (void);
u8 app_crypto_engine_n_types (void);
+static inline u8
+app_worker_application_is_builtin (app_worker_t *app_wrk)
+{
+ return app_wrk->app_is_builtin;
+}
+
#endif /* SRC_VNET_SESSION_APPLICATION_H_ */
/*
diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c
index 74f456a1eab..a62f914d43a 100644
--- a/src/vnet/session/application_interface.c
+++ b/src/vnet/session/application_interface.c
@@ -73,8 +73,8 @@ unformat_vnet_uri (unformat_input_t * input, va_list * args)
static u8 *cache_uri;
static session_endpoint_cfg_t *cache_sep;
-int
-parse_uri (char *uri, session_endpoint_cfg_t * sep)
+session_error_t
+parse_uri (char *uri, session_endpoint_cfg_t *sep)
{
unformat_input_t _input, *input = &_input;
@@ -92,7 +92,7 @@ parse_uri (char *uri, session_endpoint_cfg_t * sep)
if (!unformat (input, "%U", unformat_vnet_uri, sep))
{
unformat_free (input);
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
}
unformat_free (input);
@@ -106,8 +106,8 @@ parse_uri (char *uri, session_endpoint_cfg_t * sep)
return 0;
}
-int
-vnet_bind_uri (vnet_listen_args_t * a)
+session_error_t
+vnet_bind_uri (vnet_listen_args_t *a)
{
session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
int rv;
@@ -120,36 +120,36 @@ vnet_bind_uri (vnet_listen_args_t * a)
return vnet_listen (a);
}
-int
-vnet_unbind_uri (vnet_unlisten_args_t * a)
+session_error_t
+vnet_unbind_uri (vnet_unlisten_args_t *a)
{
session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
application_t *app;
session_t *listener;
u32 table_index;
- int rv;
+ session_error_t rv;
if ((rv = parse_uri (a->uri, &sep)))
return rv;
app = application_get (a->app_index);
if (!app)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
table_index = application_session_table (app, fib_ip_proto (!sep.is_ip4));
listener = session_lookup_listener (table_index,
(session_endpoint_t *) & sep);
if (!listener)
- return VNET_API_ERROR_ADDRESS_NOT_IN_USE;
+ return SESSION_E_ADDR_NOT_IN_USE;
a->handle = listen_session_get_handle (listener);
return vnet_unlisten (a);
}
-int
-vnet_connect_uri (vnet_connect_args_t * a)
+session_error_t
+vnet_connect_uri (vnet_connect_args_t *a)
{
session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
- int rv;
+ session_error_t rv;
if ((rv = parse_uri (a->uri, &sep)))
return rv;
diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h
index 9fc03a0e97a..f175e4a58c6 100644
--- a/src/vnet/session/application_interface.h
+++ b/src/vnet/session/application_interface.h
@@ -62,6 +62,13 @@ typedef struct session_cb_vft_
/** Notify app that session pool migration happened */
void (*session_migrate_callback) (session_t * s, session_handle_t new_sh);
+ /** Notify app (external only) that listen was processed */
+ int (*session_listened_callback) (u32 app_wrk_index, u32 api_context,
+ session_handle_t handle, int rv);
+ /** Notify app (external only) that unlisten was processed */
+ void (*session_unlistened_callback) (u32 app_wrk_index, session_handle_t sh,
+ u32 context, int rv);
+
/** Direct RX callback for built-in application */
int (*builtin_app_rx_callback) (session_t * session);
@@ -74,6 +81,8 @@ typedef struct session_cb_vft_
/** Delegate fifo-tuning-logic to application */
int (*fifo_tuning_callback) (session_t * s, svm_fifo_t * f,
session_ft_action_t act, u32 bytes);
+ /** Custom fifo allocation for proxy */
+ int (*proxy_alloc_session_fifos) (session_t *s);
} session_cb_vft_t;
@@ -117,7 +126,7 @@ typedef struct _vnet_bind_args_t
/*
* Results
*/
- u64 handle;
+ session_handle_t handle;
} vnet_listen_args_t;
typedef struct _vnet_unlisten_args_t
@@ -125,7 +134,7 @@ typedef struct _vnet_unlisten_args_t
union
{
char *uri;
- u64 handle; /**< Session handle */
+ session_handle_t handle; /**< Session handle */
};
u32 app_index; /**< Owning application index */
u32 wrk_map_index; /**< App's local pool worker index */
@@ -233,7 +242,8 @@ typedef enum
_ (USE_LOCAL_SCOPE, "App can use local session scope") \
_ (EVT_MQ_USE_EVENTFD, "Use eventfds for signaling") \
_ (MEMFD_FOR_BUILTIN, "Use memfd for builtin app segs") \
- _ (USE_HUGE_PAGE, "Use huge page for FIFO")
+ _ (USE_HUGE_PAGE, "Use huge page for FIFO") \
+ _ (GET_ORIGINAL_DST, "Get original dst enabled")
typedef enum _app_options
{
@@ -270,24 +280,26 @@ typedef enum session_fd_flag_
#undef _
} session_fd_flag_t;
-int parse_uri (char *uri, session_endpoint_cfg_t * sep);
-int vnet_bind_uri (vnet_listen_args_t *);
-int vnet_unbind_uri (vnet_unlisten_args_t * a);
-int vnet_connect_uri (vnet_connect_args_t * a);
+session_error_t parse_uri (char *uri, session_endpoint_cfg_t *sep);
+session_error_t vnet_bind_uri (vnet_listen_args_t *);
+session_error_t vnet_unbind_uri (vnet_unlisten_args_t *a);
+session_error_t vnet_connect_uri (vnet_connect_args_t *a);
-int vnet_application_attach (vnet_app_attach_args_t * a);
-int vnet_application_detach (vnet_app_detach_args_t * a);
-int vnet_listen (vnet_listen_args_t * a);
-int vnet_connect (vnet_connect_args_t * a);
-int vnet_unlisten (vnet_unlisten_args_t * a);
-int vnet_shutdown_session (vnet_shutdown_args_t *a);
-int vnet_disconnect_session (vnet_disconnect_args_t * a);
+session_error_t vnet_application_attach (vnet_app_attach_args_t *a);
+session_error_t vnet_application_detach (vnet_app_detach_args_t *a);
+session_error_t vnet_listen (vnet_listen_args_t *a);
+session_error_t vnet_connect (vnet_connect_args_t *a);
+session_error_t vnet_unlisten (vnet_unlisten_args_t *a);
+session_error_t vnet_shutdown_session (vnet_shutdown_args_t *a);
+session_error_t vnet_disconnect_session (vnet_disconnect_args_t *a);
int vnet_app_add_cert_key_pair (vnet_app_add_cert_key_pair_args_t * a);
int vnet_app_del_cert_key_pair (u32 index);
/** Ask for app cb on pair deletion */
int vnet_app_add_cert_key_interest (u32 index, u32 app_index);
+uword unformat_vnet_uri (unformat_input_t *input, va_list *args);
+
typedef struct app_session_transport_
{
ip46_address_t rmt_ip; /**< remote ip */
@@ -297,15 +309,15 @@ typedef struct app_session_transport_
u8 is_ip4; /**< set if uses ip4 networking */
} app_session_transport_t;
-#define foreach_app_session_field \
- _(svm_fifo_t, *rx_fifo) /**< rx fifo */ \
- _(svm_fifo_t, *tx_fifo) /**< tx fifo */ \
- _(session_type_t, session_type) /**< session type */ \
- _(volatile u8, session_state) /**< session state */ \
- _(u32, session_index) /**< index in owning pool */ \
- _(app_session_transport_t, transport) /**< transport info */ \
- _(svm_msg_q_t, *vpp_evt_q) /**< vpp event queue */ \
- _(u8, is_dgram) /**< flag for dgram mode */ \
+#define foreach_app_session_field \
+ _ (svm_fifo_t, *rx_fifo) /**< rx fifo */ \
+ _ (svm_fifo_t, *tx_fifo) /**< tx fifo */ \
+ _ (session_type_t, session_type) /**< session type */ \
+ _ (volatile u8, session_state) /**< session state */ \
+ _ (u32, session_index) /**< index in owning pool */ \
+ _ (app_session_transport_t, transport) /**< transport info */ \
+ _ (svm_msg_q_t, *vpp_evt_q) /**< vpp event queue */ \
+ _ (u8, is_dgram) /**< flag for dgram mode */
typedef struct
{
@@ -344,7 +356,7 @@ STATIC_ASSERT (sizeof (session_listen_uri_msg_t) <= SESSION_CTRL_MSG_MAX_SIZE,
typedef struct session_bound_msg_
{
u32 context;
- u64 handle;
+ session_handle_t handle;
i32 retval;
u8 lcl_is_ip4;
u8 lcl_ip[16];
@@ -367,15 +379,15 @@ typedef struct session_unlisten_msg_
typedef struct session_unlisten_reply_msg_
{
u32 context;
- u64 handle;
+ session_handle_t handle;
i32 retval;
} __clib_packed session_unlisten_reply_msg_t;
typedef struct session_accepted_msg_
{
u32 context;
- u64 listener_handle;
- u64 handle;
+ session_handle_t listener_handle;
+ session_handle_t handle;
uword server_rx_fifo;
uword server_tx_fifo;
u64 segment_handle;
@@ -384,13 +396,15 @@ typedef struct session_accepted_msg_
transport_endpoint_t lcl;
transport_endpoint_t rmt;
u8 flags;
+ u32 original_dst_ip4;
+ u16 original_dst_port;
} __clib_packed session_accepted_msg_t;
typedef struct session_accepted_reply_msg_
{
u32 context;
i32 retval;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_accepted_reply_msg_t;
typedef struct session_connect_msg_
@@ -430,7 +444,7 @@ typedef struct session_connected_msg_
{
u32 context;
i32 retval;
- u64 handle;
+ session_handle_t handle;
uword server_rx_fifo;
uword server_tx_fifo;
u64 segment_handle;
@@ -460,33 +474,33 @@ typedef struct session_disconnected_msg_
{
u32 client_index;
u32 context;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_disconnected_msg_t;
typedef struct session_disconnected_reply_msg_
{
u32 context;
i32 retval;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_disconnected_reply_msg_t;
typedef struct session_reset_msg_
{
u32 client_index;
u32 context;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_reset_msg_t;
typedef struct session_reset_reply_msg_
{
u32 context;
i32 retval;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_reset_reply_msg_t;
typedef struct session_req_worker_update_msg_
{
- u64 session_handle;
+ session_handle_t session_handle;
} __clib_packed session_req_worker_update_msg_t;
/* NOTE: using u16 for wrk indices because message needs to fit in 18B */
@@ -495,12 +509,12 @@ typedef struct session_worker_update_msg_
u32 client_index;
u16 wrk_index;
u16 req_wrk_index;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_worker_update_msg_t;
typedef struct session_worker_update_reply_msg_
{
- u64 handle;
+ session_handle_t handle;
uword rx_fifo;
uword tx_fifo;
u64 segment_handle;
@@ -641,14 +655,18 @@ app_send_io_evt_to_vpp (svm_msg_q_t * mq, u32 session_index, u8 evt_type,
}
}
+#define app_send_dgram_raw(f, at, vpp_evt_q, data, len, evt_type, do_evt, \
+ noblock) \
+ app_send_dgram_raw_gso (f, at, vpp_evt_q, data, len, 0, evt_type, do_evt, \
+ noblock)
+
always_inline int
-app_send_dgram_raw (svm_fifo_t * f, app_session_transport_t * at,
- svm_msg_q_t * vpp_evt_q, u8 * data, u32 len, u8 evt_type,
- u8 do_evt, u8 noblock)
+app_send_dgram_raw_gso (svm_fifo_t *f, app_session_transport_t *at,
+ svm_msg_q_t *vpp_evt_q, u8 *data, u32 len,
+ u16 gso_size, u8 evt_type, u8 do_evt, u8 noblock)
{
session_dgram_hdr_t hdr;
int rv;
-
if (svm_fifo_max_enqueue_prod (f) < (sizeof (session_dgram_hdr_t) + len))
return 0;
@@ -659,10 +677,8 @@ app_send_dgram_raw (svm_fifo_t * f, app_session_transport_t * at,
hdr.rmt_port = at->rmt_port;
clib_memcpy_fast (&hdr.lcl_ip, &at->lcl_ip, sizeof (ip46_address_t));
hdr.lcl_port = at->lcl_port;
-
- /* *INDENT-OFF* */
+ hdr.gso_size = gso_size;
svm_fifo_seg_t segs[2] = {{ (u8 *) &hdr, sizeof (hdr) }, { data, len }};
- /* *INDENT-ON* */
rv = svm_fifo_enqueue_segments (f, segs, 2, 0 /* allow partial */ );
if (PREDICT_FALSE (rv < 0))
@@ -787,13 +803,11 @@ app_recv (app_session_t * s, u8 * data, u32 len)
return app_recv_stream (s, data, len);
}
-/* *INDENT-OFF* */
static char *session_error_str[] = {
#define _(sym, str) str,
foreach_session_error
#undef _
};
-/* *INDENT-ON* */
static inline u8 *
format_session_error (u8 * s, va_list * args)
diff --git a/src/vnet/session/application_local.c b/src/vnet/session/application_local.c
index 8590d041600..3cb743d10e0 100644
--- a/src/vnet/session/application_local.c
+++ b/src/vnet/session/application_local.c
@@ -53,6 +53,8 @@ typedef struct ct_worker_
ct_cleanup_req_t *pending_cleanups; /**< Fifo of pending indices */
u8 have_connects; /**< Set if connect rpc pending */
u8 have_cleanups; /**< Set if cleanup rpc pending */
+ clib_spinlock_t pending_connects_lock; /**< Lock for pending connects */
+ u32 *new_connects; /**< Burst of connects to be done */
} ct_worker_t;
typedef struct ct_main_
@@ -65,6 +67,9 @@ typedef struct ct_main_
clib_rwlock_t app_segs_lock; /**< RW lock for seg contexts */
uword *app_segs_ctxs_table; /**< App handle to segment pool map */
ct_segments_ctx_t *app_seg_ctxs; /**< Pool of ct segment contexts */
+ u32 **fwrk_pending_connects; /**< First wrk pending half-opens */
+ u32 fwrk_thread; /**< First worker thread */
+ u8 fwrk_have_flush; /**< Flag for connect flush rpc */
} ct_main_t;
static ct_main_t ct_main;
@@ -81,7 +86,8 @@ ct_connection_alloc (u32 thread_index)
ct_worker_t *wrk = ct_worker_get (thread_index);
ct_connection_t *ct;
- pool_get_zero (wrk->connections, ct);
+ pool_get_aligned_safe (wrk->connections, ct, CLIB_CACHE_LINE_BYTES);
+ clib_memset (ct, 0, sizeof (*ct));
ct->c_c_index = ct - wrk->connections;
ct->c_thread_index = thread_index;
ct->client_wrk = ~0;
@@ -123,11 +129,18 @@ ct_half_open_alloc (void)
clib_spinlock_lock (&cm->ho_reuseable_lock);
vec_foreach (hip, cm->ho_reusable)
- pool_put_index (cm->wrk[0].connections, *hip);
+ pool_put_index (cm->wrk[cm->fwrk_thread].connections, *hip);
vec_reset_length (cm->ho_reusable);
clib_spinlock_unlock (&cm->ho_reuseable_lock);
- return ct_connection_alloc (0);
+ return ct_connection_alloc (cm->fwrk_thread);
+}
+
+static ct_connection_t *
+ct_half_open_get (u32 ho_index)
+{
+ ct_main_t *cm = &ct_main;
+ return ct_connection_get (ho_index, cm->fwrk_thread);
}
void
@@ -181,6 +194,12 @@ ct_set_invalid_app_wrk (ct_connection_t *ct, u8 is_client)
}
}
+static inline u64
+ct_client_seg_handle (u64 server_sh, u32 client_wrk_index)
+{
+ return (((u64) client_wrk_index << 56) | server_sh);
+}
+
static void
ct_session_dealloc_fifos (ct_connection_t *ct, svm_fifo_t *rx_fifo,
svm_fifo_t *tx_fifo)
@@ -301,7 +320,8 @@ ct_session_dealloc_fifos (ct_connection_t *ct, svm_fifo_t *rx_fifo,
segment_manager_t *csm;
csm = app_worker_get_connect_segment_manager (app_wrk);
if (!segment_manager_app_detached (csm))
- app_worker_del_segment_notify (app_wrk, ct->segment_handle);
+ app_worker_del_segment_notify (
+ app_wrk, ct_client_seg_handle (ct->segment_handle, ct->client_wrk));
}
/* Notify server app and free segment */
@@ -363,9 +383,10 @@ ct_session_connect_notify (session_t *ss, session_error_t err)
ss = session_get (ss_index, thread_index);
cs->session_type = ss->session_type;
cs->listener_handle = SESSION_INVALID_HANDLE;
- cs->session_state = SESSION_STATE_CONNECTING;
+ session_set_state (cs, SESSION_STATE_CONNECTING);
cs->app_wrk_index = client_wrk->wrk_index;
cs->connection_index = cct->c_c_index;
+ cs->opaque = opaque;
cct->c_s_index = cs->session_index;
/* This will allocate fifos for the session. They won't be used for
@@ -379,7 +400,7 @@ ct_session_connect_notify (session_t *ss, session_error_t err)
goto connect_error;
}
- cs->session_state = SESSION_STATE_CONNECTING;
+ session_set_state (cs, SESSION_STATE_CONNECTING);
if (app_worker_connect_notify (client_wrk, cs, 0, opaque))
{
@@ -390,7 +411,7 @@ ct_session_connect_notify (session_t *ss, session_error_t err)
}
cs = session_get (cct->c_s_index, cct->c_thread_index);
- cs->session_state = SESSION_STATE_READY;
+ session_set_state (cs, SESSION_STATE_READY);
return 0;
@@ -441,11 +462,11 @@ ct_alloc_segment (ct_main_t *cm, app_worker_t *server_wrk, u64 table_handle,
segment_manager_t *sm, u32 client_wrk_index)
{
u32 seg_ctx_index = ~0, sm_index, pair_bytes;
+ u64 seg_size, seg_handle, client_seg_handle;
segment_manager_props_t *props;
const u32 margin = 16 << 10;
ct_segments_ctx_t *seg_ctx;
app_worker_t *client_wrk;
- u64 seg_size, seg_handle;
application_t *server;
ct_segment_t *ct_seg;
uword *spp;
@@ -507,7 +528,11 @@ ct_alloc_segment (ct_main_t *cm, app_worker_t *server_wrk, u64 table_handle,
goto error;
client_wrk = app_worker_get (client_wrk_index);
- if (app_worker_add_segment_notify (client_wrk, seg_handle))
+ /* Make sure client workers do not have overlapping segment handles.
+ * Ideally, we should attach fs to client worker segment manager and
+ * create a new handle but that's not currently possible. */
+ client_seg_handle = ct_client_seg_handle (seg_handle, client_wrk_index);
+ if (app_worker_add_segment_notify (client_wrk, client_seg_handle))
{
app_worker_del_segment_notify (server_wrk, seg_handle);
goto error;
@@ -645,7 +670,7 @@ ct_accept_one (u32 thread_index, u32 ho_index)
cct = ct_connection_alloc (thread_index);
cct_index = cct->c_c_index;
- ho = ct_connection_get (ho_index, 0);
+ ho = ct_half_open_get (ho_index);
/* Unlikely but half-open session and transport could have been freed */
if (PREDICT_FALSE (!ho))
@@ -701,7 +726,7 @@ ct_accept_one (u32 thread_index, u32 ho_index)
sct->c_is_ip4);
ss->connection_index = sct->c_c_index;
ss->listener_handle = listen_session_get_handle (ll);
- ss->session_state = SESSION_STATE_CREATED;
+ session_set_state (ss, SESSION_STATE_CREATED);
server_wrk = application_listener_select_worker (ll);
ss->app_wrk_index = server_wrk->wrk_index;
@@ -724,9 +749,10 @@ ct_accept_one (u32 thread_index, u32 ho_index)
cct->client_tx_fifo = ss->rx_fifo;
cct->client_rx_fifo->refcnt++;
cct->client_tx_fifo->refcnt++;
- cct->segment_handle = sct->segment_handle;
+ cct->segment_handle =
+ ct_client_seg_handle (sct->segment_handle, cct->client_wrk);
- ss->session_state = SESSION_STATE_ACCEPTING;
+ session_set_state (ss, SESSION_STATE_ACCEPTING);
if (app_worker_accept_notify (server_wrk, ss))
{
ct_session_connect_notify (ss, SESSION_E_REFUSED);
@@ -739,39 +765,90 @@ ct_accept_one (u32 thread_index, u32 ho_index)
static void
ct_accept_rpc_wrk_handler (void *rpc_args)
{
- u32 thread_index, ho_index, n_connects, i, n_pending;
+ u32 thread_index, n_connects, i, n_pending;
const u32 max_connects = 32;
ct_worker_t *wrk;
+ u8 need_rpc = 0;
thread_index = pointer_to_uword (rpc_args);
wrk = ct_worker_get (thread_index);
- /* Sub without lock as main enqueues with worker barrier */
+ /* Connects could be handled without worker barrier so grab lock */
+ clib_spinlock_lock (&wrk->pending_connects_lock);
+
n_pending = clib_fifo_elts (wrk->pending_connects);
n_connects = clib_min (n_pending, max_connects);
+ vec_validate (wrk->new_connects, n_connects);
for (i = 0; i < n_connects; i++)
- {
- clib_fifo_sub1 (wrk->pending_connects, ho_index);
- ct_accept_one (thread_index, ho_index);
- }
+ clib_fifo_sub1 (wrk->pending_connects, wrk->new_connects[i]);
if (n_pending == n_connects)
wrk->have_connects = 0;
else
+ need_rpc = 1;
+
+ clib_spinlock_unlock (&wrk->pending_connects_lock);
+
+ for (i = 0; i < n_connects; i++)
+ ct_accept_one (thread_index, wrk->new_connects[i]);
+
+ if (need_rpc)
session_send_rpc_evt_to_thread_force (
thread_index, ct_accept_rpc_wrk_handler,
uword_to_pointer (thread_index, void *));
}
-static int
-ct_connect (app_worker_t * client_wrk, session_t * ll,
- session_endpoint_cfg_t * sep)
+static void
+ct_fwrk_flush_connects (void *rpc_args)
{
- u32 thread_index, ho_index;
+ u32 thread_index, fwrk_index, n_workers;
ct_main_t *cm = &ct_main;
- ct_connection_t *ho;
ct_worker_t *wrk;
+ u8 need_rpc;
+
+ fwrk_index = cm->fwrk_thread;
+ n_workers = vec_len (cm->fwrk_pending_connects);
+
+ for (thread_index = fwrk_index; thread_index < n_workers; thread_index++)
+ {
+ if (!vec_len (cm->fwrk_pending_connects[thread_index]))
+ continue;
+
+ wrk = ct_worker_get (thread_index);
+
+ /* Connects can be done without worker barrier, grab dst worker lock */
+ if (thread_index != fwrk_index)
+ clib_spinlock_lock (&wrk->pending_connects_lock);
+
+ clib_fifo_add (wrk->pending_connects,
+ cm->fwrk_pending_connects[thread_index],
+ vec_len (cm->fwrk_pending_connects[thread_index]));
+ if (!wrk->have_connects)
+ {
+ wrk->have_connects = 1;
+ need_rpc = 1;
+ }
+
+ if (thread_index != fwrk_index)
+ clib_spinlock_unlock (&wrk->pending_connects_lock);
+
+ vec_reset_length (cm->fwrk_pending_connects[thread_index]);
+
+ if (need_rpc)
+ session_send_rpc_evt_to_thread_force (
+ thread_index, ct_accept_rpc_wrk_handler,
+ uword_to_pointer (thread_index, void *));
+ }
+
+ cm->fwrk_have_flush = 0;
+}
+
+static void
+ct_program_connect_to_wrk (u32 ho_index)
+{
+ ct_main_t *cm = &ct_main;
+ u32 thread_index;
/* Simple round-robin policy for spreading sessions over workers. We skip
* thread index 0, i.e., offset the index by 1, when we have workers as it
@@ -780,6 +857,25 @@ ct_connect (app_worker_t * client_wrk, session_t * ll,
cm->n_sessions += 1;
thread_index = cm->n_workers ? (cm->n_sessions % cm->n_workers) + 1 : 0;
+ /* Pospone flushing of connect request to dst worker until after session
+ * layer fully initializes the half-open session. */
+ vec_add1 (cm->fwrk_pending_connects[thread_index], ho_index);
+ if (!cm->fwrk_have_flush)
+ {
+ session_send_rpc_evt_to_thread_force (
+ cm->fwrk_thread, ct_fwrk_flush_connects,
+ uword_to_pointer (thread_index, void *));
+ cm->fwrk_have_flush = 1;
+ }
+}
+
+static int
+ct_connect (app_worker_t *client_wrk, session_t *ll,
+ session_endpoint_cfg_t *sep)
+{
+ ct_connection_t *ho;
+ u32 ho_index;
+
/*
* Alloc and init client half-open transport
*/
@@ -800,21 +896,10 @@ ct_connect (app_worker_t * client_wrk, session_t * ll,
ho->actual_tp = sep->original_tp;
/*
- * Accept connection on thread selected above. Connected reply comes
+ * Program connect on a worker, connected reply comes
* after server accepts the connection.
*/
-
- wrk = ct_worker_get (thread_index);
-
- /* Worker barrier held, add without additional lock */
- clib_fifo_add1 (wrk->pending_connects, ho_index);
- if (!wrk->have_connects)
- {
- wrk->have_connects = 1;
- session_send_rpc_evt_to_thread_force (
- thread_index, ct_accept_rpc_wrk_handler,
- uword_to_pointer (thread_index, void *));
- }
+ ct_program_connect_to_wrk (ho_index);
return ho_index;
}
@@ -852,9 +937,9 @@ ct_listener_get (u32 ct_index)
}
static transport_connection_t *
-ct_half_open_get (u32 ct_index)
+ct_session_half_open_get (u32 ct_index)
{
- return (transport_connection_t *) ct_connection_get (ct_index, 0);
+ return (transport_connection_t *) ct_half_open_get (ct_index);
}
static void
@@ -876,7 +961,10 @@ ct_session_cleanup (u32 conn_index, u32 thread_index)
static void
ct_cleanup_ho (u32 ho_index)
{
- ct_connection_free (ct_connection_get (ho_index, 0));
+ ct_connection_t *ho;
+
+ ho = ct_half_open_get (ho_index);
+ ct_connection_free (ho);
}
static int
@@ -907,7 +995,7 @@ ct_session_connect (transport_endpoint_cfg_t * tep)
goto global_scope;
ll = listen_session_get_from_handle (lh);
- al = app_listener_get_w_session (ll);
+ al = app_listener_get (ll->al_index);
/*
* Break loop if rule in local table points to connecting app. This
@@ -936,8 +1024,12 @@ global_scope:
ll = session_lookup_listener_wildcard (table_index, sep);
/* Avoid connecting app to own listener */
- if (ll && ll->app_index != app->app_index)
- return ct_connect (app_wrk, ll, sep_ext);
+ if (ll)
+ {
+ al = app_listener_get (ll->al_index);
+ if (al->app_index != app->app_index)
+ return ct_connect (app_wrk, ll, sep_ext);
+ }
/* Failed to connect but no error */
return SESSION_E_LOCAL_CONNECT;
@@ -946,6 +1038,8 @@ global_scope:
static inline int
ct_close_is_reset (ct_connection_t *ct, session_t *s)
{
+ if (ct->flags & CT_CONN_F_RESET)
+ return 1;
if (ct->flags & CT_CONN_F_CLIENT)
return (svm_fifo_max_dequeue (ct->client_rx_fifo) > 0);
else
@@ -953,6 +1047,17 @@ ct_close_is_reset (ct_connection_t *ct, session_t *s)
}
static void
+ct_session_cleanup_server_session (session_t *s)
+{
+ ct_connection_t *ct;
+
+ ct = (ct_connection_t *) session_get_transport (s);
+ ct_session_dealloc_fifos (ct, s->rx_fifo, s->tx_fifo);
+ session_free (s);
+ ct_connection_free (ct);
+}
+
+static void
ct_session_postponed_cleanup (ct_connection_t *ct)
{
ct_connection_t *peer_ct;
@@ -972,33 +1077,38 @@ ct_session_postponed_cleanup (ct_connection_t *ct)
}
session_transport_closed_notify (&ct->connection);
+ /* It would be cleaner to call session_transport_delete_notify
+ * but then we can't control session cleanup lower */
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
+ if (app_wrk)
+ app_worker_cleanup_notify (app_wrk, s, SESSION_CLEANUP_TRANSPORT);
+
if (ct->flags & CT_CONN_F_CLIENT)
{
- if (app_wrk)
- app_worker_cleanup_notify (app_wrk, s, SESSION_CLEANUP_TRANSPORT);
-
/* Normal free for client session as the fifos are allocated through
* the connects segment manager in a segment that's not shared with
* the server */
ct_session_dealloc_fifos (ct, ct->client_rx_fifo, ct->client_tx_fifo);
- session_free_w_fifos (s);
+ session_program_cleanup (s);
+ ct_connection_free (ct);
}
else
{
/* Manual session and fifo segment cleanup to avoid implicit
* segment manager cleanups and notifications */
- app_wrk = app_worker_get_if_valid (s->app_wrk_index);
if (app_wrk)
{
- app_worker_cleanup_notify (app_wrk, s, SESSION_CLEANUP_TRANSPORT);
- app_worker_cleanup_notify (app_wrk, s, SESSION_CLEANUP_SESSION);
+ /* Remove custom cleanup notify infra when/if switching to normal
+ * session cleanup. Note that ct is freed in the cb function */
+ app_worker_cleanup_notify_custom (app_wrk, s,
+ SESSION_CLEANUP_SESSION,
+ ct_session_cleanup_server_session);
+ }
+ else
+ {
+ ct_connection_free (ct);
}
-
- ct_session_dealloc_fifos (ct, s->rx_fifo, s->tx_fifo);
- session_free (s);
}
-
- ct_connection_free (ct);
}
static void
@@ -1022,10 +1132,10 @@ ct_handle_cleanups (void *args)
clib_fifo_sub2 (wrk->pending_cleanups, req);
ct = ct_connection_get (req->ct_index, thread_index);
s = session_get (ct->c_s_index, ct->c_thread_index);
- if (!svm_fifo_has_event (s->tx_fifo))
- ct_session_postponed_cleanup (ct);
- else
+ if (svm_fifo_has_event (s->tx_fifo) || (s->flags & SESSION_F_RX_EVT))
clib_fifo_add1 (wrk->pending_cleanups, *req);
+ else
+ ct_session_postponed_cleanup (ct);
n_to_handle -= 1;
}
@@ -1090,6 +1200,15 @@ ct_session_close (u32 ct_index, u32 thread_index)
ct_program_cleanup (ct);
}
+static void
+ct_session_reset (u32 ct_index, u32 thread_index)
+{
+ ct_connection_t *ct;
+ ct = ct_connection_get (ct_index, thread_index);
+ ct->flags |= CT_CONN_F_RESET;
+ ct_session_close (ct_index, thread_index);
+}
+
static transport_connection_t *
ct_session_get (u32 ct_index, u32 thread_index)
{
@@ -1178,7 +1297,7 @@ format_ct_half_open (u8 *s, va_list *args)
{
u32 ho_index = va_arg (*args, u32);
u32 verbose = va_arg (*args, u32);
- ct_connection_t *ct = ct_connection_get (ho_index, 0);
+ ct_connection_t *ct = ct_half_open_get (ho_index);
s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_ct_connection_id, ct);
if (verbose)
s = format (s, "%-" SESSION_CLI_STATE_LEN "s", "HALF-OPEN");
@@ -1229,26 +1348,31 @@ ct_enable_disable (vlib_main_t * vm, u8 is_en)
{
vlib_thread_main_t *vtm = &vlib_thread_main;
ct_main_t *cm = &ct_main;
+ ct_worker_t *wrk;
cm->n_workers = vlib_num_workers ();
+ cm->fwrk_thread = transport_cl_thread ();
vec_validate (cm->wrk, vtm->n_vlib_mains);
+ vec_foreach (wrk, cm->wrk)
+ clib_spinlock_init (&wrk->pending_connects_lock);
clib_spinlock_init (&cm->ho_reuseable_lock);
clib_rwlock_init (&cm->app_segs_lock);
+ vec_validate (cm->fwrk_pending_connects, cm->n_workers);
return 0;
}
-/* *INDENT-OFF* */
static const transport_proto_vft_t cut_thru_proto = {
.enable = ct_enable_disable,
.start_listen = ct_start_listen,
.stop_listen = ct_stop_listen,
.get_connection = ct_session_get,
.get_listener = ct_listener_get,
- .get_half_open = ct_half_open_get,
+ .get_half_open = ct_session_half_open_get,
.cleanup = ct_session_cleanup,
.cleanup_ho = ct_cleanup_ho,
.connect = ct_session_connect,
.close = ct_session_close,
+ .reset = ct_session_reset,
.custom_tx = ct_custom_tx,
.app_rx_evt = ct_app_rx_evt,
.format_listener = format_ct_listener,
@@ -1261,7 +1385,6 @@ static const transport_proto_vft_t cut_thru_proto = {
.service_type = TRANSPORT_SERVICE_VC,
},
};
-/* *INDENT-ON* */
static inline int
ct_session_can_tx (session_t *s)
@@ -1286,6 +1409,7 @@ ct_session_tx (session_t * s)
peer_s = session_get (peer_ct->c_s_index, peer_ct->c_thread_index);
if (peer_s->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
return 0;
+ peer_s->flags |= SESSION_F_RX_EVT;
return session_enqueue_notify (peer_s);
}
diff --git a/src/vnet/session/application_local.h b/src/vnet/session/application_local.h
index 86edf243b22..fd2804c7baf 100644
--- a/src/vnet/session/application_local.h
+++ b/src/vnet/session/application_local.h
@@ -22,7 +22,8 @@
#define foreach_ct_flags \
_ (CLIENT, "client") \
- _ (HALF_OPEN, "half-open")
+ _ (HALF_OPEN, "half-open") \
+ _ (RESET, "reset")
enum
{
diff --git a/src/vnet/session/application_namespace.c b/src/vnet/session/application_namespace.c
index cd2636cff32..f547dcfc031 100644
--- a/src/vnet/session/application_namespace.c
+++ b/src/vnet/session/application_namespace.c
@@ -81,21 +81,20 @@ app_namespace_alloc (const u8 *ns_id)
return app_ns;
}
-int
-vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a)
+session_error_t
+vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t *a)
{
app_namespace_t *app_ns;
session_table_t *st;
u32 ns_index;
- int rv;
+ session_error_t rv;
if (a->is_add)
{
if (a->sw_if_index != APP_NAMESPACE_INVALID_INDEX
&& !vnet_get_sw_interface_or_null (vnet_get_main (),
a->sw_if_index))
- return VNET_API_ERROR_INVALID_SW_IF_INDEX;
-
+ return SESSION_E_INVALID;
if (a->sw_if_index != APP_NAMESPACE_INVALID_INDEX)
{
@@ -108,7 +107,7 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a)
}
if (a->sw_if_index == APP_NAMESPACE_INVALID_INDEX
&& a->ip4_fib_id == APP_NAMESPACE_INVALID_INDEX)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
app_ns = app_namespace_get_from_id (a->ns_id);
if (!app_ns)
@@ -119,11 +118,6 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a)
st->is_local = 1;
st->appns_index = app_namespace_index (app_ns);
app_ns->local_table_index = session_table_index (st);
- if (a->netns)
- {
- app_ns->netns = vec_dup (a->netns);
- vec_terminate_c_string (app_ns->netns);
- }
if (a->sock_name)
{
app_ns->sock_name = vec_dup (a->sock_name);
@@ -153,11 +147,11 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a)
{
ns_index = app_namespace_index_from_id (a->ns_id);
if (ns_index == APP_NAMESPACE_INVALID_INDEX)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
app_ns = app_namespace_get (ns_index);
if (!app_ns)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
application_namespace_cleanup (app_ns);
@@ -167,8 +161,6 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a)
st = session_table_get (app_ns->local_table_index);
session_table_free (st, FIB_PROTOCOL_MAX);
- if (app_ns->netns)
- vec_free (app_ns->netns);
if (app_ns->sock_name)
vec_free (app_ns->sock_name);
@@ -255,7 +247,6 @@ app_namespaces_init (void)
/* clang-format off */
vnet_app_namespace_add_del_args_t a = {
.ns_id = ns_id,
- .netns = 0,
.sock_name = 0,
.secret = 0,
.sw_if_index = APP_NAMESPACE_INVALID_INDEX,
@@ -272,7 +263,7 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
u8 is_add = 0, *ns_id = 0, secret_set = 0, sw_if_index_set = 0;
- u8 *netns = 0, *sock_name = 0;
+ u8 *sock_name = 0;
unformat_input_t _line_input, *line_input = &_line_input;
u32 sw_if_index, fib_id = APP_NAMESPACE_INVALID_INDEX;
vnet_main_t *vnm = vnet_get_main ();
@@ -302,8 +293,6 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input,
sw_if_index_set = 1;
else if (unformat (line_input, "fib_id", &fib_id))
;
- else if (unformat (line_input, "netns %_%v%_", &netns))
- ;
else if (unformat (line_input, "sock-name %_%v%_", &sock_name))
;
else
@@ -329,7 +318,6 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input,
/* clang-format off */
vnet_app_namespace_add_del_args_t args = {
.ns_id = ns_id,
- .netns = netns,
.secret = secret,
.sw_if_index = sw_if_index,
.sock_name = sock_name,
@@ -344,21 +332,18 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input,
done:
vec_free (ns_id);
- vec_free (netns);
vec_free (sock_name);
unformat_free (line_input);
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (app_ns_command, static) = {
.path = "app ns",
.short_help = "app ns [add|del] id <namespace-id> secret <secret> "
- "sw_if_index <sw_if_index> if <interface> [netns <ns>]",
+ "sw_if_index <sw_if_index> if <interface>",
.function = app_ns_fn,
};
-/* *INDENT-ON* */
u8 *
format_app_namespace (u8 * s, va_list * args)
@@ -371,8 +356,6 @@ format_app_namespace (u8 * s, va_list * args)
if (app_ns->sw_if_index != (u32) ~0)
s = format (s, "\nInterface: %U", format_vnet_sw_if_index_name, vnm,
app_ns->sw_if_index);
- if (app_ns->netns)
- s = format (s, "\nNetns: %s", app_ns->netns);
if (app_ns->sock_name)
s = format (s, "\nSocket: %s", app_ns->sock_name);
@@ -401,7 +384,6 @@ app_namespace_show_api (vlib_main_t * vm, app_namespace_t * app_ns)
vlib_cli_output (vm, "%12s%12s%5s", "app index", "wrk index", "fd");
- /* *INDENT-OFF* */
pool_foreach (cs, app_ns->app_sockets) {
handle = (app_ns_api_handle_t *) &cs->private_data;
cf = clib_file_get (&file_main, handle->aah_file_index);
@@ -414,7 +396,6 @@ app_namespace_show_api (vlib_main_t * vm, app_namespace_t * app_ns)
vlib_cli_output (vm, "%12d%12d%5u", app_wrk->app_index,
app_wrk->wrk_map_index, cf->file_descriptor);
}
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -482,8 +463,7 @@ show_app_ns_fn (vlib_main_t * vm, unformat_input_t * main_input,
}
do_ns_list:
- table_add_header_col (t, 6, "Index", "Secret", "Interface", "Id", "Netns",
- "Socket");
+ table_add_header_col (t, 5, "Index", "Secret", "Interface", "Id", "Socket");
int i = 0;
pool_foreach (app_ns, app_namespace_pool)
{
@@ -493,7 +473,6 @@ do_ns_list:
table_format_cell (t, i, j++, "%U", format_vnet_sw_if_index_name, vnm,
app_ns->sw_if_index);
table_format_cell (t, i, j++, "%s", app_ns->ns_id);
- table_format_cell (t, i, j++, "%s", app_ns->netns);
table_format_cell (t, i++, j++, "%s", app_ns->sock_name);
}
@@ -510,13 +489,11 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_app_ns_command, static) = {
.path = "show app ns",
.short_help = "show app ns [id <id> [api-clients]]",
.function = show_app_ns_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/session/application_namespace.h b/src/vnet/session/application_namespace.h
index 1750d41fff8..261325cbe0e 100644
--- a/src/vnet/session/application_namespace.h
+++ b/src/vnet/session/application_namespace.h
@@ -51,11 +51,6 @@ typedef struct _app_namespace
u8 *ns_id;
/**
- * Linux netns if one was provided
- */
- u8 *netns;
-
- /**
* Name of socket applications can use to attach to session layer
*/
u8 *sock_name;
@@ -69,7 +64,6 @@ typedef struct _app_namespace
typedef struct _vnet_app_namespace_add_del_args
{
u8 *ns_id;
- u8 *netns;
u8 *sock_name;
u64 secret;
u32 sw_if_index;
@@ -88,7 +82,8 @@ const u8 *app_namespace_id (app_namespace_t * app_ns);
const u8 *app_namespace_id_from_index (u32 index);
u32 app_namespace_index_from_id (const u8 *ns_id);
void app_namespaces_init (void);
-int vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a);
+session_error_t
+vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t *a);
u32 app_namespace_get_fib_index (app_namespace_t * app_ns, u8 fib_proto);
session_table_t *app_namespace_get_local_table (app_namespace_t * app_ns);
diff --git a/src/vnet/session/application_worker.c b/src/vnet/session/application_worker.c
index 844e78f7fa9..befdb7c7002 100644
--- a/src/vnet/session/application_worker.c
+++ b/src/vnet/session/application_worker.c
@@ -26,6 +26,7 @@ app_worker_t *
app_worker_alloc (application_t * app)
{
app_worker_t *app_wrk;
+
pool_get (app_workers, app_wrk);
clib_memset (app_wrk, 0, sizeof (*app_wrk));
app_wrk->wrk_index = app_wrk - app_workers;
@@ -33,7 +34,8 @@ app_worker_alloc (application_t * app)
app_wrk->wrk_map_index = ~0;
app_wrk->connects_seg_manager = APP_INVALID_SEGMENT_MANAGER_INDEX;
clib_spinlock_init (&app_wrk->detached_seg_managers_lock);
- clib_spinlock_init (&app_wrk->postponed_mq_msgs_lock);
+ vec_validate (app_wrk->wrk_evts, vlib_num_workers ());
+ vec_validate (app_wrk->wrk_mq_congested, vlib_num_workers ());
APP_DBG ("New app %v worker %u", app->name, app_wrk->wrk_index);
return app_wrk;
}
@@ -56,26 +58,34 @@ void
app_worker_free (app_worker_t * app_wrk)
{
application_t *app = application_get (app_wrk->app_index);
+ session_handle_t handle, *handles = 0, *sh;
vnet_unlisten_args_t _a, *a = &_a;
- u64 handle, *handles = 0, *sm_indices = 0;
segment_manager_t *sm;
- session_handle_t *sh;
+ u64 *sm_indices = 0;
session_t *ls;
u32 sm_index;
int i;
/*
+ * Cleanup vpp wrk events
+ */
+ app_worker_del_all_events (app_wrk);
+ for (i = 0; i < vec_len (app_wrk->wrk_evts); i++)
+ clib_fifo_free (app_wrk->wrk_evts[i]);
+
+ vec_free (app_wrk->wrk_evts);
+ vec_free (app_wrk->wrk_mq_congested);
+
+ /*
* Listener cleanup
*/
- /* *INDENT-OFF* */
hash_foreach (handle, sm_index, app_wrk->listeners_table, ({
ls = listen_session_get_from_handle (handle);
vec_add1 (handles, app_listen_session_handle (ls));
vec_add1 (sm_indices, sm_index);
sm = segment_manager_get (sm_index);
}));
- /* *INDENT-ON* */
for (i = 0; i < vec_len (handles); i++)
{
@@ -92,7 +102,7 @@ app_worker_free (app_worker_t * app_wrk)
segment_manager_init_free (sm);
}
}
- vec_reset_length (handles);
+ vec_free (handles);
vec_free (sm_indices);
hash_free (app_wrk->listeners_table);
@@ -127,7 +137,6 @@ app_worker_free (app_worker_t * app_wrk)
}
vec_free (app_wrk->detached_seg_managers);
clib_spinlock_free (&app_wrk->detached_seg_managers_lock);
- clib_spinlock_free (&app_wrk->postponed_mq_msgs_lock);
if (CLIB_DEBUG)
clib_memset (app_wrk, 0xfe, sizeof (*app_wrk));
@@ -177,12 +186,67 @@ app_worker_alloc_session_fifos (segment_manager_t * sm, session_t * s)
}
int
+app_worker_alloc_wrk_cl_session (app_worker_t *app_wrk, session_t *ls)
+{
+ svm_fifo_t *rx_fifo = 0, *tx_fifo = 0;
+ segment_manager_t *sm;
+ session_handle_t lsh;
+ app_listener_t *al;
+ session_t *s;
+
+ al = app_listener_get (ls->al_index);
+ sm = app_worker_get_listen_segment_manager (app_wrk, ls);
+ lsh = session_handle (ls);
+
+ s = session_alloc (0 /* listener on main worker */);
+ session_set_state (s, SESSION_STATE_LISTENING);
+ s->flags |= SESSION_F_IS_CLESS;
+ s->app_wrk_index = app_wrk->wrk_index;
+ ls = session_get_from_handle (lsh);
+ s->session_type = ls->session_type;
+ s->connection_index = ls->connection_index;
+
+ segment_manager_alloc_session_fifos (sm, s->thread_index, &rx_fifo,
+ &tx_fifo);
+
+ rx_fifo->shr->master_session_index = s->session_index;
+ rx_fifo->master_thread_index = s->thread_index;
+
+ tx_fifo->shr->master_session_index = s->session_index;
+ tx_fifo->master_thread_index = s->thread_index;
+
+ s->rx_fifo = rx_fifo;
+ s->tx_fifo = tx_fifo;
+
+ vec_validate (al->cl_listeners, app_wrk->wrk_map_index);
+ al->cl_listeners[app_wrk->wrk_map_index] = s->session_index;
+
+ return 0;
+}
+
+void
+app_worker_free_wrk_cl_session (app_worker_t *app_wrk, session_t *ls)
+{
+ app_listener_t *al;
+ session_t *s;
+
+ al = app_listener_get (ls->al_index);
+
+ s = app_listener_get_wrk_cl_session (al, app_wrk->wrk_map_index);
+ segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo);
+ session_free (s);
+
+ al->cl_listeners[app_wrk->wrk_map_index] = SESSION_INVALID_INDEX;
+}
+
+int
app_worker_init_listener (app_worker_t * app_wrk, session_t * ls)
{
segment_manager_t *sm;
/* Allocate segment manager. All sessions derived out of a listen session
- * have fifos allocated by the same segment manager. */
+ * have fifos allocated by the same segment manager.
+ * TODO(fcoras): limit memory consumption by cless listeners */
if (!(sm = app_worker_alloc_segment_manager (app_wrk)))
return SESSION_E_ALLOC;
@@ -193,18 +257,14 @@ app_worker_init_listener (app_worker_t * app_wrk, session_t * ls)
hash_set (app_wrk->listeners_table, listen_session_get_handle (ls),
segment_manager_index (sm));
- if (transport_connection_is_cless (session_get_transport (ls)))
- {
- if (ls->rx_fifo)
- return SESSION_E_NOSUPPORT;
- return app_worker_alloc_session_fifos (sm, ls);
- }
+ if (ls->flags & SESSION_F_IS_CLESS)
+ return app_worker_alloc_wrk_cl_session (app_wrk, ls);
+
return 0;
}
-int
-app_worker_start_listen (app_worker_t * app_wrk,
- app_listener_t * app_listener)
+session_error_t
+app_worker_start_listen (app_worker_t *app_wrk, app_listener_t *app_listener)
{
session_t *ls;
int rv;
@@ -268,12 +328,8 @@ app_worker_stop_listen_session (app_worker_t * app_wrk, session_t * ls)
if (PREDICT_FALSE (!sm_indexp))
return;
- /* Dealloc fifos, if any (dgram listeners) */
- if (ls->rx_fifo)
- {
- segment_manager_dealloc_fifos (ls->rx_fifo, ls->tx_fifo);
- ls->tx_fifo = ls->rx_fifo = 0;
- }
+ if (ls->flags & SESSION_F_IS_CLESS)
+ app_worker_free_wrk_cl_session (app_wrk, ls);
/* Try to cleanup segment manager */
sm = segment_manager_get (*sm_indexp);
@@ -340,7 +396,7 @@ app_worker_init_accepted (session_t * s)
listener = listen_session_get_from_handle (s->listener_handle);
app_wrk = application_listener_select_worker (listener);
- if (PREDICT_FALSE (app_wrk->mq_congested))
+ if (PREDICT_FALSE (app_worker_mq_is_congested (app_wrk)))
return -1;
s->app_wrk_index = app_wrk->wrk_index;
@@ -356,10 +412,35 @@ app_worker_init_accepted (session_t * s)
}
int
+app_worker_listened_notify (app_worker_t *app_wrk, session_handle_t alsh,
+ u32 opaque, session_error_t err)
+{
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_BOUND,
+ .as_u64[0] = alsh,
+ .as_u64[1] = (u64) opaque << 32 | (u32) err };
+
+ app_worker_add_event_custom (app_wrk, 0 /* thread index */, &evt);
+
+ return 0;
+}
+
+int
+app_worker_unlisten_reply (app_worker_t *app_wrk, session_handle_t sh,
+ u32 opaque, session_error_t err)
+{
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_UNLISTEN_REPLY,
+ .as_u64[0] = sh,
+ .as_u64[1] = (u64) opaque << 32 | (u32) err };
+
+ app_worker_add_event_custom (app_wrk, 0 /* thread index */, &evt);
+ return 0;
+}
+
+int
app_worker_accept_notify (app_worker_t * app_wrk, session_t * s)
{
- application_t *app = application_get (app_wrk->app_index);
- return app->cb_fns.session_accept_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_ACCEPTED);
+ return 0;
}
int
@@ -373,7 +454,7 @@ app_worker_init_connected (app_worker_t * app_wrk, session_t * s)
/* Allocate fifos for session, unless the app is a builtin proxy */
if (application_is_builtin_proxy (app))
- return 0;
+ return app->cb_fns.proxy_alloc_session_fifos (s);
sm = app_worker_get_connect_segment_manager (app_wrk);
return app_worker_alloc_session_fifos (sm, s);
@@ -383,9 +464,13 @@ int
app_worker_connect_notify (app_worker_t * app_wrk, session_t * s,
session_error_t err, u32 opaque)
{
- application_t *app = application_get (app_wrk->app_index);
- return app->cb_fns.session_connected_callback (app_wrk->wrk_index, opaque,
- s, err);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_CONNECTED,
+ .as_u64[0] = s ? s->session_index : ~0,
+ .as_u64[1] = (u64) opaque << 32 | (u32) err };
+ u32 thread_index = s ? s->thread_index : vlib_get_thread_index ();
+
+ app_worker_add_event_custom (app_wrk, thread_index, &evt);
+ return 0;
}
int
@@ -393,7 +478,7 @@ app_worker_add_half_open (app_worker_t *app_wrk, session_handle_t sh)
{
session_handle_t *shp;
- ASSERT (vlib_get_thread_index () == 0);
+ ASSERT (session_vlib_thread_is_cl_thread ());
pool_get (app_wrk->half_open_table, shp);
*shp = sh;
@@ -403,36 +488,28 @@ app_worker_add_half_open (app_worker_t *app_wrk, session_handle_t sh)
int
app_worker_del_half_open (app_worker_t *app_wrk, session_t *s)
{
- application_t *app = application_get (app_wrk->app_index);
- ASSERT (vlib_get_thread_index () <= 1);
- pool_put_index (app_wrk->half_open_table, s->ho_index);
- if (app->cb_fns.half_open_cleanup_callback)
- app->cb_fns.half_open_cleanup_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_HALF_CLEANUP);
return 0;
}
int
app_worker_close_notify (app_worker_t * app_wrk, session_t * s)
{
- application_t *app = application_get (app_wrk->app_index);
- app->cb_fns.session_disconnect_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_DISCONNECTED);
return 0;
}
int
app_worker_transport_closed_notify (app_worker_t * app_wrk, session_t * s)
{
- application_t *app = application_get (app_wrk->app_index);
- if (app->cb_fns.session_transport_closed_callback)
- app->cb_fns.session_transport_closed_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_TRANSPORT_CLOSED);
return 0;
}
int
app_worker_reset_notify (app_worker_t * app_wrk, session_t * s)
{
- application_t *app = application_get (app_wrk->app_index);
- app->cb_fns.session_reset_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_RESET);
return 0;
}
@@ -440,29 +517,33 @@ int
app_worker_cleanup_notify (app_worker_t * app_wrk, session_t * s,
session_cleanup_ntf_t ntf)
{
- application_t *app = application_get (app_wrk->app_index);
- if (app->cb_fns.session_cleanup_callback)
- app->cb_fns.session_cleanup_callback (s, ntf);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_CLEANUP,
+ .as_u64[0] = (u64) ntf << 32 | s->session_index,
+ .as_u64[1] = pointer_to_uword (session_cleanup) };
+
+ app_worker_add_event_custom (app_wrk, s->thread_index, &evt);
+
return 0;
}
int
-app_worker_builtin_rx (app_worker_t * app_wrk, session_t * s)
+app_worker_cleanup_notify_custom (app_worker_t *app_wrk, session_t *s,
+ session_cleanup_ntf_t ntf,
+ void (*cleanup_cb) (session_t *s))
{
- application_t *app = application_get (app_wrk->app_index);
- app->cb_fns.builtin_app_rx_callback (s);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_CLEANUP,
+ .as_u64[0] = (u64) ntf << 32 | s->session_index,
+ .as_u64[1] = pointer_to_uword (cleanup_cb) };
+
+ app_worker_add_event_custom (app_wrk, s->thread_index, &evt);
+
return 0;
}
int
-app_worker_builtin_tx (app_worker_t * app_wrk, session_t * s)
+app_worker_rx_notify (app_worker_t *app_wrk, session_t *s)
{
- application_t *app = application_get (app_wrk->app_index);
-
- if (!app->cb_fns.builtin_app_tx_callback)
- return 0;
-
- app->cb_fns.builtin_app_tx_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_IO_EVT_RX);
return 0;
}
@@ -470,8 +551,11 @@ int
app_worker_migrate_notify (app_worker_t * app_wrk, session_t * s,
session_handle_t new_sh)
{
- application_t *app = application_get (app_wrk->app_index);
- app->cb_fns.session_migrate_callback (s, new_sh);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_MIGRATED,
+ .as_u64[0] = s->session_index,
+ .as_u64[1] = new_sh };
+
+ app_worker_add_event_custom (app_wrk, s->thread_index, &evt);
return 0;
}
@@ -480,6 +564,7 @@ app_worker_own_session (app_worker_t * app_wrk, session_t * s)
{
segment_manager_t *sm;
svm_fifo_t *rxf, *txf;
+ int rv;
if (s->session_state == SESSION_STATE_LISTENING)
return application_change_listener_owner (s, app_wrk);
@@ -496,8 +581,8 @@ app_worker_own_session (app_worker_t * app_wrk, session_t * s)
s->tx_fifo = 0;
sm = app_worker_get_connect_segment_manager (app_wrk);
- if (app_worker_alloc_session_fifos (sm, s))
- return -1;
+ if ((rv = app_worker_alloc_session_fifos (sm, s)))
+ return rv;
if (!svm_fifo_is_empty_cons (rxf))
svm_fifo_clone (s->rx_fifo, rxf);
@@ -514,7 +599,7 @@ int
app_worker_connect_session (app_worker_t *app_wrk, session_endpoint_cfg_t *sep,
session_handle_t *rsh)
{
- if (PREDICT_FALSE (app_wrk->mq_congested))
+ if (PREDICT_FALSE (app_worker_mq_is_congested (app_wrk)))
return SESSION_E_REFUSED;
sep->app_wrk_index = app_wrk->wrk_index;
@@ -560,14 +645,12 @@ app_worker_first_listener (app_worker_t * app_wrk, u8 fib_proto,
sst = session_type_from_proto_and_ip (transport_proto,
fib_proto == FIB_PROTOCOL_IP4);
- /* *INDENT-OFF* */
hash_foreach (handle, sm_index, app_wrk->listeners_table, ({
listener = listen_session_get_from_handle (handle);
if (listener->session_type == sst
&& !(listener->flags & SESSION_F_PROXY))
return listener;
}));
- /* *INDENT-ON* */
return 0;
}
@@ -584,13 +667,11 @@ app_worker_proxy_listener (app_worker_t * app_wrk, u8 fib_proto,
sst = session_type_from_proto_and_ip (transport_proto,
fib_proto == FIB_PROTOCOL_IP4);
- /* *INDENT-OFF* */
hash_foreach (handle, sm_index, app_wrk->listeners_table, ({
listener = listen_session_get_from_handle (handle);
if (listener->session_type == sst && (listener->flags & SESSION_F_PROXY))
return listener;
}));
- /* *INDENT-ON* */
return 0;
}
@@ -601,24 +682,23 @@ app_worker_proxy_listener (app_worker_t * app_wrk, u8 fib_proto,
int
app_worker_add_segment_notify (app_worker_t * app_wrk, u64 segment_handle)
{
- application_t *app = application_get (app_wrk->app_index);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_APP_ADD_SEGMENT,
+ .as_u64[1] = segment_handle };
- return app->cb_fns.add_segment_callback (app_wrk->wrk_index,
- segment_handle);
+ app_worker_add_event_custom (app_wrk, vlib_get_thread_index (), &evt);
+
+ return 0;
}
int
app_worker_del_segment_notify (app_worker_t * app_wrk, u64 segment_handle)
{
- application_t *app = application_get (app_wrk->app_index);
- return app->cb_fns.del_segment_callback (app_wrk->wrk_index,
- segment_handle);
-}
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_APP_DEL_SEGMENT,
+ .as_u64[1] = segment_handle };
-static inline u8
-app_worker_application_is_builtin (app_worker_t * app_wrk)
-{
- return app_wrk->app_is_builtin;
+ app_worker_add_event_custom (app_wrk, vlib_get_thread_index (), &evt);
+
+ return 0;
}
static int
@@ -677,126 +757,38 @@ app_wrk_send_fd (app_worker_t *app_wrk, int fd)
return 0;
}
-static int
-mq_try_lock_and_alloc_msg (svm_msg_q_t *mq, session_mq_rings_e ring,
- svm_msg_q_msg_t *msg)
-{
- int rv, n_try = 0;
-
- while (n_try < 75)
- {
- rv = svm_msg_q_lock_and_alloc_msg_w_ring (mq, ring, SVM_Q_NOWAIT, msg);
- if (!rv)
- return 0;
- /*
- * Break the loop if mq is full, usually this is because the
- * app has crashed or is hanging on somewhere.
- */
- if (rv != -1)
- break;
- n_try += 1;
- usleep (1);
- }
-
- return -1;
-}
-
-typedef union app_wrk_mq_rpc_args_
-{
- struct
- {
- u32 thread_index;
- u32 app_wrk_index;
- };
- uword as_uword;
-} app_wrk_mq_rpc_ags_t;
-
-static int
-app_wrk_handle_mq_postponed_msgs (void *arg)
+void
+app_worker_add_event (app_worker_t *app_wrk, session_t *s,
+ session_evt_type_t evt_type)
{
- svm_msg_q_msg_t _mq_msg, *mq_msg = &_mq_msg;
- app_wrk_postponed_msg_t *pm;
- app_wrk_mq_rpc_ags_t args;
- u32 max_msg, n_msg = 0;
- app_worker_t *app_wrk;
session_event_t *evt;
- svm_msg_q_t *mq;
-
- args.as_uword = pointer_to_uword (arg);
- app_wrk = app_worker_get_if_valid (args.app_wrk_index);
- if (!app_wrk)
- return 0;
-
- mq = app_wrk->event_queue;
-
- clib_spinlock_lock (&app_wrk->postponed_mq_msgs_lock);
-
- max_msg = clib_min (32, clib_fifo_elts (app_wrk->postponed_mq_msgs));
- while (n_msg < max_msg)
- {
- pm = clib_fifo_head (app_wrk->postponed_mq_msgs);
- if (mq_try_lock_and_alloc_msg (mq, pm->ring, mq_msg))
- break;
-
- evt = svm_msg_q_msg_data (mq, mq_msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = pm->event_type;
- clib_memcpy_fast (evt->data, pm->data, pm->len);
-
- if (pm->fd != -1)
- app_wrk_send_fd (app_wrk, pm->fd);
-
- svm_msg_q_add_and_unlock (mq, mq_msg);
-
- clib_fifo_advance_head (app_wrk->postponed_mq_msgs, 1);
- n_msg += 1;
- }
+ ASSERT (s->thread_index == vlib_get_thread_index ());
+ clib_fifo_add2 (app_wrk->wrk_evts[s->thread_index], evt);
+ evt->session_index = s->session_index;
+ evt->event_type = evt_type;
+ evt->postponed = 0;
- if (!clib_fifo_elts (app_wrk->postponed_mq_msgs))
+ /* First event for this app_wrk. Schedule it for handling in session input */
+ if (clib_fifo_elts (app_wrk->wrk_evts[s->thread_index]) == 1)
{
- app_wrk->mq_congested = 0;
+ session_worker_t *wrk = session_main_get_worker (s->thread_index);
+ session_wrk_program_app_wrk_evts (wrk, app_wrk->wrk_index);
}
- else
- {
- session_send_rpc_evt_to_thread_force (
- args.thread_index, app_wrk_handle_mq_postponed_msgs,
- uword_to_pointer (args.as_uword, void *));
- }
-
- clib_spinlock_unlock (&app_wrk->postponed_mq_msgs_lock);
-
- return 0;
}
-static void
-app_wrk_add_mq_postponed_msg (app_worker_t *app_wrk, session_mq_rings_e ring,
- u8 evt_type, void *msg, u32 msg_len, int fd)
+void
+app_worker_add_event_custom (app_worker_t *app_wrk, u32 thread_index,
+ session_event_t *evt)
{
- app_wrk_postponed_msg_t *pm;
-
- clib_spinlock_lock (&app_wrk->postponed_mq_msgs_lock);
+ clib_fifo_add1 (app_wrk->wrk_evts[thread_index], *evt);
- app_wrk->mq_congested = 1;
-
- clib_fifo_add2 (app_wrk->postponed_mq_msgs, pm);
- clib_memcpy_fast (pm->data, msg, msg_len);
- pm->event_type = evt_type;
- pm->ring = ring;
- pm->len = msg_len;
- pm->fd = fd;
-
- if (clib_fifo_elts (app_wrk->postponed_mq_msgs) == 1)
+ /* First event for this app_wrk. Schedule it for handling in session input */
+ if (clib_fifo_elts (app_wrk->wrk_evts[thread_index]) == 1)
{
- app_wrk_mq_rpc_ags_t args = { .thread_index = vlib_get_thread_index (),
- .app_wrk_index = app_wrk->wrk_index };
-
- session_send_rpc_evt_to_thread_force (
- args.thread_index, app_wrk_handle_mq_postponed_msgs,
- uword_to_pointer (args.as_uword, void *));
+ session_worker_t *wrk = session_main_get_worker (thread_index);
+ session_wrk_program_app_wrk_evts (wrk, app_wrk->wrk_index);
}
-
- clib_spinlock_unlock (&app_wrk->postponed_mq_msgs_lock);
}
always_inline void
@@ -806,14 +798,9 @@ app_wrk_send_ctrl_evt_inline (app_worker_t *app_wrk, u8 evt_type, void *msg,
svm_msg_q_msg_t _mq_msg, *mq_msg = &_mq_msg;
svm_msg_q_t *mq = app_wrk->event_queue;
session_event_t *evt;
- int rv;
- if (PREDICT_FALSE (app_wrk->mq_congested))
- goto handle_congestion;
-
- rv = mq_try_lock_and_alloc_msg (mq, SESSION_MQ_CTRL_EVT_RING, mq_msg);
- if (PREDICT_FALSE (rv))
- goto handle_congestion;
+ ASSERT (!svm_msg_q_or_ring_is_full (mq, SESSION_MQ_CTRL_EVT_RING));
+ *mq_msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_CTRL_EVT_RING);
evt = svm_msg_q_msg_data (mq, mq_msg);
clib_memset (evt, 0, sizeof (*evt));
@@ -823,14 +810,7 @@ app_wrk_send_ctrl_evt_inline (app_worker_t *app_wrk, u8 evt_type, void *msg,
if (fd != -1)
app_wrk_send_fd (app_wrk, fd);
- svm_msg_q_add_and_unlock (mq, mq_msg);
-
- return;
-
-handle_congestion:
-
- app_wrk_add_mq_postponed_msg (app_wrk, SESSION_MQ_CTRL_EVT_RING, evt_type,
- msg, msg_len, fd);
+ svm_msg_q_add_raw (mq, mq_msg);
}
void
@@ -847,116 +827,33 @@ app_wrk_send_ctrl_evt (app_worker_t *app_wrk, u8 evt_type, void *msg,
app_wrk_send_ctrl_evt_inline (app_wrk, evt_type, msg, msg_len, -1);
}
-static inline int
-app_send_io_evt_rx (app_worker_t * app_wrk, session_t * s)
+u8
+app_worker_mq_wrk_is_congested (app_worker_t *app_wrk, u32 thread_index)
{
- svm_msg_q_msg_t _mq_msg = { 0 }, *mq_msg = &_mq_msg;
- session_event_t *evt;
- svm_msg_q_t *mq;
- u32 app_session;
- int rv;
-
- if (app_worker_application_is_builtin (app_wrk))
- return app_worker_builtin_rx (app_wrk, s);
-
- if (svm_fifo_has_event (s->rx_fifo))
- return 0;
-
- app_session = s->rx_fifo->shr->client_session_index;
- mq = app_wrk->event_queue;
-
- if (PREDICT_FALSE (app_wrk->mq_congested))
- goto handle_congestion;
-
- rv = mq_try_lock_and_alloc_msg (mq, SESSION_MQ_IO_EVT_RING, mq_msg);
-
- if (PREDICT_FALSE (rv))
- goto handle_congestion;
-
- evt = svm_msg_q_msg_data (mq, mq_msg);
- evt->event_type = SESSION_IO_EVT_RX;
- evt->session_index = app_session;
-
- (void) svm_fifo_set_event (s->rx_fifo);
-
- svm_msg_q_add_and_unlock (mq, mq_msg);
-
- return 0;
-
-handle_congestion:
-
- app_wrk_add_mq_postponed_msg (app_wrk, SESSION_MQ_IO_EVT_RING,
- SESSION_IO_EVT_RX, &app_session,
- sizeof (app_session), -1);
- return -1;
+ return app_wrk->wrk_mq_congested[thread_index] > 0;
}
-static inline int
-app_send_io_evt_tx (app_worker_t * app_wrk, session_t * s)
+void
+app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, u32 thread_index)
{
- svm_msg_q_msg_t _mq_msg = { 0 }, *mq_msg = &_mq_msg;
- session_event_t *evt;
- svm_msg_q_t *mq;
- u32 app_session;
- int rv;
-
- if (app_worker_application_is_builtin (app_wrk))
- return app_worker_builtin_tx (app_wrk, s);
-
- app_session = s->tx_fifo->shr->client_session_index;
- mq = app_wrk->event_queue;
-
- if (PREDICT_FALSE (app_wrk->mq_congested))
- goto handle_congestion;
-
- rv = mq_try_lock_and_alloc_msg (mq, SESSION_MQ_IO_EVT_RING, mq_msg);
-
- if (PREDICT_FALSE (rv))
- goto handle_congestion;
-
- evt = svm_msg_q_msg_data (mq, mq_msg);
- evt->event_type = SESSION_IO_EVT_TX;
- evt->session_index = app_session;
-
- svm_msg_q_add_and_unlock (mq, mq_msg);
-
- return 0;
-
-handle_congestion:
-
- app_wrk_add_mq_postponed_msg (app_wrk, SESSION_MQ_IO_EVT_RING,
- SESSION_IO_EVT_TX, &app_session,
- sizeof (app_session), -1);
- return -1;
+ clib_atomic_fetch_add_relax (&app_wrk->mq_congested, 1);
+ ASSERT (thread_index == vlib_get_thread_index ());
+ app_wrk->wrk_mq_congested[thread_index] = 1;
}
-/* *INDENT-OFF* */
-typedef int (app_send_evt_handler_fn) (app_worker_t *app,
- session_t *s);
-static app_send_evt_handler_fn * const app_send_evt_handler_fns[2] = {
- app_send_io_evt_rx,
- app_send_io_evt_tx,
-};
-/* *INDENT-ON* */
-
-/**
- * Send event to application
- *
- * Logic from queue perspective is blocking. However, if queue is full,
- * we return.
- */
-int
-app_worker_lock_and_send_event (app_worker_t * app, session_t * s,
- u8 evt_type)
+void
+app_worker_unset_wrk_mq_congested (app_worker_t *app_wrk, u32 thread_index)
{
- return app_send_evt_handler_fns[evt_type] (app, s);
+ clib_atomic_fetch_sub_relax (&app_wrk->mq_congested, 1);
+ ASSERT (thread_index == vlib_get_thread_index ());
+ app_wrk->wrk_mq_congested[thread_index] = 0;
}
u8 *
format_app_worker_listener (u8 * s, va_list * args)
{
app_worker_t *app_wrk = va_arg (*args, app_worker_t *);
- u64 handle = va_arg (*args, u64);
+ session_handle_t handle = va_arg (*args, u64);
u32 sm_index = va_arg (*args, u32);
int verbose = va_arg (*args, int);
session_t *listener;
diff --git a/src/vnet/session/mma_template.h b/src/vnet/session/mma_template.h
index dc3545a4ffe..2c0230c2869 100644
--- a/src/vnet/session/mma_template.h
+++ b/src/vnet/session/mma_template.h
@@ -41,11 +41,9 @@ typedef struct
{
u32 action_index;
u32 *next_indices;
- /* *INDENT-OFF* */
RTT (mma_mask_or_match) mask;
RTT (mma_mask_or_match) match;
RTT (mma_mask_or_match) max_match;
- /* *INDENT-ON* */
} RTT (mma_rule);
typedef int (*RTT (rule_cmp_fn)) (RTT (mma_rule) * rule1,
diff --git a/src/vnet/session/segment_manager.c b/src/vnet/session/segment_manager.c
index c14cffa66a2..80bebdca9b5 100644
--- a/src/vnet/session/segment_manager.c
+++ b/src/vnet/session/segment_manager.c
@@ -105,8 +105,8 @@ segment_manager_add_segment_inline (segment_manager_t *sm, uword segment_size,
/* Not configured for addition of new segments and not first */
if (!props->add_segment && !segment_size)
{
- clib_warning ("cannot allocate new segment");
- return VNET_API_ERROR_INVALID_VALUE;
+ SESSION_DBG ("cannot allocate new segment");
+ return SESSION_E_INVALID;
}
/*
@@ -418,7 +418,7 @@ segment_manager_init_first (segment_manager_t * sm)
fs_index = segment_manager_add_segment (sm, max_seg_size, 0);
if (fs_index < 0)
{
- clib_warning ("Failed to preallocate segment %d", i);
+ SESSION_DBG ("Failed to preallocate segment %d", i);
return fs_index;
}
@@ -440,7 +440,7 @@ segment_manager_init_first (segment_manager_t * sm)
fs_index = segment_manager_add_segment (sm, first_seg_size, 0);
if (fs_index < 0)
{
- clib_warning ("Failed to allocate segment");
+ SESSION_DBG ("Failed to allocate segment");
return fs_index;
}
@@ -458,7 +458,7 @@ segment_manager_init_first (segment_manager_t * sm)
for (; i < fs->n_slices; i++)
{
if (fifo_segment_prealloc_fifo_hdrs (fs, i, hdrs_per_slice))
- return VNET_API_ERROR_SVM_SEGMENT_CREATE_FAIL;
+ return SESSION_E_SEG_CREATE;
}
}
@@ -499,11 +499,9 @@ segment_manager_free (segment_manager_t * sm)
* the manager is explicitly deleted/detached by the app. */
clib_rwlock_writer_lock (&sm->segments_rwlock);
- /* *INDENT-OFF* */
pool_foreach (fifo_segment, sm->segments) {
segment_manager_del_segment (sm, fifo_segment);
}
- /* *INDENT-ON* */
pool_free (sm->segments);
clib_rwlock_writer_unlock (&sm->segments_rwlock);
@@ -582,7 +580,6 @@ segment_manager_has_fifos (segment_manager_t * sm)
fifo_segment_t *seg;
u8 first = 1;
- /* *INDENT-OFF* */
segment_manager_foreach_segment_w_lock (seg, sm, ({
if (CLIB_DEBUG && !first && !fifo_segment_has_fifos (seg)
&& !(fifo_segment_flags (seg) & FIFO_SEGMENT_F_IS_PREALLOCATED))
@@ -597,7 +594,6 @@ segment_manager_has_fifos (segment_manager_t * sm)
return 1;
}
}));
- /* *INDENT-ON* */
return 0;
}
@@ -617,7 +613,6 @@ segment_manager_del_sessions (segment_manager_t * sm)
ASSERT (pool_elts (sm->segments) != 0);
/* Across all fifo segments used by the server */
- /* *INDENT-OFF* */
segment_manager_foreach_segment_w_lock (fs, sm, ({
for (slice_index = 0; slice_index < fs->n_slices; slice_index++)
{
@@ -642,7 +637,6 @@ segment_manager_del_sessions (segment_manager_t * sm)
* sessions if the segment can be removed.
*/
}));
- /* *INDENT-ON* */
vec_foreach (handle, handles)
{
@@ -807,7 +801,7 @@ sm_lock_and_alloc_segment_and_fifos (segment_manager_t *sm,
props->tx_fifo_size, rx_fifo, tx_fifo);
if (rv)
{
- clib_warning ("Added a segment, still can't allocate a fifo");
+ SESSION_DBG ("Added a segment, still can't allocate a fifo");
rv = SESSION_E_SEG_NO_SPACE2;
goto done;
}
@@ -866,7 +860,7 @@ segment_manager_dealloc_fifos (svm_fifo_t * rx_fifo, svm_fifo_t * tx_fifo)
/* Thread that allocated the fifos must be the one to clean them up */
ASSERT (rx_fifo->master_thread_index == vlib_get_thread_index () ||
- rx_fifo->refcnt > 1);
+ rx_fifo->refcnt > 1 || vlib_thread_is_main_w_barrier ());
/* It's possible to have no segment manager if the session was removed
* as result of a detach. */
@@ -961,12 +955,10 @@ segment_manager_alloc_queue (fifo_segment_t * segment,
fifo_evt_size = sizeof (session_event_t);
notif_q_size = clib_max (16, props->evt_q_size >> 4);
- /* *INDENT-OFF* */
svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = {
{props->evt_q_size, fifo_evt_size, 0},
{notif_q_size, session_evt_size, 0}
};
- /* *INDENT-ON* */
cfg->consumer_pid = 0;
cfg->n_rings = 2;
cfg->q_nitems = props->evt_q_size;
@@ -1125,13 +1117,11 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (segment_manager_show_command, static) = {
.path = "show segment-manager",
.short_help = "show segment-manager [segments][verbose][index <nn>]",
.function = segment_manager_show_fn,
};
-/* *INDENT-ON* */
void
segment_manager_format_sessions (segment_manager_t * sm, int verbose)
@@ -1160,7 +1150,6 @@ segment_manager_format_sessions (segment_manager_t * sm, int verbose)
clib_rwlock_reader_lock (&sm->segments_rwlock);
- /* *INDENT-OFF* */
pool_foreach (fs, sm->segments) {
for (slice_index = 0; slice_index < fs->n_slices; slice_index++)
{
@@ -1192,7 +1181,6 @@ segment_manager_format_sessions (segment_manager_t * sm, int verbose)
vec_free (s);
}
}
- /* *INDENT-ON* */
clib_rwlock_reader_unlock (&sm->segments_rwlock);
}
diff --git a/src/vnet/session/segment_manager.h b/src/vnet/session/segment_manager.h
index e786b3144c2..1e99c4605a6 100644
--- a/src/vnet/session/segment_manager.h
+++ b/src/vnet/session/segment_manager.h
@@ -190,7 +190,9 @@ static inline void
segment_manager_parse_segment_handle (u64 segment_handle, u32 * sm_index,
u32 * segment_index)
{
- *sm_index = segment_handle >> 32;
+ /* Upper 8 bits zeroed out as they may be used for cut-through segments.
+ * See @ref ct_alloc_segment */
+ *sm_index = (segment_handle >> 32) & 0xFFFFFF;
*segment_index = segment_handle & 0xFFFFFFFF;
}
diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api
index d2a942fb68b..6affae4112d 100644
--- a/src/vnet/session/session.api
+++ b/src/vnet/session/session.api
@@ -117,38 +117,6 @@ autoreply define app_del_cert_key_pair {
u32 index;
};
-/** \brief Application add TLS certificate
- ### WILL BE DEPRECATED POST 20.01 ###
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param cert_len - certificate length
- @param cert - certificate as a string
-*/
-autoreply define application_tls_cert_add {
- option deprecated="to be removed post 21.06";
- u32 client_index;
- u32 context;
- u32 app_index;
- u16 cert_len;
- u8 cert[cert_len];
-};
-
-/** \brief Application add TLS key
- ### WILL BE DEPRECATED POST 20.01 ###
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param key_len - certificate length
- @param key - PEM encoded key as a string
-*/
-autoreply define application_tls_key_add {
- option deprecated="to be removed post 21.06";
- u32 client_index;
- u32 context;
- u32 app_index;
- u16 key_len;
- u8 key[key_len];
-};
-
/** \brief add/del application worker
@param client_index - opaque cookie to identify the sender
client to vpp direction only
@@ -251,9 +219,49 @@ define app_namespace_add_del {
@param ip6_fib_id - id of ip6 fib that "supports" the namespace. Ignored
if sw_if_index set.
@param namespace_id - namespace id
+ @param sock_name - socket name (path, abstract socket name)
+*/
+define app_namespace_add_del_v4 {
+ option deprecated;
+ u32 client_index;
+ u32 context;
+ u64 secret;
+ bool is_add [default=true];
+ vl_api_interface_index_t sw_if_index [default=0xffffffff];
+ u32 ip4_fib_id;
+ u32 ip6_fib_id;
+ string namespace_id[64];
+ string sock_name[];
+};
+
+/** \brief Reply for app namespace add/del
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param appns_index - app namespace index
+*/
+define app_namespace_add_del_v4_reply
+{
+ u32 context;
+ i32 retval;
+ u32 appns_index;
+};
+
+/** \brief add/del application namespace
+ @param client_index - opaque cookie to identify the sender
+ client to vpp direction only
+ @param context - sender context, to match reply w/ request
+ @param secret - secret shared between app and vpp
+ @param sw_if_index - local interface that "supports" namespace. Set to
+ ~0 if no preference
+ @param ip4_fib_id - id of ip4 fib that "supports" the namespace. Ignored
+ if sw_if_index set.
+ @param ip6_fib_id - id of ip6 fib that "supports" the namespace. Ignored
+ if sw_if_index set.
+ @param namespace_id - namespace id
@param netns - linux net namespace
*/
define app_namespace_add_del_v2 {
+ option deprecated;
u32 client_index;
u32 context;
u64 secret;
@@ -280,6 +288,7 @@ define app_namespace_add_del_v2 {
@param sock_name - socket name (path, abstract socket name)
*/
define app_namespace_add_del_v3 {
+ option deprecated;
u32 client_index;
u32 context;
u64 secret;
@@ -312,6 +321,7 @@ define app_namespace_add_del_reply
*/
define app_namespace_add_del_v2_reply
{
+ option deprecated;
u32 context;
i32 retval;
u32 appns_index;
@@ -319,6 +329,7 @@ define app_namespace_add_del_v2_reply
define app_namespace_add_del_v3_reply
{
+ option deprecated;
u32 context;
i32 retval;
u32 appns_index;
diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c
index 3643e91a33a..67e7ee39001 100644
--- a/src/vnet/session/session.c
+++ b/src/vnet/session/session.c
@@ -17,6 +17,7 @@
* @brief Session and session manager
*/
+#include <vnet/plugin/plugin.h>
#include <vnet/session/session.h>
#include <vnet/session/application.h>
#include <vnet/dpo/load_balance.h>
@@ -59,7 +60,7 @@ session_send_evt_to_thread (void *data, void *args, u32 thread_index,
evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
evt->session_index = *(u32 *) data;
break;
- case SESSION_IO_EVT_BUILTIN_TX:
+ case SESSION_IO_EVT_TX_MAIN:
case SESSION_CTRL_EVT_CLOSE:
case SESSION_CTRL_EVT_RESET:
msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
@@ -96,6 +97,13 @@ session_send_io_evt_to_thread_custom (void *data, u32 thread_index,
}
int
+session_program_tx_io_evt (session_handle_tu_t sh, session_evt_type_t evt_type)
+{
+ return session_send_evt_to_thread ((void *) &sh.session_index, 0,
+ (u32) sh.thread_index, evt_type);
+}
+
+int
session_send_ctrl_evt_to_thread (session_t * s, session_evt_type_t evt_type)
{
/* only events supported are disconnect, shutdown and reset */
@@ -208,7 +216,7 @@ session_alloc (u32 thread_index)
clib_memset (s, 0, sizeof (*s));
s->session_index = s - wrk->sessions;
s->thread_index = thread_index;
- s->app_index = APP_INVALID_INDEX;
+ s->al_index = APP_INVALID_INDEX;
return s;
}
@@ -216,15 +224,12 @@ session_alloc (u32 thread_index)
void
session_free (session_t * s)
{
- if (CLIB_DEBUG)
- {
- u8 thread_index = s->thread_index;
- clib_memset (s, 0xFA, sizeof (*s));
- pool_put (session_main.wrk[thread_index].sessions, s);
- return;
- }
+ session_worker_t *wrk = &session_main.wrk[s->thread_index];
+
SESSION_EVT (SESSION_EVT_FREE, s);
- pool_put (session_main.wrk[s->thread_index].sessions, s);
+ if (CLIB_DEBUG)
+ clib_memset (s, 0xFA, sizeof (*s));
+ pool_put (wrk->sessions, s);
}
u8
@@ -242,35 +247,48 @@ session_is_valid (u32 si, u8 thread_index)
|| s->session_state <= SESSION_STATE_LISTENING)
return 1;
- if (s->session_state == SESSION_STATE_CONNECTING &&
+ if ((s->session_state == SESSION_STATE_CONNECTING ||
+ s->session_state == SESSION_STATE_TRANSPORT_CLOSED) &&
(s->flags & SESSION_F_HALF_OPEN))
return 1;
tc = session_get_transport (s);
- if (s->connection_index != tc->c_index
- || s->thread_index != tc->thread_index || tc->s_index != si)
+ if (s->connection_index != tc->c_index ||
+ s->thread_index != tc->thread_index || tc->s_index != si)
return 0;
return 1;
}
+void
+session_cleanup (session_t *s)
+{
+ segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo);
+ session_free (s);
+}
+
static void
session_cleanup_notify (session_t * s, session_cleanup_ntf_t ntf)
{
app_worker_t *app_wrk;
app_wrk = app_worker_get_if_valid (s->app_wrk_index);
- if (!app_wrk)
- return;
+ if (PREDICT_FALSE (!app_wrk))
+ {
+ if (ntf == SESSION_CLEANUP_TRANSPORT)
+ return;
+
+ session_cleanup (s);
+ return;
+ }
app_worker_cleanup_notify (app_wrk, s, ntf);
}
void
-session_free_w_fifos (session_t * s)
+session_program_cleanup (session_t *s)
{
+ ASSERT (s->session_state == SESSION_STATE_TRANSPORT_DELETED);
session_cleanup_notify (s, SESSION_CLEANUP_SESSION);
- segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo);
- session_free (s);
}
/**
@@ -287,7 +305,7 @@ session_delete (session_t * s)
if ((rv = session_lookup_del_session (s)))
clib_warning ("session %u hash delete rv %d", s->session_index, rv);
- session_free_w_fifos (s);
+ session_program_cleanup (s);
}
void
@@ -302,16 +320,27 @@ session_cleanup_half_open (session_handle_t ho_handle)
* session should be removed. */
if (ho->connection_index == ~0)
{
- ho->session_state = SESSION_STATE_CLOSED;
+ session_set_state (ho, SESSION_STATE_CLOSED);
return;
}
/* Migrated transports are no longer half-opens */
transport_cleanup (session_get_transport_proto (ho),
- ho->connection_index, ho->app_index /* overloaded */);
+ ho->connection_index, ho->al_index /* overloaded */);
+ }
+ else if (ho->session_state != SESSION_STATE_TRANSPORT_DELETED)
+ {
+ /* Cleanup half-open session lookup table if need be */
+ if (ho->session_state != SESSION_STATE_TRANSPORT_CLOSED)
+ {
+ transport_connection_t *tc;
+ tc = transport_get_half_open (session_get_transport_proto (ho),
+ ho->connection_index);
+ if (tc && !(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP))
+ session_lookup_del_half_open (tc);
+ }
+ transport_cleanup_half_open (session_get_transport_proto (ho),
+ ho->connection_index);
}
- else
- transport_cleanup_half_open (session_get_transport_proto (ho),
- ho->connection_index);
session_free (ho);
}
@@ -320,10 +349,12 @@ session_half_open_free (session_t *ho)
{
app_worker_t *app_wrk;
- ASSERT (vlib_get_thread_index () <= 1);
- app_wrk = app_worker_get (ho->app_wrk_index);
- app_worker_del_half_open (app_wrk, ho);
- session_free (ho);
+ ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
+ app_wrk = app_worker_get_if_valid (ho->app_wrk_index);
+ if (app_wrk)
+ app_worker_del_half_open (app_wrk, ho);
+ else
+ session_free (ho);
}
static void
@@ -336,16 +367,26 @@ session_half_open_free_rpc (void *args)
void
session_half_open_delete_notify (transport_connection_t *tc)
{
+ session_t *ho = ho_session_get (tc->s_index);
+
+ /* Cleanup half-open lookup table if need be */
+ if (ho->session_state != SESSION_STATE_TRANSPORT_CLOSED)
+ {
+ if (!(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP))
+ session_lookup_del_half_open (tc);
+ }
+ session_set_state (ho, SESSION_STATE_TRANSPORT_DELETED);
+
/* Notification from ctrl thread accepted without rpc */
- if (!tc->thread_index)
+ if (tc->thread_index == transport_cl_thread ())
{
- session_half_open_free (ho_session_get (tc->s_index));
+ session_half_open_free (ho);
}
else
{
void *args = uword_to_pointer ((uword) tc->s_index, void *);
- session_send_rpc_evt_to_thread_force (0, session_half_open_free_rpc,
- args);
+ session_send_rpc_evt_to_thread_force (transport_cl_thread (),
+ session_half_open_free_rpc, args);
}
}
@@ -354,6 +395,9 @@ session_half_open_migrate_notify (transport_connection_t *tc)
{
session_t *ho;
+ /* Support half-open migrations only for transports with no lookup */
+ ASSERT (tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP);
+
ho = ho_session_get (tc->s_index);
ho->flags |= SESSION_F_IS_MIGRATING;
ho->connection_index = ~0;
@@ -373,8 +417,8 @@ session_half_open_migrated_notify (transport_connection_t *tc)
return -1;
}
ho->connection_index = tc->c_index;
- /* Overload app index for half-open with new thread */
- ho->app_index = tc->thread_index;
+ /* Overload al_index for half-open with new thread */
+ ho->al_index = tc->thread_index;
return 0;
}
@@ -389,7 +433,7 @@ session_alloc_for_connection (transport_connection_t * tc)
s = session_alloc (thread_index);
s->session_type = session_type_from_proto_and_ip (tc->proto, tc->is_ip4);
- s->session_state = SESSION_STATE_CLOSED;
+ session_set_state (s, SESSION_STATE_CLOSED);
/* Attach transport to session and vice versa */
s->connection_index = tc->c_index;
@@ -536,10 +580,162 @@ session_fifo_tuning (session_t * s, svm_fifo_t * f,
}
}
+void
+session_wrk_program_app_wrk_evts (session_worker_t *wrk, u32 app_wrk_index)
+{
+ u8 need_interrupt;
+
+ ASSERT ((wrk - session_main.wrk) == vlib_get_thread_index ());
+ need_interrupt = clib_bitmap_is_zero (wrk->app_wrks_pending_ntf);
+ wrk->app_wrks_pending_ntf =
+ clib_bitmap_set (wrk->app_wrks_pending_ntf, app_wrk_index, 1);
+
+ if (need_interrupt)
+ vlib_node_set_interrupt_pending (wrk->vm, session_input_node.index);
+}
+
+always_inline void
+session_program_io_event (app_worker_t *app_wrk, session_t *s,
+ session_evt_type_t et, u8 is_cl)
+{
+ if (is_cl)
+ {
+ /* Special events for connectionless sessions */
+ et += SESSION_IO_EVT_BUILTIN_RX - SESSION_IO_EVT_RX;
+
+ ASSERT (s->thread_index == 0 || et == SESSION_IO_EVT_TX_MAIN);
+ session_event_t evt = {
+ .event_type = et,
+ .session_handle = session_handle (s),
+ };
+
+ app_worker_add_event_custom (app_wrk, vlib_get_thread_index (), &evt);
+ }
+ else
+ {
+ app_worker_add_event (app_wrk, s, et);
+ }
+}
+
+static inline int
+session_notify_subscribers (u32 app_index, session_t *s, svm_fifo_t *f,
+ session_evt_type_t evt_type)
+{
+ app_worker_t *app_wrk;
+ application_t *app;
+ u8 is_cl;
+ int i;
+
+ app = application_get (app_index);
+ if (!app)
+ return -1;
+
+ is_cl = s->thread_index != vlib_get_thread_index ();
+ for (i = 0; i < f->shr->n_subscribers; i++)
+ {
+ app_wrk = application_get_worker (app, f->shr->subscribers[i]);
+ if (!app_wrk)
+ continue;
+ session_program_io_event (app_wrk, s, evt_type, is_cl ? 1 : 0);
+ }
+
+ return 0;
+}
+
+always_inline int
+session_enqueue_notify_inline (session_t *s, u8 is_cl)
+{
+ app_worker_t *app_wrk;
+
+ app_wrk = app_worker_get_if_valid (s->app_wrk_index);
+ if (PREDICT_FALSE (!app_wrk))
+ return -1;
+
+ session_program_io_event (app_wrk, s, SESSION_IO_EVT_RX, is_cl);
+
+ if (PREDICT_FALSE (svm_fifo_n_subscribers (s->rx_fifo)))
+ return session_notify_subscribers (app_wrk->app_index, s, s->rx_fifo,
+ SESSION_IO_EVT_RX);
+
+ return 0;
+}
+
+int
+session_enqueue_notify (session_t *s)
+{
+ return session_enqueue_notify_inline (s, 0 /* is_cl */);
+}
+
+int
+session_enqueue_notify_cl (session_t *s)
+{
+ return session_enqueue_notify_inline (s, 1 /* is_cl */);
+}
+
+int
+session_dequeue_notify (session_t *s)
+{
+ app_worker_t *app_wrk;
+ u8 is_cl;
+
+ /* Unset as soon as event is requested */
+ svm_fifo_clear_deq_ntf (s->tx_fifo);
+
+ app_wrk = app_worker_get_if_valid (s->app_wrk_index);
+ if (PREDICT_FALSE (!app_wrk))
+ return -1;
+
+ is_cl = s->session_state == SESSION_STATE_LISTENING ||
+ s->session_state == SESSION_STATE_OPENED;
+ session_program_io_event (app_wrk, s, SESSION_IO_EVT_TX, is_cl ? 1 : 0);
+
+ if (PREDICT_FALSE (svm_fifo_n_subscribers (s->tx_fifo)))
+ return session_notify_subscribers (app_wrk->app_index, s, s->tx_fifo,
+ SESSION_IO_EVT_TX);
+
+ return 0;
+}
+
+/**
+ * Flushes queue of sessions that are to be notified of new data
+ * enqueued events.
+ *
+ * @param transport_proto transport protocol for which queue to be flushed
+ * @param thread_index Thread index for which the flush is to be performed.
+ * @return 0 on success or a positive number indicating the number of
+ * failures due to API queue being full.
+ */
+void
+session_main_flush_enqueue_events (transport_proto_t transport_proto,
+ u32 thread_index)
+{
+ session_worker_t *wrk = session_main_get_worker (thread_index);
+ session_handle_t *handles;
+ session_t *s;
+ u32 i, is_cl;
+
+ handles = wrk->session_to_enqueue[transport_proto];
+
+ for (i = 0; i < vec_len (handles); i++)
+ {
+ s = session_get_from_handle (handles[i]);
+ session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED,
+ 0 /* TODO/not needed */);
+ is_cl =
+ s->thread_index != thread_index || (s->flags & SESSION_F_IS_CLESS);
+ if (!is_cl)
+ session_enqueue_notify_inline (s, 0);
+ else
+ session_enqueue_notify_inline (s, 1);
+ }
+
+ vec_reset_length (handles);
+ wrk->session_to_enqueue[transport_proto] = handles;
+}
+
/*
- * Enqueue data for delivery to session peer. Does not notify peer of enqueue
- * event but on request can queue notification events for later delivery by
- * calling stream_server_flush_enqueue_events().
+ * Enqueue data for delivery to app. If requested, it queues app notification
+ * event for later delivery.
*
* @param tc Transport connection which is to be enqueued data
* @param b Buffer to be enqueued
@@ -588,15 +784,14 @@ session_enqueue_stream_connection (transport_connection_t * tc,
if (queue_event)
{
- /* Queue RX event on this fifo. Eventually these will need to be flushed
- * by calling stream_server_flush_enqueue_events () */
- session_worker_t *wrk;
-
- wrk = session_main_get_worker (s->thread_index);
+ /* Queue RX event on this fifo. Eventually these will need to be
+ * flushed by calling @ref session_main_flush_enqueue_events () */
if (!(s->flags & SESSION_F_RX_EVT))
{
+ session_worker_t *wrk = session_main_get_worker (s->thread_index);
+ ASSERT (s->thread_index == vlib_get_thread_index ());
s->flags |= SESSION_F_RX_EVT;
- vec_add1 (wrk->session_to_enqueue[tc->proto], s->session_index);
+ vec_add1 (wrk->session_to_enqueue[tc->proto], session_handle (s));
}
session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0);
@@ -605,10 +800,11 @@ session_enqueue_stream_connection (transport_connection_t * tc,
return enqueued;
}
-int
-session_enqueue_dgram_connection (session_t * s,
- session_dgram_hdr_t * hdr,
- vlib_buffer_t * b, u8 proto, u8 queue_event)
+always_inline int
+session_enqueue_dgram_connection_inline (session_t *s,
+ session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event, u32 is_cl)
{
int rv;
@@ -617,12 +813,10 @@ session_enqueue_dgram_connection (session_t * s,
if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)))
{
- /* *INDENT-OFF* */
svm_fifo_seg_t segs[2] = {
{ (u8 *) hdr, sizeof (*hdr) },
{ vlib_buffer_get_current (b), b->current_length }
};
- /* *INDENT-ON* */
rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, 2,
0 /* allow_partial */ );
@@ -654,15 +848,16 @@ session_enqueue_dgram_connection (session_t * s,
if (queue_event && rv > 0)
{
- /* Queue RX event on this fifo. Eventually these will need to be flushed
- * by calling stream_server_flush_enqueue_events () */
- session_worker_t *wrk;
-
- wrk = session_main_get_worker (s->thread_index);
+ /* Queue RX event on this fifo. Eventually these will need to be
+ * flushed by calling @ref session_main_flush_enqueue_events () */
if (!(s->flags & SESSION_F_RX_EVT))
{
+ u32 thread_index =
+ is_cl ? vlib_get_thread_index () : s->thread_index;
+ session_worker_t *wrk = session_main_get_worker (thread_index);
+ ASSERT (s->thread_index == vlib_get_thread_index () || is_cl);
s->flags |= SESSION_F_RX_EVT;
- vec_add1 (wrk->session_to_enqueue[proto], s->session_index);
+ vec_add1 (wrk->session_to_enqueue[proto], session_handle (s));
}
session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0);
@@ -671,6 +866,34 @@ session_enqueue_dgram_connection (session_t * s,
}
int
+session_enqueue_dgram_connection (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto, u8 queue_event)
+{
+ return session_enqueue_dgram_connection_inline (s, hdr, b, proto,
+ queue_event, 0 /* is_cl */);
+}
+
+int
+session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto, u8 queue_event)
+{
+ return session_enqueue_dgram_connection_inline (s, hdr, b, proto,
+ queue_event, 1 /* is_cl */);
+}
+
+int
+session_enqueue_dgram_connection_cl (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event)
+{
+ session_t *awls;
+
+ awls = app_listener_select_wrk_cl_session (s, hdr);
+ return session_enqueue_dgram_connection_inline (awls, hdr, b, proto,
+ queue_event, 1 /* is_cl */);
+}
+
+int
session_tx_fifo_peek_bytes (transport_connection_t * tc, u8 * buffer,
u32 offset, u32 max_bytes)
{
@@ -693,187 +916,6 @@ session_tx_fifo_dequeue_drop (transport_connection_t * tc, u32 max_bytes)
return rv;
}
-static inline int
-session_notify_subscribers (u32 app_index, session_t * s,
- svm_fifo_t * f, session_evt_type_t evt_type)
-{
- app_worker_t *app_wrk;
- application_t *app;
- int i;
-
- app = application_get (app_index);
- if (!app)
- return -1;
-
- for (i = 0; i < f->shr->n_subscribers; i++)
- {
- app_wrk = application_get_worker (app, f->shr->subscribers[i]);
- if (!app_wrk)
- continue;
- if (app_worker_lock_and_send_event (app_wrk, s, evt_type))
- return -1;
- }
-
- return 0;
-}
-
-/**
- * Notify session peer that new data has been enqueued.
- *
- * @param s Stream session for which the event is to be generated.
- * @param lock Flag to indicate if call should lock message queue.
- *
- * @return 0 on success or negative number if failed to send notification.
- */
-static inline int
-session_enqueue_notify_inline (session_t * s)
-{
- app_worker_t *app_wrk;
- u32 session_index;
- u8 n_subscribers;
-
- session_index = s->session_index;
- n_subscribers = svm_fifo_n_subscribers (s->rx_fifo);
-
- app_wrk = app_worker_get_if_valid (s->app_wrk_index);
- if (PREDICT_FALSE (!app_wrk))
- {
- SESSION_DBG ("invalid s->app_index = %d", s->app_wrk_index);
- return 0;
- }
-
- SESSION_EVT (SESSION_EVT_ENQ, s, svm_fifo_max_dequeue_prod (s->rx_fifo));
-
- s->flags &= ~SESSION_F_RX_EVT;
-
- /* Application didn't confirm accept yet */
- if (PREDICT_FALSE (s->session_state == SESSION_STATE_ACCEPTING))
- return 0;
-
- if (PREDICT_FALSE (app_worker_lock_and_send_event (app_wrk, s,
- SESSION_IO_EVT_RX)))
- return -1;
-
- if (PREDICT_FALSE (n_subscribers))
- {
- s = session_get (session_index, vlib_get_thread_index ());
- return session_notify_subscribers (app_wrk->app_index, s,
- s->rx_fifo, SESSION_IO_EVT_RX);
- }
-
- return 0;
-}
-
-int
-session_enqueue_notify (session_t * s)
-{
- return session_enqueue_notify_inline (s);
-}
-
-static void
-session_enqueue_notify_rpc (void *arg)
-{
- u32 session_index = pointer_to_uword (arg);
- session_t *s;
-
- s = session_get_if_valid (session_index, vlib_get_thread_index ());
- if (!s)
- return;
-
- session_enqueue_notify (s);
-}
-
-/**
- * Like session_enqueue_notify, but can be called from a thread that does not
- * own the session.
- */
-void
-session_enqueue_notify_thread (session_handle_t sh)
-{
- u32 thread_index = session_thread_from_handle (sh);
- u32 session_index = session_index_from_handle (sh);
-
- /*
- * Pass session index (u32) as opposed to handle (u64) in case pointers
- * are not 64-bit.
- */
- session_send_rpc_evt_to_thread (thread_index,
- session_enqueue_notify_rpc,
- uword_to_pointer (session_index, void *));
-}
-
-int
-session_dequeue_notify (session_t * s)
-{
- app_worker_t *app_wrk;
-
- svm_fifo_clear_deq_ntf (s->tx_fifo);
-
- app_wrk = app_worker_get_if_valid (s->app_wrk_index);
- if (PREDICT_FALSE (!app_wrk))
- return -1;
-
- if (PREDICT_FALSE (app_worker_lock_and_send_event (app_wrk, s,
- SESSION_IO_EVT_TX)))
- return -1;
-
- if (PREDICT_FALSE (s->tx_fifo->shr->n_subscribers))
- return session_notify_subscribers (app_wrk->app_index, s,
- s->tx_fifo, SESSION_IO_EVT_TX);
-
- return 0;
-}
-
-/**
- * Flushes queue of sessions that are to be notified of new data
- * enqueued events.
- *
- * @param thread_index Thread index for which the flush is to be performed.
- * @return 0 on success or a positive number indicating the number of
- * failures due to API queue being full.
- */
-int
-session_main_flush_enqueue_events (u8 transport_proto, u32 thread_index)
-{
- session_worker_t *wrk = session_main_get_worker (thread_index);
- session_t *s;
- int i, errors = 0;
- u32 *indices;
-
- indices = wrk->session_to_enqueue[transport_proto];
-
- for (i = 0; i < vec_len (indices); i++)
- {
- s = session_get_if_valid (indices[i], thread_index);
- if (PREDICT_FALSE (!s))
- {
- errors++;
- continue;
- }
-
- session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED,
- 0 /* TODO/not needed */ );
-
- if (PREDICT_FALSE (session_enqueue_notify_inline (s)))
- errors++;
- }
-
- vec_reset_length (indices);
- wrk->session_to_enqueue[transport_proto] = indices;
-
- return errors;
-}
-
-int
-session_main_flush_all_enqueue_events (u8 transport_proto)
-{
- vlib_thread_main_t *vtm = vlib_get_thread_main ();
- int i, errors = 0;
- for (i = 0; i < 1 + vtm->n_threads; i++)
- errors += session_main_flush_enqueue_events (transport_proto, i);
- return errors;
-}
-
int
session_stream_connect_notify (transport_connection_t * tc,
session_error_t err)
@@ -888,6 +930,7 @@ session_stream_connect_notify (transport_connection_t * tc,
session_lookup_del_half_open (tc);
ho = ho_session_get (tc->s_index);
+ session_set_state (ho, SESSION_STATE_TRANSPORT_CLOSED);
opaque = ho->opaque;
app_wrk = app_worker_get_if_valid (ho->app_wrk_index);
if (!app_wrk)
@@ -897,8 +940,9 @@ session_stream_connect_notify (transport_connection_t * tc,
return app_worker_connect_notify (app_wrk, s, err, opaque);
s = session_alloc_for_connection (tc);
- s->session_state = SESSION_STATE_CONNECTING;
+ session_set_state (s, SESSION_STATE_CONNECTING);
s->app_wrk_index = app_wrk->wrk_index;
+ s->opaque = opaque;
new_si = s->session_index;
new_ti = s->thread_index;
@@ -910,7 +954,7 @@ session_stream_connect_notify (transport_connection_t * tc,
}
s = session_get (new_si, new_ti);
- s->session_state = SESSION_STATE_READY;
+ session_set_state (s, SESSION_STATE_READY);
session_lookup_add_connection (tc, session_handle (s));
if (app_worker_connect_notify (app_wrk, s, SESSION_E_NONE, opaque))
@@ -926,43 +970,20 @@ session_stream_connect_notify (transport_connection_t * tc,
return 0;
}
-typedef union session_switch_pool_reply_args_
-{
- struct
- {
- u32 session_index;
- u16 thread_index;
- u8 is_closed;
- };
- u64 as_u64;
-} session_switch_pool_reply_args_t;
-
-STATIC_ASSERT (sizeof (session_switch_pool_reply_args_t) <= sizeof (uword),
- "switch pool reply args size");
-
static void
-session_switch_pool_reply (void *arg)
+session_switch_pool_closed_rpc (void *arg)
{
- session_switch_pool_reply_args_t rargs;
+ session_handle_t sh;
session_t *s;
- rargs.as_u64 = pointer_to_uword (arg);
- s = session_get_if_valid (rargs.session_index, rargs.thread_index);
+ sh = pointer_to_uword (arg);
+ s = session_get_from_handle_if_valid (sh);
if (!s)
return;
- /* Session closed during migration. Clean everything up */
- if (rargs.is_closed)
- {
- transport_cleanup (session_get_transport_proto (s), s->connection_index,
- s->thread_index);
- segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo);
- session_free (s);
- return;
- }
-
- /* Notify app that it has data on the new session */
- session_enqueue_notify (s);
+ transport_cleanup (session_get_transport_proto (s), s->connection_index,
+ s->thread_index);
+ session_cleanup (s);
}
typedef struct _session_switch_pool_args
@@ -980,8 +1001,7 @@ static void
session_switch_pool (void *cb_args)
{
session_switch_pool_args_t *args = (session_switch_pool_args_t *) cb_args;
- session_switch_pool_reply_args_t rargs;
- session_handle_t new_sh;
+ session_handle_t sh, new_sh;
segment_manager_t *sm;
app_worker_t *app_wrk;
session_t *s;
@@ -989,37 +1009,32 @@ session_switch_pool (void *cb_args)
ASSERT (args->thread_index == vlib_get_thread_index ());
s = session_get (args->session_index, args->thread_index);
- /* Check if session closed during migration */
- rargs.is_closed = s->session_state >= SESSION_STATE_TRANSPORT_CLOSING;
+ app_wrk = app_worker_get_if_valid (s->app_wrk_index);
+ if (!app_wrk)
+ goto app_closed;
- transport_cleanup (session_get_transport_proto (s), s->connection_index,
- s->thread_index);
+ /* Cleanup fifo segment slice state for fifos */
+ sm = app_worker_get_connect_segment_manager (app_wrk);
+ segment_manager_detach_fifo (sm, &s->rx_fifo);
+ segment_manager_detach_fifo (sm, &s->tx_fifo);
- app_wrk = app_worker_get_if_valid (s->app_wrk_index);
- if (app_wrk)
- {
- /* Cleanup fifo segment slice state for fifos */
- sm = app_worker_get_connect_segment_manager (app_wrk);
- segment_manager_detach_fifo (sm, &s->rx_fifo);
- segment_manager_detach_fifo (sm, &s->tx_fifo);
+ /* Check if session closed during migration */
+ if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
+ goto app_closed;
- /* Notify app, using old session, about the migration event */
- if (!rargs.is_closed)
- {
- new_sh = session_make_handle (args->new_session_index,
- args->new_thread_index);
- app_worker_migrate_notify (app_wrk, s, new_sh);
- }
- }
+ new_sh =
+ session_make_handle (args->new_session_index, args->new_thread_index);
+ app_worker_migrate_notify (app_wrk, s, new_sh);
- /* Trigger app read and fifo updates on the new thread */
- rargs.session_index = args->new_session_index;
- rargs.thread_index = args->new_thread_index;
- session_send_rpc_evt_to_thread (args->new_thread_index,
- session_switch_pool_reply,
- uword_to_pointer (rargs.as_u64, void *));
+ clib_mem_free (cb_args);
+ return;
- session_free (s);
+app_closed:
+ /* Session closed during migration. Clean everything up */
+ sh = session_handle (s);
+ session_send_rpc_evt_to_thread (args->new_thread_index,
+ session_switch_pool_closed_rpc,
+ uword_to_pointer (sh, void *));
clib_mem_free (cb_args);
}
@@ -1040,7 +1055,7 @@ session_dgram_connect_notify (transport_connection_t * tc,
*/
new_s = session_clone_safe (tc->s_index, old_thread_index);
new_s->connection_index = tc->c_index;
- new_s->session_state = SESSION_STATE_READY;
+ session_set_state (new_s, SESSION_STATE_READY);
new_s->flags |= SESSION_F_IS_MIGRATING;
if (!(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP))
@@ -1094,11 +1109,11 @@ session_transport_closing_notify (transport_connection_t * tc)
* accept might be rejected */
if (s->session_state == SESSION_STATE_ACCEPTING)
{
- s->session_state = SESSION_STATE_TRANSPORT_CLOSING;
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING);
return;
}
- s->session_state = SESSION_STATE_TRANSPORT_CLOSING;
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING);
app_wrk = app_worker_get (s->app_wrk_index);
app_worker_close_notify (app_wrk, s);
}
@@ -1139,7 +1154,7 @@ session_transport_delete_notify (transport_connection_t * tc)
* because transport will soon be closed and closed sessions
* are assumed to have been removed from the lookup table */
session_lookup_del_session (s);
- s->session_state = SESSION_STATE_TRANSPORT_DELETED;
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
session_cleanup_notify (s, SESSION_CLEANUP_TRANSPORT);
svm_fifo_dequeue_drop_all (s->tx_fifo);
break;
@@ -1150,7 +1165,7 @@ session_transport_delete_notify (transport_connection_t * tc)
* session is just removed because both transport and app have
* confirmed the close*/
session_lookup_del_session (s);
- s->session_state = SESSION_STATE_TRANSPORT_DELETED;
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
session_cleanup_notify (s, SESSION_CLEANUP_TRANSPORT);
svm_fifo_dequeue_drop_all (s->tx_fifo);
session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_CLOSE);
@@ -1159,6 +1174,7 @@ session_transport_delete_notify (transport_connection_t * tc)
break;
case SESSION_STATE_CLOSED:
session_cleanup_notify (s, SESSION_CLEANUP_TRANSPORT);
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
session_delete (s);
break;
default:
@@ -1186,6 +1202,9 @@ session_transport_closed_notify (transport_connection_t * tc)
if (!(s = session_get_if_valid (tc->s_index, tc->thread_index)))
return;
+ if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSED)
+ return;
+
/* Transport thinks that app requested close but it actually didn't.
* Can happen for tcp:
* 1)if fin and rst are received in close succession.
@@ -1194,17 +1213,15 @@ session_transport_closed_notify (transport_connection_t * tc)
{
session_transport_closing_notify (tc);
svm_fifo_dequeue_drop_all (s->tx_fifo);
- s->session_state = SESSION_STATE_TRANSPORT_CLOSED;
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSED);
}
/* If app close has not been received or has not yet resulted in
* a transport close, only mark the session transport as closed */
else if (s->session_state <= SESSION_STATE_CLOSING)
- {
- s->session_state = SESSION_STATE_TRANSPORT_CLOSED;
- }
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSED);
/* If app also closed, switch to closed */
else if (s->session_state == SESSION_STATE_APP_CLOSED)
- s->session_state = SESSION_STATE_CLOSED;
+ session_set_state (s, SESSION_STATE_CLOSED);
app_wrk = app_worker_get_if_valid (s->app_wrk_index);
if (app_wrk)
@@ -1226,10 +1243,10 @@ session_transport_reset_notify (transport_connection_t * tc)
return;
if (s->session_state == SESSION_STATE_ACCEPTING)
{
- s->session_state = SESSION_STATE_TRANSPORT_CLOSING;
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING);
return;
}
- s->session_state = SESSION_STATE_TRANSPORT_CLOSING;
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING);
app_wrk = app_worker_get (s->app_wrk_index);
app_worker_reset_notify (app_wrk, s);
}
@@ -1246,12 +1263,12 @@ session_stream_accept_notify (transport_connection_t * tc)
return -1;
if (s->session_state != SESSION_STATE_CREATED)
return 0;
- s->session_state = SESSION_STATE_ACCEPTING;
+ session_set_state (s, SESSION_STATE_ACCEPTING);
if (app_worker_accept_notify (app_wrk, s))
{
/* On transport delete, no notifications should be sent. Unless, the
* accept is retried and successful. */
- s->session_state = SESSION_STATE_CREATED;
+ session_set_state (s, SESSION_STATE_CREATED);
return -1;
}
return 0;
@@ -1269,7 +1286,7 @@ session_stream_accept (transport_connection_t * tc, u32 listener_index,
s = session_alloc_for_connection (tc);
s->listener_handle = ((u64) thread_index << 32) | (u64) listener_index;
- s->session_state = SESSION_STATE_CREATED;
+ session_set_state (s, SESSION_STATE_CREATED);
if ((rv = app_worker_init_accepted (s)))
{
@@ -1313,7 +1330,7 @@ session_dgram_accept (transport_connection_t * tc, u32 listener_index,
}
session_lookup_add_connection (tc, session_handle (s));
- s->session_state = SESSION_STATE_ACCEPTING;
+ session_set_state (s, SESSION_STATE_ACCEPTING);
app_wrk = app_worker_get (s->app_wrk_index);
if ((rv = app_worker_accept_notify (app_wrk, s)))
@@ -1351,7 +1368,10 @@ session_open_cl (session_endpoint_cfg_t *rmt, session_handle_t *rsh)
app_wrk = app_worker_get (rmt->app_wrk_index);
s = session_alloc_for_connection (tc);
s->app_wrk_index = app_wrk->wrk_index;
- s->session_state = SESSION_STATE_OPENED;
+ s->opaque = rmt->opaque;
+ session_set_state (s, SESSION_STATE_OPENED);
+ if (transport_connection_is_cless (tc))
+ s->flags |= SESSION_F_IS_CLESS;
if (app_worker_init_connected (app_wrk, s))
{
session_free (s);
@@ -1419,13 +1439,11 @@ session_open_app (session_endpoint_cfg_t *rmt, session_handle_t *rsh)
typedef int (*session_open_service_fn) (session_endpoint_cfg_t *,
session_handle_t *);
-/* *INDENT-OFF* */
static session_open_service_fn session_open_srv_fns[TRANSPORT_N_SERVICES] = {
session_open_vc,
session_open_cl,
session_open_app,
};
-/* *INDENT-ON* */
/**
* Ask transport to open connection to remote transport endpoint.
@@ -1476,6 +1494,9 @@ session_listen (session_t * ls, session_endpoint_cfg_t * sep)
* worker because local tables (for ct sessions) are not backed by a fib */
ls = listen_session_get (s_index);
ls->connection_index = tc_index;
+ ls->opaque = sep->opaque;
+ if (transport_connection_is_cless (session_get_transport (ls)))
+ ls->flags |= SESSION_F_IS_CLESS;
return 0;
}
@@ -1530,9 +1551,15 @@ session_half_close (session_t *s)
void
session_close (session_t * s)
{
- if (!s)
+ if (!s || (s->flags & SESSION_F_APP_CLOSED))
return;
+ /* Transports can close and delete their state independent of app closes
+ * and transport initiated state transitions can hide app closes. Instead
+ * of extending the state machine to support separate tracking of app and
+ * transport initiated closes, use a flag. */
+ s->flags |= SESSION_F_APP_CLOSED;
+
if (s->session_state >= SESSION_STATE_CLOSING)
{
/* Session will only be removed once both app and transport
@@ -1543,9 +1570,12 @@ session_close (session_t * s)
return;
}
- /* App closed so stop propagating dequeue notifications */
- svm_fifo_clear_deq_ntf (s->tx_fifo);
- s->session_state = SESSION_STATE_CLOSING;
+ /* App closed so stop propagating dequeue notifications.
+ * App might disconnect session before connected, in this case,
+ * tx_fifo may not be setup yet, so clear only it's inited. */
+ if (s->tx_fifo)
+ svm_fifo_clear_deq_ntf (s->tx_fifo);
+ session_set_state (s, SESSION_STATE_CLOSING);
session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_CLOSE);
}
@@ -1557,12 +1587,46 @@ session_reset (session_t * s)
{
if (s->session_state >= SESSION_STATE_CLOSING)
return;
- /* Drop all outstanding tx data */
- svm_fifo_dequeue_drop_all (s->tx_fifo);
- s->session_state = SESSION_STATE_CLOSING;
+ /* Drop all outstanding tx data
+ * App might disconnect session before connected, in this case,
+ * tx_fifo may not be setup yet, so clear only it's inited. */
+ if (s->tx_fifo)
+ svm_fifo_dequeue_drop_all (s->tx_fifo);
+ session_set_state (s, SESSION_STATE_CLOSING);
session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_RESET);
}
+void
+session_detach_app (session_t *s)
+{
+ if (s->session_state < SESSION_STATE_TRANSPORT_CLOSING)
+ {
+ session_close (s);
+ }
+ else if (s->session_state < SESSION_STATE_TRANSPORT_DELETED)
+ {
+ transport_connection_t *tc;
+
+ /* Transport is closing but it's not yet deleted. Confirm close and
+ * subsequently detach transport from session and enqueue a session
+ * cleanup notification. Transport closed and cleanup notifications are
+ * going to be dropped by session layer apis */
+ transport_close (session_get_transport_proto (s), s->connection_index,
+ s->thread_index);
+ tc = session_get_transport (s);
+ tc->s_index = SESSION_INVALID_INDEX;
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
+ session_cleanup_notify (s, SESSION_CLEANUP_SESSION);
+ }
+ else
+ {
+ session_cleanup_notify (s, SESSION_CLEANUP_SESSION);
+ }
+
+ s->flags |= SESSION_F_APP_CLOSED;
+ s->app_wrk_index = APP_INVALID_INDEX;
+}
+
/**
* Notify transport the session can be half-disconnected.
*
@@ -1594,10 +1658,10 @@ session_transport_close (session_t * s)
if (s->session_state >= SESSION_STATE_APP_CLOSED)
{
if (s->session_state == SESSION_STATE_TRANSPORT_CLOSED)
- s->session_state = SESSION_STATE_CLOSED;
+ session_set_state (s, SESSION_STATE_CLOSED);
/* If transport is already deleted, just free the session */
else if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED)
- session_free_w_fifos (s);
+ session_program_cleanup (s);
return;
}
@@ -1607,7 +1671,7 @@ session_transport_close (session_t * s)
* delete notify. This will finally lead to the complete cleanup of the
* session.
*/
- s->session_state = SESSION_STATE_APP_CLOSED;
+ session_set_state (s, SESSION_STATE_APP_CLOSED);
transport_close (session_get_transport_proto (s), s->connection_index,
s->thread_index);
@@ -1622,13 +1686,13 @@ session_transport_reset (session_t * s)
if (s->session_state >= SESSION_STATE_APP_CLOSED)
{
if (s->session_state == SESSION_STATE_TRANSPORT_CLOSED)
- s->session_state = SESSION_STATE_CLOSED;
+ session_set_state (s, SESSION_STATE_CLOSED);
else if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED)
- session_free_w_fifos (s);
+ session_program_cleanup (s);
return;
}
- s->session_state = SESSION_STATE_APP_CLOSED;
+ session_set_state (s, SESSION_STATE_APP_CLOSED);
transport_reset (session_get_transport_proto (s), s->connection_index,
s->thread_index);
}
@@ -1727,14 +1791,28 @@ session_segment_handle (session_t * s)
f->segment_index);
}
-/* *INDENT-OFF* */
+void
+session_get_original_dst (transport_endpoint_t *i2o_src,
+ transport_endpoint_t *i2o_dst,
+ transport_proto_t transport_proto, u32 *original_dst,
+ u16 *original_dst_port)
+{
+ session_main_t *smm = vnet_get_session_main ();
+ ip_protocol_t proto =
+ (transport_proto == TRANSPORT_PROTO_TCP ? IPPROTO_TCP : IPPROTO_UDP);
+ if (!smm->original_dst_lookup || !i2o_dst->is_ip4)
+ return;
+ smm->original_dst_lookup (&i2o_src->ip.ip4, i2o_src->port, &i2o_dst->ip.ip4,
+ i2o_dst->port, proto, original_dst,
+ original_dst_port);
+}
+
static session_fifo_rx_fn *session_tx_fns[TRANSPORT_TX_N_FNS] = {
session_tx_fifo_peek_and_snd,
session_tx_fifo_dequeue_and_snd,
session_tx_fifo_dequeue_internal,
session_tx_fifo_dequeue_and_snd
};
-/* *INDENT-ON* */
void
session_register_transport (transport_proto_t transport_proto,
@@ -2013,6 +2091,7 @@ session_dma_completion_cb (vlib_main_t *vm, struct vlib_dma_batch *batch)
static void
session_prepare_dma_args (vlib_dma_config_t *args)
{
+ args->max_batches = 16;
args->max_transfers = DMA_TRANS_SIZE;
args->max_transfer_size = 65536;
args->features = 0;
@@ -2104,6 +2183,7 @@ session_node_enable_disable (u8 is_en)
if (!sm->poll_main)
continue;
}
+ vlib_node_set_state (vm, session_input_node.index, mstate);
vlib_node_set_state (vm, session_queue_node.index, state);
}
@@ -2147,6 +2227,8 @@ session_main_init (vlib_main_t * vm)
smm->use_private_rx_mqs = 0;
smm->no_adaptive = 0;
smm->last_transport_proto_type = TRANSPORT_PROTO_HTTP;
+ smm->port_allocator_min_src_port = 1024;
+ smm->port_allocator_max_src_port = 65535;
return 0;
}
@@ -2244,6 +2326,10 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input)
else if (unformat (input, "local-endpoints-table-buckets %d",
&smm->local_endpoints_table_buckets))
;
+ else if (unformat (input, "min-src-port %d", &tmp))
+ smm->port_allocator_min_src_port = tmp;
+ else if (unformat (input, "max-src-port %d", &tmp))
+ smm->port_allocator_max_src_port = tmp;
else if (unformat (input, "enable"))
smm->session_enable_asap = 1;
else if (unformat (input, "use-app-socket-api"))
@@ -2256,6 +2342,11 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input)
smm->no_adaptive = 1;
else if (unformat (input, "use-dma"))
smm->dma_enabled = 1;
+ else if (unformat (input, "nat44-original-dst-enable"))
+ {
+ smm->original_dst_lookup = vlib_get_plugin_symbol (
+ "nat_plugin.so", "nat44_original_dst_lookup");
+ }
/*
* Deprecated but maintained for compatibility
*/
diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h
index a68e51239bd..a5604bf8725 100644
--- a/src/vnet/session/session.h
+++ b/src/vnet/session/session.h
@@ -23,22 +23,10 @@
#include <svm/fifo_segment.h>
#include <vlib/dma/dma.h>
-#define foreach_session_input_error \
-_(NO_SESSION, "No session drops") \
-_(NO_LISTENER, "No listener for dst port drops") \
-_(ENQUEUED, "Packets pushed into rx fifo") \
-_(NOT_READY, "Session not ready packets") \
-_(FIFO_FULL, "Packets dropped for lack of rx fifo space") \
-_(EVENT_FIFO_FULL, "Events not sent for lack of event fifo space") \
-_(API_QUEUE_FULL, "Sessions not created for lack of API queue space") \
-
-typedef enum
-{
-#define _(sym,str) SESSION_ERROR_##sym,
- foreach_session_input_error
-#undef _
- SESSION_N_ERROR,
-} session_input_error_t;
+typedef struct session_wrk_stats_
+{
+ u32 errors[SESSION_N_ERRORS];
+} session_wrk_stats_t;
typedef struct session_tx_context_
{
@@ -112,8 +100,8 @@ typedef struct session_worker_
/** Convenience pointer to this worker's vlib_main */
vlib_main_t *vm;
- /** Per-proto vector of sessions to enqueue */
- u32 **session_to_enqueue;
+ /** Per-proto vector of session handles to enqueue */
+ session_handle_t **session_to_enqueue;
/** Timerfd used to periodically signal wrk session queue node */
int timerfd;
@@ -157,12 +145,12 @@ typedef struct session_worker_
/** Flag that is set if main thread signaled to handle connects */
u32 n_pending_connects;
- /** Main thread loops in poll mode without a connect */
- u32 no_connect_loops;
-
/** List head for first worker evts pending handling on main */
clib_llist_index_t evts_pending_main;
+ /** Per-app-worker bitmap of pending notifications */
+ uword *app_wrks_pending_ntf;
+
int config_index;
u8 dma_enabled;
session_dma_transfer *dma_trans;
@@ -172,6 +160,8 @@ typedef struct session_worker_
u16 batch_num;
vlib_dma_batch_t *batch;
+ session_wrk_stats_t stats;
+
#if SESSION_DEBUG
/** last event poll time by thread */
clib_time_type_t last_event_poll;
@@ -189,6 +179,10 @@ extern session_fifo_rx_fn session_tx_fifo_dequeue_internal;
u8 session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e);
typedef void (*session_update_time_fn) (f64 time_now, u8 thread_index);
+typedef void (*nat44_original_dst_lookup_fn) (
+ ip4_address_t *i2o_src, u16 i2o_src_port, ip4_address_t *i2o_dst,
+ u16 i2o_dst_port, ip_protocol_t proto, u32 *original_dst,
+ u16 *original_dst_port);
typedef struct session_main_
{
@@ -212,7 +206,9 @@ typedef struct session_main_
* Trade memory for speed, for now */
u32 *session_type_to_next;
- /** Thread for cl and ho that rely on cl allocs */
+ /** Thread used for allocating active open connections, i.e., half-opens
+ * for transports like tcp, and sessions that will be migrated for cl
+ * transports like udp. If vpp has workers, this will be first worker. */
u32 transport_cl_thread;
transport_proto_t last_transport_proto_type;
@@ -271,14 +267,22 @@ typedef struct session_main_
u32 local_endpoints_table_memory;
u32 local_endpoints_table_buckets;
+ /** Transport source port allocation range */
+ u16 port_allocator_min_src_port;
+ u16 port_allocator_max_src_port;
+
/** Preallocate session config parameter */
u32 preallocated_sessions;
u16 msg_id_base;
+
+ /** Query nat44-ed session to get original dst ip4 & dst port. */
+ nat44_original_dst_lookup_fn original_dst_lookup;
} session_main_t;
extern session_main_t session_main;
extern vlib_node_registration_t session_queue_node;
+extern vlib_node_registration_t session_input_node;
extern vlib_node_registration_t session_queue_process_node;
extern vlib_node_registration_t session_queue_pre_input_node;
@@ -334,7 +338,7 @@ session_evt_ctrl_data (session_worker_t * wrk, session_evt_elt_t * elt)
static inline void
session_evt_ctrl_data_free (session_worker_t * wrk, session_evt_elt_t * elt)
{
- ASSERT (elt->evt.event_type > SESSION_IO_EVT_BUILTIN_TX);
+ ASSERT (elt->evt.event_type >= SESSION_CTRL_EVT_RPC);
pool_put_index (wrk->ctrl_evts_data, elt->evt.ctrl_data_index);
}
@@ -362,7 +366,8 @@ int session_wrk_handle_mq (session_worker_t *wrk, svm_msg_q_t *mq);
session_t *session_alloc (u32 thread_index);
void session_free (session_t * s);
-void session_free_w_fifos (session_t * s);
+void session_cleanup (session_t *s);
+void session_program_cleanup (session_t *s);
void session_cleanup_half_open (session_handle_t ho_handle);
u8 session_is_valid (u32 si, u8 thread_index);
@@ -387,44 +392,37 @@ session_get_if_valid (u64 si, u32 thread_index)
}
always_inline session_t *
-session_get_from_handle (session_handle_t handle)
+session_get_from_handle (session_handle_tu_t handle)
{
session_main_t *smm = &session_main;
- u32 session_index, thread_index;
- session_parse_handle (handle, &session_index, &thread_index);
- return pool_elt_at_index (smm->wrk[thread_index].sessions, session_index);
+ return pool_elt_at_index (smm->wrk[handle.thread_index].sessions,
+ handle.session_index);
}
always_inline session_t *
-session_get_from_handle_if_valid (session_handle_t handle)
+session_get_from_handle_if_valid (session_handle_tu_t handle)
{
- u32 session_index, thread_index;
- session_parse_handle (handle, &session_index, &thread_index);
- return session_get_if_valid (session_index, thread_index);
+ return session_get_if_valid (handle.session_index, handle.thread_index);
}
-u64 session_segment_handle (session_t * s);
-
/**
* Get session from handle and avoid pool validation if no same thread
*
* Peekers are fine because pool grows with barrier (see @ref session_alloc)
*/
always_inline session_t *
-session_get_from_handle_safe (u64 handle)
+session_get_from_handle_safe (session_handle_tu_t handle)
{
- u32 thread_index = session_thread_from_handle (handle);
- session_worker_t *wrk = &session_main.wrk[thread_index];
+ session_worker_t *wrk = &session_main.wrk[handle.thread_index];
- if (thread_index == vlib_get_thread_index ())
+ if (handle.thread_index == vlib_get_thread_index ())
{
- return pool_elt_at_index (wrk->sessions,
- session_index_from_handle (handle));
+ return pool_elt_at_index (wrk->sessions, handle.session_index);
}
else
{
/* Don't use pool_elt_at index to avoid pool bitmap reallocs */
- return wrk->sessions + session_index_from_handle (handle);
+ return wrk->sessions + handle.session_index;
}
}
@@ -450,16 +448,19 @@ int session_stop_listen (session_t * s);
void session_half_close (session_t *s);
void session_close (session_t * s);
void session_reset (session_t * s);
+void session_detach_app (session_t *s);
void session_transport_half_close (session_t *s);
void session_transport_close (session_t * s);
void session_transport_reset (session_t * s);
void session_transport_cleanup (session_t * s);
-int session_send_io_evt_to_thread (svm_fifo_t * f,
- session_evt_type_t evt_type);
-int session_enqueue_notify (session_t * s);
+int session_enqueue_notify (session_t *s);
int session_dequeue_notify (session_t * s);
+int session_enqueue_notify_cl (session_t *s);
+int session_send_io_evt_to_thread (svm_fifo_t *f, session_evt_type_t evt_type);
int session_send_io_evt_to_thread_custom (void *data, u32 thread_index,
session_evt_type_t evt_type);
+int session_program_tx_io_evt (session_handle_tu_t sh,
+ session_evt_type_t evt_type);
void session_send_rpc_evt_to_thread (u32 thread_index, void *fp,
void *rpc_args);
void session_send_rpc_evt_to_thread_force (u32 thread_index, void *fp,
@@ -472,6 +473,7 @@ void session_get_endpoint (session_t * s, transport_endpoint_t * tep,
u8 is_lcl);
int session_transport_attribute (session_t *s, u8 is_get,
transport_endpt_attr_t *attr);
+u64 session_segment_handle (session_t *s);
u8 *format_session (u8 * s, va_list * args);
uword unformat_session (unformat_input_t * input, va_list * args);
@@ -489,6 +491,13 @@ int session_enqueue_dgram_connection (session_t * s,
session_dgram_hdr_t * hdr,
vlib_buffer_t * b, u8 proto,
u8 queue_event);
+int session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event);
+int session_enqueue_dgram_connection_cl (session_t *s,
+ session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event);
int session_stream_connect_notify (transport_connection_t * tc,
session_error_t err);
int session_dgram_connect_notify (transport_connection_t * tc,
@@ -506,6 +515,7 @@ int session_stream_accept (transport_connection_t * tc, u32 listener_index,
u32 thread_index, u8 notify);
int session_dgram_accept (transport_connection_t * tc, u32 listener_index,
u32 thread_index);
+
/**
* Initialize session layer for given transport proto and ip version
*
@@ -527,6 +537,13 @@ int session_tx_fifo_peek_bytes (transport_connection_t * tc, u8 * buffer,
u32 offset, u32 max_bytes);
u32 session_tx_fifo_dequeue_drop (transport_connection_t * tc, u32 max_bytes);
+always_inline void
+session_set_state (session_t *s, session_state_t session_state)
+{
+ s->session_state = session_state;
+ SESSION_EVT (SESSION_EVT_STATE_CHANGE, s);
+}
+
always_inline u32
transport_max_rx_enqueue (transport_connection_t * tc)
{
@@ -569,6 +586,19 @@ transport_rx_fifo_has_ooo_data (transport_connection_t * tc)
return svm_fifo_has_ooo_data (s->rx_fifo);
}
+always_inline u32
+transport_tx_fifo_has_dgram (transport_connection_t *tc)
+{
+ session_t *s = session_get (tc->s_index, tc->thread_index);
+ u32 max_deq = svm_fifo_max_dequeue_cons (s->tx_fifo);
+ session_dgram_pre_hdr_t phdr;
+
+ if (max_deq <= sizeof (session_dgram_hdr_t))
+ return 0;
+ svm_fifo_peek (s->tx_fifo, 0, sizeof (phdr), (u8 *) &phdr);
+ return max_deq >= phdr.data_length + sizeof (session_dgram_hdr_t);
+}
+
always_inline void
transport_rx_fifo_req_deq_ntf (transport_connection_t *tc)
{
@@ -609,12 +639,19 @@ transport_cl_thread (void)
return session_main.transport_cl_thread;
}
+always_inline u32
+session_vlib_thread_is_cl_thread (void)
+{
+ return (vlib_get_thread_index () == transport_cl_thread () ||
+ vlib_thread_is_main_w_barrier ());
+}
+
/*
* Listen sessions
*/
-always_inline u64
-listen_session_get_handle (session_t * s)
+always_inline session_handle_t
+listen_session_get_handle (session_t *s)
{
ASSERT (s->session_state == SESSION_STATE_LISTENING ||
session_get_transport_proto (s) == TRANSPORT_PROTO_QUIC);
@@ -661,28 +698,17 @@ always_inline session_t *
ho_session_alloc (void)
{
session_t *s;
- ASSERT (vlib_get_thread_index () == 0);
- s = session_alloc (0);
+ ASSERT (session_vlib_thread_is_cl_thread ());
+ s = session_alloc (transport_cl_thread ());
s->session_state = SESSION_STATE_CONNECTING;
s->flags |= SESSION_F_HALF_OPEN;
- /* Not ideal. Half-opens are only allocated from main with worker barrier
- * but can be cleaned up, i.e., session_half_open_free, from main without
- * a barrier. In debug images, the free_bitmap can grow while workers peek
- * the sessions pool, e.g., session_half_open_migrate_notify, and as a
- * result crash while validating the session. To avoid this, grow the bitmap
- * now. */
- if (CLIB_DEBUG)
- {
- session_t *sp = session_main.wrk[0].sessions;
- clib_bitmap_validate (pool_header (sp)->free_bitmap, s->session_index);
- }
return s;
}
always_inline session_t *
ho_session_get (u32 ho_index)
{
- return session_get (ho_index, 0 /* half-open thread */);
+ return session_get (ho_index, transport_cl_thread ());
}
always_inline void
@@ -707,7 +733,7 @@ vnet_get_session_main ()
always_inline session_worker_t *
session_main_get_worker (u32 thread_index)
{
- return &session_main.wrk[thread_index];
+ return vec_elt_at_index (session_main.wrk, thread_index);
}
static inline session_worker_t *
@@ -715,13 +741,13 @@ session_main_get_worker_if_valid (u32 thread_index)
{
if (thread_index > vec_len (session_main.wrk))
return 0;
- return &session_main.wrk[thread_index];
+ return session_main_get_worker (thread_index);
}
always_inline svm_msg_q_t *
session_main_get_vpp_event_queue (u32 thread_index)
{
- return session_main.wrk[thread_index].vpp_event_queue;
+ return session_main_get_worker (thread_index)->vpp_event_queue;
}
always_inline u8
@@ -730,14 +756,31 @@ session_main_is_enabled ()
return session_main.is_enabled == 1;
}
+always_inline void
+session_worker_stat_error_inc (session_worker_t *wrk, int error, int value)
+{
+ if ((-(error) >= 0 && -(error) < SESSION_N_ERRORS))
+ wrk->stats.errors[-error] += value;
+ else
+ SESSION_DBG ("unknown session counter");
+}
+
+always_inline void
+session_stat_error_inc (int error, int value)
+{
+ session_worker_t *wrk;
+ wrk = session_main_get_worker (vlib_get_thread_index ());
+ session_worker_stat_error_inc (wrk, error, value);
+}
+
#define session_cli_return_if_not_enabled() \
do { \
if (!session_main.is_enabled) \
return clib_error_return (0, "session layer is not enabled"); \
} while (0)
-int session_main_flush_enqueue_events (u8 proto, u32 thread_index);
-int session_main_flush_all_enqueue_events (u8 transport_proto);
+void session_main_flush_enqueue_events (transport_proto_t transport_proto,
+ u32 thread_index);
void session_queue_run_on_main_thread (vlib_main_t * vm);
/**
@@ -769,10 +812,16 @@ void session_wrk_enable_adaptive_mode (session_worker_t *wrk);
fifo_segment_t *session_main_get_wrk_mqs_segment (void);
void session_node_enable_disable (u8 is_en);
clib_error_t *vnet_session_enable_disable (vlib_main_t * vm, u8 is_en);
-void session_wrk_handle_evts_main_rpc ();
+void session_wrk_handle_evts_main_rpc (void *);
+void session_wrk_program_app_wrk_evts (session_worker_t *wrk,
+ u32 app_wrk_index);
session_t *session_alloc_for_connection (transport_connection_t * tc);
session_t *session_alloc_for_half_open (transport_connection_t *tc);
+void session_get_original_dst (transport_endpoint_t *i2o_src,
+ transport_endpoint_t *i2o_dst,
+ transport_proto_t transport_proto,
+ u32 *original_dst, u16 *original_dst_port);
typedef void (pool_safe_realloc_rpc_fn) (void *rpc_args);
@@ -813,8 +862,7 @@ pool_program_safe_realloc_rpc (void *args)
{
max_elts = _vec_max_len (*pra->pool, pra->elt_size);
n_alloc = clib_max (2 * max_elts, POOL_REALLOC_SAFE_ELT_THRESH);
- _pool_alloc (pra->pool, free_elts + n_alloc, pra->align, 0,
- pra->elt_size);
+ _pool_alloc (pra->pool, n_alloc, pra->align, 0, pra->elt_size);
}
pool_realloc_flag (*pra->pool) = 0;
clib_mem_free (args);
diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c
index 2502ef6a70a..48eb932a2c9 100644
--- a/src/vnet/session/session_api.c
+++ b/src/vnet/session/session_api.c
@@ -136,6 +136,13 @@ mq_send_session_accepted_cb (session_t * s)
m.mq_index = s->thread_index;
}
+ if (application_original_dst_is_enabled (app))
+ {
+ session_get_original_dst (&m.lcl, &m.rmt,
+ session_get_transport_proto (s),
+ &m.original_dst_ip4, &m.original_dst_port);
+ }
+
app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_ACCEPTED, &m, sizeof (m));
return 0;
@@ -205,7 +212,6 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
session_t * s, session_error_t err)
{
session_connected_msg_t m = { 0 };
- transport_connection_t *tc;
fifo_segment_t *eq_seg;
app_worker_t *app_wrk;
application_t *app;
@@ -223,14 +229,6 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
if (session_has_transport (s))
{
- tc = session_get_transport (s);
- if (!tc)
- {
- clib_warning ("failed to retrieve transport!");
- m.retval = SESSION_E_REFUSED;
- goto snd_msg;
- }
-
m.handle = session_handle (s);
m.vpp_event_queue_address =
fifo_segment_msg_q_offset (eq_seg, s->thread_index);
@@ -245,7 +243,6 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
else
{
ct_connection_t *cct;
- session_t *ss;
cct = (ct_connection_t *) session_get_transport (s);
m.handle = session_handle (s);
@@ -256,11 +253,10 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
m.server_rx_fifo = fifo_segment_fifo_offset (s->rx_fifo);
m.server_tx_fifo = fifo_segment_fifo_offset (s->tx_fifo);
m.segment_handle = session_segment_handle (s);
- ss = ct_session_get_peer (s);
- m.ct_rx_fifo = fifo_segment_fifo_offset (ss->tx_fifo);
- m.ct_tx_fifo = fifo_segment_fifo_offset (ss->rx_fifo);
- m.ct_segment_handle = session_segment_handle (ss);
m.mq_index = s->thread_index;
+ m.ct_rx_fifo = fifo_segment_fifo_offset (cct->client_rx_fifo);
+ m.ct_tx_fifo = fifo_segment_fifo_offset (cct->client_tx_fifo);
+ m.ct_segment_handle = cct->segment_handle;
}
/* Setup client session index in advance, in case data arrives
@@ -275,12 +271,12 @@ snd_msg:
return 0;
}
-int
+static int
mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context,
session_handle_t handle, int rv)
{
session_bound_msg_t m = { 0 };
- transport_endpoint_t tep;
+ transport_connection_t *ltc;
fifo_segment_t *eq_seg;
app_worker_t *app_wrk;
application_t *app;
@@ -302,21 +298,24 @@ mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context,
else
ls = app_listener_get_local_session (al);
- session_get_endpoint (ls, &tep, 1 /* is_lcl */);
- m.lcl_port = tep.port;
- m.lcl_is_ip4 = tep.is_ip4;
- clib_memcpy_fast (m.lcl_ip, &tep.ip, sizeof (tep.ip));
+ ltc = session_get_transport (ls);
+ m.lcl_port = ltc->lcl_port;
+ m.lcl_is_ip4 = ltc->is_ip4;
+ clib_memcpy_fast (m.lcl_ip, &ltc->lcl_ip, sizeof (m.lcl_ip));
app = application_get (app_wrk->app_index);
eq_seg = application_get_rx_mqs_segment (app);
m.vpp_evt_q = fifo_segment_msg_q_offset (eq_seg, ls->thread_index);
m.mq_index = ls->thread_index;
- if (session_transport_service_type (ls) == TRANSPORT_SERVICE_CL &&
- ls->rx_fifo)
+ if (transport_connection_is_cless (ltc))
{
- m.rx_fifo = fifo_segment_fifo_offset (ls->rx_fifo);
- m.tx_fifo = fifo_segment_fifo_offset (ls->tx_fifo);
- m.segment_handle = session_segment_handle (ls);
+ session_t *wrk_ls;
+ m.mq_index = transport_cl_thread ();
+ m.vpp_evt_q = fifo_segment_msg_q_offset (eq_seg, m.mq_index);
+ wrk_ls = app_listener_get_wrk_cl_session (al, app_wrk->wrk_map_index);
+ m.rx_fifo = fifo_segment_fifo_offset (wrk_ls->rx_fifo);
+ m.tx_fifo = fifo_segment_fifo_offset (wrk_ls->tx_fifo);
+ m.segment_handle = session_segment_handle (wrk_ls);
}
snd_msg:
@@ -326,11 +325,14 @@ snd_msg:
return 0;
}
-void
-mq_send_unlisten_reply (app_worker_t * app_wrk, session_handle_t sh,
- u32 context, int rv)
+static void
+mq_send_unlisten_cb (u32 app_wrk_index, session_handle_t sh, u32 context,
+ int rv)
{
session_unlisten_reply_msg_t m = { 0 };
+ app_worker_t *app_wrk;
+
+ app_wrk = app_worker_get (app_wrk_index);
m.context = context;
m.handle = sh;
@@ -451,6 +453,52 @@ mq_send_session_cleanup_cb (session_t * s, session_cleanup_ntf_t ntf)
app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_CLEANUP, &m, sizeof (m));
}
+static int
+mq_send_io_rx_event (session_t *s)
+{
+ session_event_t *mq_evt;
+ svm_msg_q_msg_t mq_msg;
+ app_worker_t *app_wrk;
+ svm_msg_q_t *mq;
+
+ if (svm_fifo_has_event (s->rx_fifo))
+ return 0;
+
+ app_wrk = app_worker_get (s->app_wrk_index);
+ mq = app_wrk->event_queue;
+
+ mq_msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
+ mq_evt = svm_msg_q_msg_data (mq, &mq_msg);
+
+ mq_evt->event_type = SESSION_IO_EVT_RX;
+ mq_evt->session_index = s->rx_fifo->shr->client_session_index;
+
+ (void) svm_fifo_set_event (s->rx_fifo);
+
+ svm_msg_q_add_raw (mq, &mq_msg);
+
+ return 0;
+}
+
+static int
+mq_send_io_tx_event (session_t *s)
+{
+ app_worker_t *app_wrk = app_worker_get (s->app_wrk_index);
+ svm_msg_q_t *mq = app_wrk->event_queue;
+ session_event_t *mq_evt;
+ svm_msg_q_msg_t mq_msg;
+
+ mq_msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
+ mq_evt = svm_msg_q_msg_data (mq, &mq_msg);
+
+ mq_evt->event_type = SESSION_IO_EVT_TX;
+ mq_evt->session_index = s->tx_fifo->shr->client_session_index;
+
+ svm_msg_q_add_raw (mq, &mq_msg);
+
+ return 0;
+}
+
static session_cb_vft_t session_mq_cb_vft = {
.session_accept_callback = mq_send_session_accepted_cb,
.session_disconnect_callback = mq_send_session_disconnected_cb,
@@ -458,8 +506,12 @@ static session_cb_vft_t session_mq_cb_vft = {
.session_reset_callback = mq_send_session_reset_cb,
.session_migrate_callback = mq_send_session_migrate_cb,
.session_cleanup_callback = mq_send_session_cleanup_cb,
+ .session_listened_callback = mq_send_session_bound_cb,
+ .session_unlistened_callback = mq_send_unlisten_cb,
.add_segment_callback = mq_send_add_segment_cb,
.del_segment_callback = mq_send_del_segment_cb,
+ .builtin_app_rx_callback = mq_send_io_rx_event,
+ .builtin_app_tx_callback = mq_send_io_tx_event,
};
static void
@@ -525,7 +577,8 @@ vl_api_app_attach_t_handler (vl_api_app_attach_t * mp)
if ((rv = vnet_application_attach (a)))
{
- clib_warning ("attach returned: %d", rv);
+ clib_warning ("attach returned: %U", format_session_error, rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
vec_free (a->namespace_id);
goto done;
}
@@ -567,7 +620,6 @@ vl_api_app_attach_t_handler (vl_api_app_attach_t * mp)
}
done:
- /* *INDENT-OFF* */
REPLY_MACRO3 (
VL_API_APP_ATTACH_REPLY,
((!rv) ? vec_len (((fifo_segment_t *) a->segment)->ssvm.name) : 0), ({
@@ -590,7 +642,6 @@ done:
rmp->segment_handle = clib_host_to_net_u64 (a->segment_handle);
}
}));
- /* *INDENT-ON* */
if (n_fds)
session_send_fds (reg, fds, n_fds);
@@ -632,7 +683,9 @@ vl_api_app_worker_add_del_t_handler (vl_api_app_worker_add_del_t * mp)
rv = vnet_app_worker_add_del (&args);
if (rv)
{
- clib_warning ("app worker add/del returned: %d", rv);
+ clib_warning ("app worker add/del returned: %U", format_session_error,
+ rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
goto done;
}
@@ -653,16 +706,16 @@ vl_api_app_worker_add_del_t_handler (vl_api_app_worker_add_del_t * mp)
n_fds += 1;
}
- /* *INDENT-OFF* */
done:
REPLY_MACRO3 (
VL_API_APP_WORKER_ADD_DEL_REPLY,
((!rv && mp->is_add) ? vec_len (args.segment->name) : 0), ({
rmp->is_add = mp->is_add;
- rmp->wrk_index = clib_host_to_net_u32 (args.wrk_map_index);
- rmp->segment_handle = clib_host_to_net_u64 (args.segment_handle);
+ rmp->wrk_index = mp->wrk_index;
if (!rv && mp->is_add)
{
+ rmp->wrk_index = clib_host_to_net_u32 (args.wrk_map_index);
+ rmp->segment_handle = clib_host_to_net_u64 (args.segment_handle);
rmp->app_event_queue_address =
fifo_segment_msg_q_offset ((fifo_segment_t *) args.segment, 0);
rmp->n_fds = n_fds;
@@ -674,7 +727,6 @@ done:
}
}
}));
- /* *INDENT-ON* */
if (n_fds)
session_send_fds (reg, fds, n_fds);
@@ -700,6 +752,12 @@ vl_api_application_detach_t_handler (vl_api_application_detach_t * mp)
a->app_index = app->app_index;
a->api_client_index = mp->client_index;
rv = vnet_application_detach (a);
+ if (rv)
+ {
+ clib_warning ("vnet_application_detach: %U", format_session_error,
+ rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
}
done:
@@ -723,7 +781,6 @@ vl_api_app_namespace_add_del_t_handler (vl_api_app_namespace_add_del_t * mp)
vnet_app_namespace_add_del_args_t args = {
.ns_id = ns_id,
- .netns = 0,
.sock_name = 0,
.secret = clib_net_to_host_u64 (mp->secret),
.sw_if_index = clib_net_to_host_u32 (mp->sw_if_index),
@@ -743,13 +800,11 @@ vl_api_app_namespace_add_del_t_handler (vl_api_app_namespace_add_del_t * mp)
}
vec_free (ns_id);
- /* *INDENT-OFF* */
done:
REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_REPLY, ({
if (!rv)
rmp->appns_index = clib_host_to_net_u32 (appns_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -757,7 +812,7 @@ vl_api_app_namespace_add_del_v2_t_handler (
vl_api_app_namespace_add_del_v2_t *mp)
{
vl_api_app_namespace_add_del_v2_reply_t *rmp;
- u8 *ns_id = 0, *netns = 0;
+ u8 *ns_id = 0;
u32 appns_index = 0;
int rv = 0;
@@ -768,13 +823,10 @@ vl_api_app_namespace_add_del_v2_t_handler (
}
mp->namespace_id[sizeof (mp->namespace_id) - 1] = 0;
- mp->netns[sizeof (mp->netns) - 1] = 0;
ns_id = format (0, "%s", &mp->namespace_id);
- netns = format (0, "%s", &mp->netns);
vnet_app_namespace_add_del_args_t args = {
.ns_id = ns_id,
- .netns = netns,
.sock_name = 0,
.secret = clib_net_to_host_u64 (mp->secret),
.sw_if_index = clib_net_to_host_u32 (mp->sw_if_index),
@@ -793,7 +845,6 @@ vl_api_app_namespace_add_del_v2_t_handler (
}
}
vec_free (ns_id);
- vec_free (netns);
done:
REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_V2_REPLY, ({
@@ -803,11 +854,55 @@ done:
}
static void
+vl_api_app_namespace_add_del_v4_t_handler (
+ vl_api_app_namespace_add_del_v4_t *mp)
+{
+ vl_api_app_namespace_add_del_v4_reply_t *rmp;
+ u8 *ns_id = 0, *sock_name = 0;
+ u32 appns_index = 0;
+ int rv = 0;
+ if (session_main_is_enabled () == 0)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto done;
+ }
+ mp->namespace_id[sizeof (mp->namespace_id) - 1] = 0;
+ ns_id = format (0, "%s", &mp->namespace_id);
+ sock_name = vl_api_from_api_to_new_vec (mp, &mp->sock_name);
+ vnet_app_namespace_add_del_args_t args = {
+ .ns_id = ns_id,
+ .sock_name = sock_name,
+ .secret = clib_net_to_host_u64 (mp->secret),
+ .sw_if_index = clib_net_to_host_u32 (mp->sw_if_index),
+ .ip4_fib_id = clib_net_to_host_u32 (mp->ip4_fib_id),
+ .ip6_fib_id = clib_net_to_host_u32 (mp->ip6_fib_id),
+ .is_add = mp->is_add,
+ };
+ rv = vnet_app_namespace_add_del (&args);
+ if (!rv && mp->is_add)
+ {
+ appns_index = app_namespace_index_from_id (ns_id);
+ if (appns_index == APP_NAMESPACE_INVALID_INDEX)
+ {
+ clib_warning ("app ns lookup failed id:%s", ns_id);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
+ }
+ vec_free (ns_id);
+ vec_free (sock_name);
+done:
+ REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_V4_REPLY, ({
+ if (!rv)
+ rmp->appns_index = clib_host_to_net_u32 (appns_index);
+ }));
+}
+
+static void
vl_api_app_namespace_add_del_v3_t_handler (
vl_api_app_namespace_add_del_v3_t *mp)
{
vl_api_app_namespace_add_del_v3_reply_t *rmp;
- u8 *ns_id = 0, *netns = 0, *sock_name = 0;
+ u8 *ns_id = 0, *sock_name = 0, *api_sock_name = 0;
u32 appns_index = 0;
int rv = 0;
if (session_main_is_enabled () == 0)
@@ -816,13 +911,22 @@ vl_api_app_namespace_add_del_v3_t_handler (
goto done;
}
mp->namespace_id[sizeof (mp->namespace_id) - 1] = 0;
- mp->netns[sizeof (mp->netns) - 1] = 0;
ns_id = format (0, "%s", &mp->namespace_id);
- netns = format (0, "%s", &mp->netns);
- sock_name = vl_api_from_api_to_new_vec (mp, &mp->sock_name);
+ api_sock_name = vl_api_from_api_to_new_vec (mp, &mp->sock_name);
+ mp->netns[sizeof (mp->netns) - 1] = 0;
+ if (strlen ((char *) mp->netns) != 0)
+ {
+ sock_name =
+ format (0, "abstract:%v,netns_name=%s", api_sock_name, &mp->netns);
+ }
+ else
+ {
+ sock_name = api_sock_name;
+ api_sock_name = 0; // for vec_free
+ }
+
vnet_app_namespace_add_del_args_t args = {
.ns_id = ns_id,
- .netns = netns,
.sock_name = sock_name,
.secret = clib_net_to_host_u64 (mp->secret),
.sw_if_index = clib_net_to_host_u32 (mp->sw_if_index),
@@ -841,8 +945,8 @@ vl_api_app_namespace_add_del_v3_t_handler (
}
}
vec_free (ns_id);
- vec_free (netns);
vec_free (sock_name);
+ vec_free (api_sock_name);
done:
REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_V3_REPLY, ({
if (!rv)
@@ -877,7 +981,10 @@ vl_api_session_rule_add_del_t_handler (vl_api_session_rule_add_del_t * mp)
rv = vnet_session_rule_add_del (&args);
if (rv)
- clib_warning ("rule add del returned: %d", rv);
+ {
+ clib_warning ("rule add del returned: %U", format_session_error, rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
vec_free (table_args->tag);
REPLY_MACRO (VL_API_SESSION_RULE_ADD_DEL_REPLY);
}
@@ -980,7 +1087,6 @@ send_session_rules_table_details (session_rules_table_t * srt, u8 fib_proto,
if (is_local || fib_proto == FIB_PROTOCOL_IP4)
{
u8 *tag = 0;
- /* *INDENT-OFF* */
srt16 = &srt->session_rules_tables_16;
pool_foreach (rule16, srt16->rules) {
ri = mma_rules_table_rule_index_16 (srt16, rule16);
@@ -988,12 +1094,10 @@ send_session_rules_table_details (session_rules_table_t * srt, u8 fib_proto,
send_session_rule_details4 (rule16, is_local, tp, appns_index, tag,
reg, context);
}
- /* *INDENT-ON* */
}
if (is_local || fib_proto == FIB_PROTOCOL_IP6)
{
u8 *tag = 0;
- /* *INDENT-OFF* */
srt40 = &srt->session_rules_tables_40;
pool_foreach (rule40, srt40->rules) {
ri = mma_rules_table_rule_index_40 (srt40, rule40);
@@ -1001,7 +1105,6 @@ send_session_rules_table_details (session_rules_table_t * srt, u8 fib_proto,
send_session_rule_details6 (rule40, is_local, tp, appns_index, tag,
reg, context);
}
- /* *INDENT-ON* */
}
}
@@ -1016,7 +1119,6 @@ vl_api_session_rules_dump_t_handler (vl_api_session_rules_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
session_table_foreach (st, ({
for (tp = 0; tp < TRANSPORT_N_PROTOS; tp++)
{
@@ -1026,7 +1128,6 @@ vl_api_session_rules_dump_t_handler (vl_api_session_rules_dump_t * mp)
mp->context);
}
}));
- /* *INDENT-ON* */
}
static void
@@ -1071,12 +1172,10 @@ vl_api_app_add_cert_key_pair_t_handler (vl_api_app_add_cert_key_pair_t * mp)
rv = vnet_app_add_cert_key_pair (a);
done:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_APP_ADD_CERT_KEY_PAIR_REPLY, ({
if (!rv)
rmp->index = clib_host_to_net_u32 (a->index);
}));
- /* *INDENT-ON* */
}
static void
@@ -1092,6 +1191,12 @@ vl_api_app_del_cert_key_pair_t_handler (vl_api_app_del_cert_key_pair_t * mp)
}
ckpair_index = clib_net_to_host_u32 (mp->index);
rv = vnet_app_del_cert_key_pair (ckpair_index);
+ if (rv)
+ {
+ clib_warning ("vnet_app_del_cert_key_pair: %U", format_session_error,
+ rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
done:
REPLY_MACRO (VL_API_APP_DEL_CERT_KEY_PAIR_REPLY);
@@ -1169,8 +1274,12 @@ static session_cb_vft_t session_mq_sapi_cb_vft = {
.session_reset_callback = mq_send_session_reset_cb,
.session_migrate_callback = mq_send_session_migrate_cb,
.session_cleanup_callback = mq_send_session_cleanup_cb,
+ .session_listened_callback = mq_send_session_bound_cb,
+ .session_unlistened_callback = mq_send_unlisten_cb,
.add_segment_callback = mq_send_add_segment_sapi_cb,
.del_segment_callback = mq_send_del_segment_sapi_cb,
+ .builtin_app_rx_callback = mq_send_io_rx_event,
+ .builtin_app_tx_callback = mq_send_io_tx_event,
};
static void
@@ -1310,7 +1419,7 @@ sapi_add_del_worker_handler (app_namespace_t * app_ns,
app = application_get_if_valid (mp->app_index);
if (!app)
{
- rv = VNET_API_ERROR_INVALID_VALUE;
+ rv = SESSION_E_INVALID;
goto done;
}
@@ -1325,7 +1434,8 @@ sapi_add_del_worker_handler (app_namespace_t * app_ns,
rv = vnet_app_worker_add_del (&args);
if (rv)
{
- clib_warning ("app worker add/del returned: %d", rv);
+ clib_warning ("app worker add/del returned: %U", format_session_error,
+ rv);
goto done;
}
@@ -1348,15 +1458,20 @@ sapi_add_del_worker_handler (app_namespace_t * app_ns,
done:
+ /* With app sock api socket expected to be closed, no reply */
+ if (!mp->is_add && appns_sapi_enabled ())
+ return;
+
msg.type = APP_SAPI_MSG_TYPE_ADD_DEL_WORKER_REPLY;
rmp = &msg.worker_add_del_reply;
rmp->retval = rv;
rmp->is_add = mp->is_add;
+ rmp->wrk_index = mp->wrk_index;
rmp->api_client_handle = sapi_handle;
- rmp->wrk_index = args.wrk_map_index;
- rmp->segment_handle = args.segment_handle;
if (!rv && mp->is_add)
{
+ rmp->wrk_index = args.wrk_map_index;
+ rmp->segment_handle = args.segment_handle;
/* No segment name and size. This supports only memfds */
rmp->app_event_queue_address =
fifo_segment_msg_q_offset ((fifo_segment_t *) args.segment, 0);
@@ -1653,27 +1768,10 @@ appns_sapi_add_ns_socket (app_namespace_t * app_ns)
clib_socket_t *cs;
char dir[4096];
- if (app_ns->netns)
- {
- if (!app_ns->sock_name)
- app_ns->sock_name = format (0, "@vpp/session/%v%c", app_ns->ns_id, 0);
- if (app_ns->sock_name[0] != '@')
- return VNET_API_ERROR_INVALID_VALUE;
- }
- else
- {
- snprintf (dir, sizeof (dir), "%s%s", vlib_unix_get_runtime_dir (),
- subdir);
- err = vlib_unix_recursive_mkdir ((char *) dir);
- if (err)
- {
- clib_error_report (err);
- return VNET_API_ERROR_SYSCALL_ERROR_1;
- }
+ snprintf (dir, sizeof (dir), "%s%s", vlib_unix_get_runtime_dir (), subdir);
- if (!app_ns->sock_name)
- app_ns->sock_name = format (0, "%s%v%c", dir, app_ns->ns_id, 0);
- }
+ if (!app_ns->sock_name)
+ app_ns->sock_name = format (0, "%s%v%c", dir, app_ns->ns_id, 0);
/*
* Create and initialize socket to listen on
@@ -1684,13 +1782,24 @@ appns_sapi_add_ns_socket (app_namespace_t * app_ns)
CLIB_SOCKET_F_ALLOW_GROUP_WRITE |
CLIB_SOCKET_F_SEQPACKET | CLIB_SOCKET_F_PASSCRED;
- if ((err = clib_socket_init_netns (cs, app_ns->netns)))
+ if (clib_socket_prefix_get_type (cs->config) == CLIB_SOCKET_TYPE_UNIX)
+ {
+ err = vlib_unix_recursive_mkdir ((char *) dir);
+ if (err)
+ {
+ clib_error_report (err);
+ return SESSION_E_SYSCALL;
+ }
+ }
+
+ if ((err = clib_socket_init (cs)))
{
clib_error_report (err);
return -1;
}
- if (!app_ns->netns && stat ((char *) app_ns->sock_name, &file_stat) == -1)
+ if (clib_socket_prefix_get_type (cs->config) == CLIB_SOCKET_TYPE_UNIX &&
+ stat ((char *) app_ns->sock_name, &file_stat) == -1)
return -1;
/*
@@ -1712,19 +1821,6 @@ appns_sapi_add_ns_socket (app_namespace_t * app_ns)
return 0;
}
-static void
-vl_api_application_tls_cert_add_t_handler (
- vl_api_application_tls_cert_add_t *mp)
-{
- /* deprecated */
-}
-
-static void
-vl_api_application_tls_key_add_t_handler (vl_api_application_tls_key_add_t *mp)
-{
- /* deprecated */
-}
-
#include <vnet/session/session.api.c>
static clib_error_t *
session_api_hookup (vlib_main_t *vm)
diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c
index 344937c684a..569a77bccc1 100644
--- a/src/vnet/session/session_cli.c
+++ b/src/vnet/session/session_cli.c
@@ -145,8 +145,11 @@ format_session (u8 * s, va_list * args)
else if (ss->session_state == SESSION_STATE_CONNECTING)
{
if (ss->flags & SESSION_F_HALF_OPEN)
- s = format (s, "%U%v", format_transport_half_open_connection, tp,
- ss->connection_index, ss->thread_index, verbose, str);
+ {
+ s = format (s, "%U", format_transport_half_open_connection, tp,
+ ss->connection_index, ss->thread_index, verbose);
+ s = format (s, "%v", str);
+ }
else
s = format (s, "%U", format_transport_connection, tp,
ss->connection_index, ss->thread_index, verbose);
@@ -339,7 +342,6 @@ session_cli_show_all_sessions (vlib_main_t * vm, int verbose)
n_closed = 0;
- /* *INDENT-OFF* */
pool_foreach (s, pool) {
if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED)
{
@@ -348,7 +350,6 @@ session_cli_show_all_sessions (vlib_main_t * vm, int verbose)
}
vlib_cli_output (vm, "%U", format_session, s, verbose);
}
- /* *INDENT-ON* */
if (!n_closed)
vlib_cli_output (vm, "Thread %d: active sessions %u", thread_index,
@@ -615,7 +616,6 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "%-" SESSION_CLI_ID_LEN "s%-24s", "Listener",
"App");
- /* *INDENT-OFF* */
pool_foreach (s, smm->wrk[0].sessions) {
if (s->session_state != SESSION_STATE_LISTENING
|| s->session_type != sst)
@@ -625,7 +625,6 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "%U%-25v%", format_session, s, 0,
app_name);
}
- /* *INDENT-ON* */
goto done;
}
@@ -655,7 +654,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_show_session_command) =
{
.path = "show session",
@@ -665,7 +663,6 @@ VLIB_CLI_COMMAND (vlib_cli_show_session_command) =
"[protos] [states] ",
.function = show_session_command_fn,
};
-/* *INDENT-ON* */
static int
clear_session (session_t * s)
@@ -717,27 +714,23 @@ clear_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (clear_all)
{
- /* *INDENT-OFF* */
vec_foreach (wrk, smm->wrk)
{
pool_foreach (session, wrk->sessions) {
clear_session (session);
}
};
- /* *INDENT-ON* */
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_session_command, static) =
{
.path = "clear session",
.short_help = "clear session thread <thread> session <index>",
.function = clear_session_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_session_fifo_trace_command_fn (vlib_main_t * vm,
@@ -780,14 +773,12 @@ show_session_fifo_trace_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_session_fifo_trace_command, static) =
{
.path = "show session fifo trace",
.short_help = "show session fifo trace <session>",
.function = show_session_fifo_trace_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
session_replay_fifo_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -827,14 +818,12 @@ session_replay_fifo_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (session_replay_fifo_trace_command, static) =
{
.path = "session replay fifo",
.short_help = "session replay fifo <session>",
.function = session_replay_fifo_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
session_enable_disable_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -859,14 +848,68 @@ session_enable_disable_fn (vlib_main_t * vm, unformat_input_t * input,
return vnet_session_enable_disable (vm, is_en);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (session_enable_disable_command, static) =
{
.path = "session",
.short_help = "session [enable|disable]",
.function = session_enable_disable_fn,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+show_session_stats_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ session_main_t *smm = &session_main;
+ session_worker_t *wrk;
+ unsigned int *e;
+
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ return clib_error_return (0, "unknown input `%U'", format_unformat_error,
+ input);
+
+ vec_foreach (wrk, smm->wrk)
+ {
+ vlib_cli_output (vm, "Thread %u:\n", wrk - smm->wrk);
+ e = wrk->stats.errors;
+#define _(name, str) \
+ if (e[SESSION_EP_##name]) \
+ vlib_cli_output (vm, " %lu %s", e[SESSION_EP_##name], str);
+ foreach_session_error
+#undef _
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_session_stats_command, static) = {
+ .path = "show session stats",
+ .short_help = "show session stats",
+ .function = show_session_stats_fn,
+};
+
+static clib_error_t *
+clear_session_stats_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ session_main_t *smm = &session_main;
+ session_worker_t *wrk;
+
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ return clib_error_return (0, "unknown input `%U'", format_unformat_error,
+ input);
+
+ vec_foreach (wrk, smm->wrk)
+ {
+ clib_memset (&wrk->stats, 0, sizeof (wrk->stats));
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (clear_session_stats_command, static) = {
+ .path = "clear session stats",
+ .short_help = "clear session stats",
+ .function = clear_session_stats_fn,
+};
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/session/session_debug.c b/src/vnet/session/session_debug.c
index 8e4588ecd0b..2a50adac5dd 100644
--- a/src/vnet/session/session_debug.c
+++ b/src/vnet/session/session_debug.c
@@ -52,15 +52,20 @@ show_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_session_dbg_clock_cycles_command, static) =
{
.path = "show session dbg clock_cycles",
.short_help = "show session dbg clock_cycles",
.function = show_session_dbg_clock_cycles_fn,
};
-/* *INDENT-ON* */
+static_always_inline f64
+session_dbg_time_now (u32 thread)
+{
+ vlib_main_t *vm = vlib_get_main_by_index (thread);
+
+ return clib_time_now (&vm->clib_time) + vm->time_offset;
+}
static clib_error_t *
clear_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -77,7 +82,7 @@ clear_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input,
{
sde = &session_dbg_main.wrk[thread];
clib_memset (sde, 0, sizeof (session_dbg_evts_t));
- sde->last_time = vlib_time_now (vlib_mains[thread]);
+ sde->last_time = session_dbg_time_now (thread);
sde->start_time = sde->last_time;
}
@@ -85,14 +90,12 @@ clear_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_session_clock_cycles_command, static) =
{
.path = "clear session dbg clock_cycles",
.short_help = "clear session dbg clock_cycles",
.function = clear_session_dbg_clock_cycles_fn,
};
-/* *INDENT-ON* */
void
session_debug_init (void)
@@ -107,15 +110,99 @@ session_debug_init (void)
for (thread = 0; thread < num_threads; thread++)
{
clib_memset (&sdm->wrk[thread], 0, sizeof (session_dbg_evts_t));
- sdm->wrk[thread].start_time = vlib_time_now (vlib_mains[thread]);
+ sdm->wrk[thread].start_time = session_dbg_time_now (thread);
}
}
+
+static const char *session_evt_grp_str[] = {
+#define _(sym, str) str,
+ foreach_session_evt_grp
+#undef _
+};
+
+static void
+session_debug_show_groups (vlib_main_t *vm)
+{
+ session_dbg_main_t *sdm = &session_dbg_main;
+ int i = 0;
+
+ vlib_cli_output (vm, "%-10s%-30s%-10s", "Index", "Group", "Level");
+
+ for (i = 0; i < SESSION_EVT_N_GRP; i++)
+ vlib_cli_output (vm, "%-10d%-30s%-10d", i, session_evt_grp_str[i],
+ sdm->grp_dbg_lvl[i]);
+}
+
+static clib_error_t *
+session_debug_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ session_dbg_main_t *sdm = &session_dbg_main;
+ u32 group, level = ~0;
+ clib_error_t *error = 0;
+ u8 is_show = 0;
+ uword *bitmap = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "show"))
+ is_show = 1;
+ else if (unformat (input, "group %U", unformat_bitmap_list, &bitmap))
+ ;
+ else if (unformat (input, "level %d", &level))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ if (is_show)
+ {
+ session_debug_show_groups (vm);
+ goto done;
+ }
+ if (level == ~0)
+ {
+ vlib_cli_output (vm, "level must be entered");
+ goto done;
+ }
+
+ group = clib_bitmap_last_set (bitmap);
+ if (group == ~0)
+ {
+ vlib_cli_output (vm, "group must be entered");
+ goto done;
+ }
+ if (group >= SESSION_EVT_N_GRP)
+ {
+ vlib_cli_output (vm, "group out of bounds");
+ goto done;
+ }
+ clib_bitmap_foreach (group, bitmap)
+ sdm->grp_dbg_lvl[group] = level;
+
+done:
+
+ clib_bitmap_free (bitmap);
+ return error;
+}
+
+VLIB_CLI_COMMAND (session_debug_command, static) = {
+ .path = "session debug",
+ .short_help = "session debug {show | debug group <list> level <n>}",
+ .function = session_debug_fn,
+ .is_mp_safe = 1,
+};
+
#else
void
session_debug_init (void)
{
}
-#endif
+#endif /* SESSION_DEBUG */
void
dump_thread_0_event_queue (void)
@@ -189,7 +276,7 @@ session_node_cmp_event (session_event_t * e, svm_fifo_t * f)
case SESSION_IO_EVT_RX:
case SESSION_IO_EVT_TX:
case SESSION_IO_EVT_BUILTIN_RX:
- case SESSION_IO_EVT_BUILTIN_TX:
+ case SESSION_IO_EVT_TX_MAIN:
case SESSION_IO_EVT_TX_FLUSH:
if (e->session_index == f->shr->master_session_index)
return 1;
@@ -211,7 +298,6 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
session_worker_t *wrk;
int i, index, found = 0;
svm_msg_q_msg_t *msg;
- svm_msg_q_ring_t *ring;
svm_msg_q_t *mq;
u8 thread_index;
@@ -228,8 +314,7 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
for (i = 0; i < sq->cursize; i++)
{
msg = (svm_msg_q_msg_t *) (&sq->data[0] + sq->elsize * index);
- ring = svm_msg_q_ring (mq, msg->ring_index);
- clib_memcpy_fast (e, svm_msg_q_msg_data (mq, msg), ring->elsize);
+ clib_memcpy_fast (e, svm_msg_q_msg_data (mq, msg), sizeof (*e));
found = session_node_cmp_event (e, f);
if (found)
return 1;
@@ -239,7 +324,6 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
* Search pending events vector
*/
- /* *INDENT-OFF* */
clib_llist_foreach (wrk->event_elts, evt_list,
pool_elt_at_index (wrk->event_elts, wrk->new_head),
elt, ({
@@ -250,9 +334,7 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
goto done;
}
}));
- /* *INDENT-ON* */
- /* *INDENT-OFF* */
clib_llist_foreach (wrk->event_elts, evt_list,
pool_elt_at_index (wrk->event_elts, wrk->old_head),
elt, ({
@@ -263,7 +345,6 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
goto done;
}
}));
- /* *INDENT-ON* */
done:
return found;
diff --git a/src/vnet/session/session_debug.h b/src/vnet/session/session_debug.h
index 9e49a35dbe6..d433ef47fb1 100644
--- a/src/vnet/session/session_debug.h
+++ b/src/vnet/session/session_debug.h
@@ -17,49 +17,81 @@
#include <vnet/session/transport.h>
#include <vlib/vlib.h>
-
-#define foreach_session_dbg_evt \
- _(ENQ, "enqueue") \
- _(DEQ, "dequeue") \
- _(DEQ_NODE, "dequeue") \
- _(POLL_GAP_TRACK, "poll gap track") \
- _(POLL_DISPATCH_TIME, "dispatch time") \
- _(DISPATCH_START, "dispatch start") \
- _(DISPATCH_END, "dispatch end") \
- _(FREE, "session free") \
- _(DSP_CNTRS, "dispatch counters") \
- _(IO_EVT_COUNTS, "io evt counts") \
- _(EVT_COUNTS, "ctrl evt counts") \
+#include <vpp/vnet/config.h>
+
+#define foreach_session_dbg_evt \
+ _ (ENQ, DEQ_EVTS, 1, "enqueue") \
+ _ (DEQ, DEQ_EVTS, 1, "dequeue") \
+ _ (DEQ_NODE, DISPATCH_DBG, 1, "dequeue") \
+ _ (POLL_GAP_TRACK, EVT_POLL_DBG, 1, "poll gap track") \
+ _ (POLL_DISPATCH_TIME, EVT_POLL_DBG, 1, "dispatch time") \
+ _ (DISPATCH_START, CLOCKS_EVT_DBG, 1, "dispatch start") \
+ _ (DISPATCH_END, CLOCKS_EVT_DBG, 1, "dispatch end") \
+ _ (DSP_CNTRS, CLOCKS_EVT_DBG, 1, "dispatch counters") \
+ _ (STATE_CHANGE, SM, 1, "session state change") \
+ _ (FREE, SM, 1, "session free") \
+ _ (IO_EVT_COUNTS, COUNTS_EVT_DBG, 1, "io evt counts") \
+ _ (COUNTS, COUNTS_EVT_DBG, 1, "ctrl evt counts")
typedef enum _session_evt_dbg
{
-#define _(sym, str) SESSION_EVT_##sym,
+#define _(sym, grp, lvl, str) SESSION_EVT_##sym,
foreach_session_dbg_evt
#undef _
} session_evt_dbg_e;
-#define foreach_session_events \
-_(CLK_UPDATE_TIME, 1, 1, "Time Update Time") \
-_(CLK_MQ_DEQ, 1, 1, "Time MQ Dequeue") \
-_(CLK_CTRL_EVTS, 1, 1, "Time Ctrl Events") \
-_(CLK_NEW_IO_EVTS, 1, 1, "Time New IO Events") \
-_(CLK_OLD_IO_EVTS, 1, 1, "Time Old IO Events") \
-_(CLK_TOTAL, 1, 1, "Time Total in Node") \
-_(CLK_START, 1, 1, "Time Since Last Reset") \
- \
-_(CNT_MQ_EVTS, 1, 0, "# of MQ Events Processed" ) \
-_(CNT_CTRL_EVTS, 1, 0, "# of Ctrl Events Processed" ) \
-_(CNT_NEW_EVTS, 1, 0, "# of New Events Processed" ) \
-_(CNT_OLD_EVTS, 1, 0, "# of Old Events Processed" ) \
-_(CNT_IO_EVTS, 1, 0, "# of Events Processed" ) \
-_(CNT_NODE_CALL, 1, 0, "# of Node Calls") \
- \
-_(BASE_OFFSET_IO_EVTS, 0, 0, "NULL") \
-_(SESSION_IO_EVT_RX, 1, 0, "# of IO Event RX") \
-_(SESSION_IO_EVT_TX, 1, 0, "# of IO Event TX") \
-_(SESSION_IO_EVT_TX_FLUSH, 1, 0, "# of IO Event TX Flush") \
-_(SESSION_IO_EVT_BUILTIN_RX, 1, 0, "# of IO Event BuiltIn RX") \
-_(SESSION_IO_EVT_BUILTIN_TX, 1, 0, "# of IO Event BuiltIn TX") \
+typedef enum session_evt_lvl_
+{
+#define _(sym, grp, lvl, str) SESSION_EVT_##sym##_LVL = lvl,
+ foreach_session_dbg_evt
+#undef _
+} session_evt_lvl_e;
+
+#define foreach_session_evt_grp \
+ _ (DEQ_EVTS, "dequeue/enqueue events") \
+ _ (DISPATCH_DBG, "dispatch") \
+ _ (EVT_POLL_DBG, "event poll") \
+ _ (SM, "state machine") \
+ _ (CLOCKS_EVT_DBG, "clocks events") \
+ _ (COUNTS_EVT_DBG, "counts events")
+
+typedef enum session_evt_grp_
+{
+#define _(sym, str) SESSION_EVT_GRP_##sym,
+ foreach_session_evt_grp
+#undef _
+ SESSION_EVT_N_GRP
+} session_evt_grp_e;
+
+typedef enum session_evt_to_grp_
+{
+#define _(sym, grp, lvl, str) SESSION_EVT_##sym##_GRP = SESSION_EVT_GRP_##grp,
+ foreach_session_dbg_evt
+#undef _
+} session_evt_to_grp_e;
+
+#define foreach_session_events \
+ _ (CLK_UPDATE_TIME, 1, 1, "Time Update Time") \
+ _ (CLK_MQ_DEQ, 1, 1, "Time MQ Dequeue") \
+ _ (CLK_CTRL_EVTS, 1, 1, "Time Ctrl Events") \
+ _ (CLK_NEW_IO_EVTS, 1, 1, "Time New IO Events") \
+ _ (CLK_OLD_IO_EVTS, 1, 1, "Time Old IO Events") \
+ _ (CLK_TOTAL, 1, 1, "Time Total in Node") \
+ _ (CLK_START, 1, 1, "Time Since Last Reset") \
+ \
+ _ (CNT_MQ_EVTS, 1, 0, "# of MQ Events Processed") \
+ _ (CNT_CTRL_EVTS, 1, 0, "# of Ctrl Events Processed") \
+ _ (CNT_NEW_EVTS, 1, 0, "# of New Events Processed") \
+ _ (CNT_OLD_EVTS, 1, 0, "# of Old Events Processed") \
+ _ (CNT_IO_EVTS, 1, 0, "# of Events Processed") \
+ _ (CNT_NODE_CALL, 1, 0, "# of Node Calls") \
+ \
+ _ (BASE_OFFSET_IO_EVTS, 0, 0, "NULL") \
+ _ (SESSION_IO_EVT_RX, 1, 0, "# of IO Event RX") \
+ _ (SESSION_IO_EVT_TX, 1, 0, "# of IO Event TX") \
+ _ (SESSION_IO_EVT_TX_FLUSH, 1, 0, "# of IO Event TX Flush") \
+ _ (SESSION_IO_EVT_BUILTIN_RX, 1, 0, "# of IO Event BuiltIn RX") \
+ _ (SESSION_IO_EVT_TX_MAIN, 1, 0, "# of IO Event TX Main")
typedef enum
{
@@ -90,17 +122,28 @@ typedef struct session_dbg_evts_t
typedef struct session_dbg_main_
{
session_dbg_evts_t *wrk;
+ u8 grp_dbg_lvl[SESSION_EVT_N_GRP];
} session_dbg_main_t;
extern session_dbg_main_t session_dbg_main;
-#define SESSION_DEBUG 0 * (TRANSPORT_DEBUG > 0)
-#define SESSION_DEQ_EVTS (0)
-#define SESSION_DISPATCH_DBG (0)
-#define SESSION_EVT_POLL_DBG (0)
-#define SESSION_SM (0)
+#if defined VPP_SESSION_DEBUG && (TRANSPORT_DEBUG > 0)
+#define SESSION_DEBUG (1)
+#define SESSION_DEQ_EVTS (1)
+#define SESSION_DISPATCH_DBG (1)
+#define SESSION_EVT_POLL_DBG (1)
+#define SESSION_SM (1)
+#define SESSION_CLOCKS_EVT_DBG (1)
+#define SESSION_COUNTS_EVT_DBG (1)
+#else
+#define SESSION_DEBUG (0)
+#define SESSION_DEQ_EVTS (0)
+#define SESSION_DISPATCH_DBG (0)
+#define SESSION_EVT_POLL_DBG (0)
+#define SESSION_SM (0)
#define SESSION_CLOCKS_EVT_DBG (0)
#define SESSION_COUNTS_EVT_DBG (0)
+#endif
#if SESSION_DEBUG
@@ -123,17 +166,43 @@ extern session_dbg_main_t session_dbg_main;
ed = ELOG_DATA (&vlib_global_main.elog_main, _e)
#if SESSION_SM
-#define SESSION_EVT_FREE_HANDLER(_s) \
-{ \
- ELOG_TYPE_DECLARE (_e) = \
- { \
- .format = "free: idx %u", \
- .format_args = "i4", \
- }; \
- DEC_SESSION_ETD(_s, _e, 1); \
- ed->data[0] = _s->session_index; \
-}
+#define SESSION_EVT_STATE_CHANGE_HANDLER(_s) \
+ { \
+ ELOG_TYPE_DECLARE (_e) = { \
+ .format = "%s: idx %u", \
+ .format_args = "t4i4", \
+ .n_enum_strings = 12, \
+ .enum_strings = { \
+ "created", \
+ "listening", \
+ "connecting", \
+ "accepting", \
+ "ready", \
+ "opened", \
+ "transport closing", \
+ "closing", \
+ "app closed", \
+ "transport closed", \
+ "closed", \
+ "transport deleted", \
+ }, \
+ }; \
+ DEC_SESSION_ETD (_s, _e, 2); \
+ ed->data[0] = _s->session_state; \
+ ed->data[1] = _s->session_index; \
+ }
+
+#define SESSION_EVT_FREE_HANDLER(_s) \
+ { \
+ ELOG_TYPE_DECLARE (_e) = { \
+ .format = "free: idx %u", \
+ .format_args = "i4", \
+ }; \
+ DEC_SESSION_ED (_e, 1); \
+ ed->data[0] = _s->session_index; \
+ }
#else
+#define SESSION_EVT_STATE_CHANGE_HANDLER(_s)
#define SESSION_EVT_FREE_HANDLER(_s)
#endif
@@ -282,17 +351,17 @@ extern session_dbg_main_t session_dbg_main;
counters[SESS_Q_##_node_evt].u64 += _cnt; \
}
-#define SESSION_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk) \
-{ \
- u8 type = SESS_Q_BASE_OFFSET_IO_EVTS + _node_evt + 1; \
- session_dbg_evts_t *sde; \
- sde = &session_dbg_main.wrk[_wrk->vm->thread_index]; \
- sde->counters[type].u64 += _cnt; \
- sde->counters[SESS_Q_CNT_IO_EVTS].u64 += _cnt ; \
-}
+#define SESSION_EVT_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk) \
+ { \
+ u8 type = SESS_Q_BASE_OFFSET_IO_EVTS + _node_evt + 1; \
+ session_dbg_evts_t *sde; \
+ sde = &session_dbg_main.wrk[_wrk->vm->thread_index]; \
+ sde->counters[type].u64 += _cnt; \
+ sde->counters[SESS_Q_CNT_IO_EVTS].u64 += _cnt; \
+ }
#else
#define SESSION_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk)
-#define SESSION_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk)
+#define SESSION_EVT_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk)
#endif /*SESSION_COUNTS_EVT_DBG */
@@ -322,8 +391,18 @@ extern session_dbg_main_t session_dbg_main;
#define CONCAT_HELPER(_a, _b) _a##_b
#define CC(_a, _b) CONCAT_HELPER(_a, _b)
-#define SESSION_EVT(_evt, _args...) CC(_evt, _HANDLER)(_args)
-
+#define session_evt_lvl(_evt) CC (_evt, _LVL)
+#define session_evt_grp(_evt) CC (_evt, _GRP)
+#define session_evt_grp_dbg_lvl(_evt) \
+ session_dbg_main.grp_dbg_lvl[session_evt_grp (_evt)]
+#define SESSION_EVT(_evt, _args...) \
+ do \
+ { \
+ if (PREDICT_FALSE (session_evt_grp_dbg_lvl (_evt) >= \
+ session_evt_lvl (_evt))) \
+ CC (_evt, _HANDLER) (_args); \
+ } \
+ while (0)
#else
#define SESSION_EVT(_evt, _args...)
#define SESSION_DBG(_fmt, _args...)
diff --git a/src/vnet/session/session_input.c b/src/vnet/session/session_input.c
new file mode 100644
index 00000000000..73b777127fd
--- /dev/null
+++ b/src/vnet/session/session_input.c
@@ -0,0 +1,343 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/session/session.h>
+#include <vnet/session/application.h>
+
+static inline int
+mq_try_lock (svm_msg_q_t *mq)
+{
+ int rv, n_try = 0;
+
+ while (n_try < 100)
+ {
+ rv = svm_msg_q_try_lock (mq);
+ if (!rv)
+ return 0;
+ n_try += 1;
+ usleep (1);
+ }
+
+ return -1;
+}
+
+always_inline u8
+mq_event_ring_index (session_evt_type_t et)
+{
+ return (et >= SESSION_CTRL_EVT_RPC ? SESSION_MQ_CTRL_EVT_RING :
+ SESSION_MQ_IO_EVT_RING);
+}
+
+void
+app_worker_del_all_events (app_worker_t *app_wrk)
+{
+ session_worker_t *wrk;
+ session_event_t *evt;
+ u32 thread_index;
+ session_t *s;
+
+ for (thread_index = 0; thread_index < vec_len (app_wrk->wrk_evts);
+ thread_index++)
+ {
+ while (clib_fifo_elts (app_wrk->wrk_evts[thread_index]))
+ {
+ clib_fifo_sub2 (app_wrk->wrk_evts[thread_index], evt);
+ switch (evt->event_type)
+ {
+ case SESSION_CTRL_EVT_MIGRATED:
+ s = session_get (evt->session_index, thread_index);
+ transport_cleanup (session_get_transport_proto (s),
+ s->connection_index, s->thread_index);
+ session_free (s);
+ break;
+ case SESSION_CTRL_EVT_CLEANUP:
+ s = session_get (evt->as_u64[0] & 0xffffffff, thread_index);
+ if (evt->as_u64[0] >> 32 != SESSION_CLEANUP_SESSION)
+ break;
+ uword_to_pointer (evt->as_u64[1], void (*) (session_t * s)) (s);
+ break;
+ case SESSION_CTRL_EVT_HALF_CLEANUP:
+ s = ho_session_get (evt->session_index);
+ pool_put_index (app_wrk->half_open_table, s->ho_index);
+ session_free (s);
+ break;
+ default:
+ break;
+ }
+ }
+ wrk = session_main_get_worker (thread_index);
+ clib_bitmap_set (wrk->app_wrks_pending_ntf, app_wrk->wrk_index, 0);
+ }
+}
+
+always_inline int
+app_worker_flush_events_inline (app_worker_t *app_wrk, u32 thread_index,
+ u8 is_builtin)
+{
+ application_t *app = application_get (app_wrk->app_index);
+ svm_msg_q_t *mq = app_wrk->event_queue;
+ u8 ring_index, mq_is_cong;
+ session_state_t old_state;
+ session_event_t *evt;
+ u32 n_evts = 128, i;
+ session_t *s;
+ int rv;
+
+ n_evts = clib_min (n_evts, clib_fifo_elts (app_wrk->wrk_evts[thread_index]));
+
+ if (!is_builtin)
+ {
+ mq_is_cong = app_worker_mq_is_congested (app_wrk);
+ if (mq_try_lock (mq))
+ {
+ app_worker_set_mq_wrk_congested (app_wrk, thread_index);
+ return 0;
+ }
+ }
+
+ for (i = 0; i < n_evts; i++)
+ {
+ evt = clib_fifo_head (app_wrk->wrk_evts[thread_index]);
+ if (!is_builtin)
+ {
+ ring_index = mq_event_ring_index (evt->event_type);
+ if (svm_msg_q_or_ring_is_full (mq, ring_index))
+ {
+ app_worker_set_mq_wrk_congested (app_wrk, thread_index);
+ break;
+ }
+ }
+
+ switch (evt->event_type)
+ {
+ case SESSION_IO_EVT_RX:
+ s = session_get (evt->session_index, thread_index);
+ s->flags &= ~SESSION_F_RX_EVT;
+ /* Application didn't confirm accept yet */
+ if (PREDICT_FALSE (s->session_state == SESSION_STATE_ACCEPTING ||
+ s->session_state == SESSION_STATE_CONNECTING))
+ break;
+ app->cb_fns.builtin_app_rx_callback (s);
+ break;
+ /* Handle sessions that might not be on current thread */
+ case SESSION_IO_EVT_BUILTIN_RX:
+ s = session_get_from_handle_if_valid (evt->session_handle);
+ if (!s)
+ break;
+ s->flags &= ~SESSION_F_RX_EVT;
+ if (PREDICT_FALSE (s->session_state == SESSION_STATE_ACCEPTING ||
+ s->session_state == SESSION_STATE_CONNECTING))
+ break;
+ app->cb_fns.builtin_app_rx_callback (s);
+ break;
+ case SESSION_IO_EVT_TX:
+ s = session_get (evt->session_index, thread_index);
+ app->cb_fns.builtin_app_tx_callback (s);
+ break;
+ case SESSION_IO_EVT_TX_MAIN:
+ s = session_get_from_handle_if_valid (evt->session_handle);
+ if (!s)
+ break;
+ app->cb_fns.builtin_app_tx_callback (s);
+ break;
+ case SESSION_CTRL_EVT_BOUND:
+ /* No app cb function currently */
+ if (is_builtin)
+ break;
+ app->cb_fns.session_listened_callback (
+ app_wrk->wrk_index, evt->as_u64[1] >> 32, evt->session_handle,
+ evt->as_u64[1] & 0xffffffff);
+ break;
+ case SESSION_CTRL_EVT_ACCEPTED:
+ s = session_get (evt->session_index, thread_index);
+ old_state = s->session_state;
+ if (app->cb_fns.session_accept_callback (s))
+ {
+ session_detach_app (s);
+ break;
+ }
+ if (is_builtin)
+ {
+ if (old_state >= SESSION_STATE_TRANSPORT_CLOSING)
+ {
+ session_set_state (s,
+ clib_max (old_state, s->session_state));
+ if (!(s->flags & SESSION_F_APP_CLOSED))
+ app->cb_fns.session_disconnect_callback (s);
+ }
+ }
+ break;
+ case SESSION_CTRL_EVT_CONNECTED:
+ if (!(evt->as_u64[1] & 0xffffffff))
+ {
+ s = session_get (evt->session_index, thread_index);
+ old_state = s->session_state;
+ }
+ else
+ s = 0;
+ rv = app->cb_fns.session_connected_callback (
+ app_wrk->wrk_index, evt->as_u64[1] >> 32, s,
+ evt->as_u64[1] & 0xffffffff);
+ if (!s)
+ break;
+ if (rv)
+ {
+ session_detach_app (s);
+ break;
+ }
+ if (old_state >= SESSION_STATE_TRANSPORT_CLOSING)
+ {
+ session_set_state (s, clib_max (old_state, s->session_state));
+ if (!(s->flags & SESSION_F_APP_CLOSED))
+ app->cb_fns.session_disconnect_callback (s);
+ }
+ break;
+ case SESSION_CTRL_EVT_DISCONNECTED:
+ s = session_get (evt->session_index, thread_index);
+ if (!(s->flags & SESSION_F_APP_CLOSED))
+ app->cb_fns.session_disconnect_callback (s);
+ break;
+ case SESSION_CTRL_EVT_RESET:
+ s = session_get (evt->session_index, thread_index);
+ if (!(s->flags & SESSION_F_APP_CLOSED))
+ app->cb_fns.session_reset_callback (s);
+ break;
+ case SESSION_CTRL_EVT_UNLISTEN_REPLY:
+ if (is_builtin)
+ break;
+ app->cb_fns.session_unlistened_callback (
+ app_wrk->wrk_index, evt->session_handle, evt->as_u64[1] >> 32,
+ evt->as_u64[1] & 0xffffffff);
+ break;
+ case SESSION_CTRL_EVT_MIGRATED:
+ s = session_get (evt->session_index, thread_index);
+ app->cb_fns.session_migrate_callback (s, evt->as_u64[1]);
+ transport_cleanup (session_get_transport_proto (s),
+ s->connection_index, s->thread_index);
+ session_free (s);
+ /* Notify app that it has data on the new session */
+ s = session_get_from_handle (evt->as_u64[1]);
+ session_send_io_evt_to_thread (s->rx_fifo,
+ SESSION_IO_EVT_BUILTIN_RX);
+ break;
+ case SESSION_CTRL_EVT_TRANSPORT_CLOSED:
+ s = session_get (evt->session_index, thread_index);
+ /* Notification enqueued before session was refused by app */
+ if (PREDICT_FALSE (s->app_wrk_index == APP_INVALID_INDEX))
+ break;
+ if (app->cb_fns.session_transport_closed_callback)
+ app->cb_fns.session_transport_closed_callback (s);
+ break;
+ case SESSION_CTRL_EVT_CLEANUP:
+ s = session_get (evt->as_u64[0] & 0xffffffff, thread_index);
+ /* Notification enqueued before session was refused by app */
+ if (PREDICT_TRUE (s->app_wrk_index != APP_INVALID_INDEX))
+ {
+ if (app->cb_fns.session_cleanup_callback)
+ app->cb_fns.session_cleanup_callback (s, evt->as_u64[0] >> 32);
+ }
+ if (evt->as_u64[0] >> 32 != SESSION_CLEANUP_SESSION)
+ break;
+ uword_to_pointer (evt->as_u64[1], void (*) (session_t * s)) (s);
+ break;
+ case SESSION_CTRL_EVT_HALF_CLEANUP:
+ s = ho_session_get (evt->session_index);
+ ASSERT (session_vlib_thread_is_cl_thread ());
+ if (app->cb_fns.half_open_cleanup_callback)
+ app->cb_fns.half_open_cleanup_callback (s);
+ pool_put_index (app_wrk->half_open_table, s->ho_index);
+ session_free (s);
+ break;
+ case SESSION_CTRL_EVT_APP_ADD_SEGMENT:
+ app->cb_fns.add_segment_callback (app_wrk->wrk_index,
+ evt->as_u64[1]);
+ break;
+ case SESSION_CTRL_EVT_APP_DEL_SEGMENT:
+ app->cb_fns.del_segment_callback (app_wrk->wrk_index,
+ evt->as_u64[1]);
+ break;
+ default:
+ clib_warning ("unexpected event: %u", evt->event_type);
+ ASSERT (0);
+ break;
+ }
+ clib_fifo_advance_head (app_wrk->wrk_evts[thread_index], 1);
+ }
+
+ if (!is_builtin)
+ {
+ svm_msg_q_unlock (mq);
+ if (mq_is_cong && i == n_evts)
+ app_worker_unset_wrk_mq_congested (app_wrk, thread_index);
+ }
+
+ return 0;
+}
+
+int
+app_wrk_flush_wrk_events (app_worker_t *app_wrk, u32 thread_index)
+{
+ if (app_worker_application_is_builtin (app_wrk))
+ return app_worker_flush_events_inline (app_wrk, thread_index,
+ 1 /* is_builtin */);
+ else
+ return app_worker_flush_events_inline (app_wrk, thread_index,
+ 0 /* is_builtin */);
+}
+
+static inline int
+session_wrk_flush_events (session_worker_t *wrk)
+{
+ app_worker_t *app_wrk;
+ uword app_wrk_index;
+ u32 thread_index;
+
+ thread_index = wrk->vm->thread_index;
+ app_wrk_index = clib_bitmap_first_set (wrk->app_wrks_pending_ntf);
+
+ while (app_wrk_index != ~0)
+ {
+ app_wrk = app_worker_get_if_valid (app_wrk_index);
+ /* app_wrk events are flushed on free, so should be valid here */
+ ASSERT (app_wrk != 0);
+ app_wrk_flush_wrk_events (app_wrk, thread_index);
+
+ if (!clib_fifo_elts (app_wrk->wrk_evts[thread_index]))
+ clib_bitmap_set (wrk->app_wrks_pending_ntf, app_wrk->wrk_index, 0);
+
+ app_wrk_index =
+ clib_bitmap_next_set (wrk->app_wrks_pending_ntf, app_wrk_index + 1);
+ }
+
+ if (!clib_bitmap_is_zero (wrk->app_wrks_pending_ntf))
+ vlib_node_set_interrupt_pending (wrk->vm, session_input_node.index);
+
+ return 0;
+}
+
+VLIB_NODE_FN (session_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ u32 thread_index = vm->thread_index;
+ session_worker_t *wrk;
+
+ wrk = session_main_get_worker (thread_index);
+ session_wrk_flush_events (wrk);
+
+ return 0;
+}
+
+VLIB_REGISTER_NODE (session_input_node) = {
+ .name = "session-input",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_DISABLED,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */ \ No newline at end of file
diff --git a/src/vnet/session/session_lookup.c b/src/vnet/session/session_lookup.c
index 68f98d0f046..9d028dbb28c 100644
--- a/src/vnet/session/session_lookup.c
+++ b/src/vnet/session/session_lookup.c
@@ -29,13 +29,14 @@
#include <vnet/session/session.h>
#include <vnet/session/application.h>
+static session_lookup_main_t sl_main;
+
/**
* Network namespace index (i.e., fib index) to session lookup table. We
* should have one per network protocol type but for now we only support IP4/6
*/
static u32 *fib_index_to_table_index[2];
-/* *INDENT-OFF* */
/* 16 octets */
typedef CLIB_PACKED (struct {
union
@@ -72,7 +73,6 @@ typedef CLIB_PACKED (struct {
u64 as_u64[6];
};
}) v6_connection_key_t;
-/* *INDENT-ON* */
typedef clib_bihash_kv_16_8_t session_kv4_t;
typedef clib_bihash_kv_48_8_t session_kv6_t;
@@ -155,29 +155,70 @@ make_v6_ss_kv_from_tc (session_kv6_t * kv, transport_connection_t * tc)
tc->rmt_port, tc->proto);
}
+static inline u8
+session_table_alloc_needs_sync (void)
+{
+ return !vlib_thread_is_main_w_barrier () && (vlib_num_workers () > 1);
+}
+
+static_always_inline u8
+session_table_is_alloced (u8 fib_proto, u32 fib_index)
+{
+ return (vec_len (fib_index_to_table_index[fib_proto]) > fib_index &&
+ fib_index_to_table_index[fib_proto][fib_index] != ~0);
+}
+
static session_table_t *
session_table_get_or_alloc (u8 fib_proto, u32 fib_index)
{
session_table_t *st;
u32 table_index;
+
ASSERT (fib_index != ~0);
- if (vec_len (fib_index_to_table_index[fib_proto]) > fib_index &&
- fib_index_to_table_index[fib_proto][fib_index] != ~0)
+
+ if (session_table_is_alloced (fib_proto, fib_index))
{
table_index = fib_index_to_table_index[fib_proto][fib_index];
return session_table_get (table_index);
}
+
+ u8 needs_sync = session_table_alloc_needs_sync ();
+ session_lookup_main_t *slm = &sl_main;
+
+ /* Stop workers, otherwise consumers might be affected. This is
+ * acceptable because new tables should seldom be allocated */
+ if (needs_sync)
+ {
+ vlib_workers_sync ();
+
+      /* Guard against a race: only one worker may allocate a table at a time */
+ clib_spinlock_lock (&slm->st_alloc_lock);
+ }
+
+ /* Another worker just allocated this table */
+ if (session_table_is_alloced (fib_proto, fib_index))
+ {
+ table_index = fib_index_to_table_index[fib_proto][fib_index];
+ st = session_table_get (table_index);
+ }
else
{
st = session_table_alloc ();
- table_index = session_table_index (st);
+ st->active_fib_proto = fib_proto;
+ session_table_init (st, fib_proto);
vec_validate_init_empty (fib_index_to_table_index[fib_proto], fib_index,
~0);
+ table_index = session_table_index (st);
fib_index_to_table_index[fib_proto][fib_index] = table_index;
- st->active_fib_proto = fib_proto;
- session_table_init (st, fib_proto);
- return st;
}
+
+ if (needs_sync)
+ {
+ clib_spinlock_unlock (&slm->st_alloc_lock);
+ vlib_workers_continue ();
+ }
+
+ return st;
}
static session_table_t *
@@ -1311,8 +1352,8 @@ session_lookup_connection (u32 fib_index, ip46_address_t * lcl,
lcl_port, rmt_port, proto);
}
-int
-vnet_session_rule_add_del (session_rule_add_del_args_t * args)
+session_error_t
+vnet_session_rule_add_del (session_rule_add_del_args_t *args)
{
app_namespace_t *app_ns = app_namespace_get (args->appns_index);
session_rules_table_t *srt;
@@ -1322,14 +1363,14 @@ vnet_session_rule_add_del (session_rule_add_del_args_t * args)
int rv = 0;
if (!app_ns)
- return VNET_API_ERROR_APP_INVALID_NS;
+ return SESSION_E_INVALID_NS;
if (args->scope > 3)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if (args->transport_proto != TRANSPORT_PROTO_TCP
&& args->transport_proto != TRANSPORT_PROTO_UDP)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if ((args->scope & SESSION_RULE_SCOPE_GLOBAL) || args->scope == 0)
{
@@ -1569,7 +1610,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (session_rule_command, static) =
{
.path = "session rule",
@@ -1577,7 +1617,6 @@ VLIB_CLI_COMMAND (session_rule_command, static) =
"<lcl-ip/plen> <lcl-port> <rmt-ip/plen> <rmt-port> action <action>",
.function = session_rule_command_fn,
};
-/* *INDENT-ON* */
void
session_lookup_dump_rules_table (u32 fib_index, u8 fib_proto,
@@ -1700,7 +1739,6 @@ show_session_rules_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_session_rules_command, static) =
{
.path = "show session rules",
@@ -1708,11 +1746,93 @@ VLIB_CLI_COMMAND (show_session_rules_command, static) =
"<lcl-port> <rmt-ip/plen> <rmt-port> scope <scope>]",
.function = show_session_rules_command_fn,
};
-/* *INDENT-ON* */
+
+u8 *
+format_session_lookup_tables (u8 *s, va_list *args)
+{
+ u32 fib_proto = va_arg (*args, u32);
+ u32 *fibs, num_fibs = 0, fib_index, indent;
+ session_table_t *st;
+ u64 total_mem = 0;
+
+ fibs = fib_index_to_table_index[fib_proto];
+
+ for (fib_index = 0; fib_index < vec_len (fibs); fib_index++)
+ {
+ if (fibs[fib_index] == ~0)
+ continue;
+
+ num_fibs += 1;
+ st = session_table_get (fibs[fib_index]);
+ total_mem += session_table_memory_size (st);
+ }
+
+ indent = format_get_indent (s);
+ s = format (s, "active fibs:\t%u\n", num_fibs);
+ s = format (s, "%Umax fib-index:\t%u\n", format_white_space, indent,
+ vec_len (fibs) - 1);
+ s = format (s, "%Utable memory:\t%U\n", format_white_space, indent,
+ format_memory_size, total_mem);
+ s = format (s, "%Uvec memory:\t%U\n", format_white_space, indent,
+ format_memory_size, vec_mem_size (fibs));
+
+ return s;
+}
+
+static clib_error_t *
+show_session_lookup_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ session_table_t *st;
+ u32 fib_index = ~0;
+
+ session_cli_return_if_not_enabled ();
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table %u", &fib_index))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (fib_index != ~0)
+ {
+ st = session_table_get_for_fib_index (FIB_PROTOCOL_IP4, fib_index);
+ if (st)
+ vlib_cli_output (vm, "%U", format_session_table, st);
+ else
+ vlib_cli_output (vm, "no ip4 table for fib-index %u", fib_index);
+ st = session_table_get_for_fib_index (FIB_PROTOCOL_IP6, fib_index);
+ if (st)
+ vlib_cli_output (vm, "%U", format_session_table, st);
+ else
+ vlib_cli_output (vm, "no ip6 table for fib-index %u", fib_index);
+ goto done;
+ }
+
+ vlib_cli_output (vm, "ip4 fib lookup tables:\n %U",
+ format_session_lookup_tables, FIB_PROTOCOL_IP4);
+ vlib_cli_output (vm, "ip6 fib lookup tables:\n %U",
+ format_session_lookup_tables, FIB_PROTOCOL_IP6);
+
+done:
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_session_lookup_command, static) = {
+ .path = "show session lookup",
+ .short_help = "show session lookup [table <fib-index>]",
+ .function = show_session_lookup_command_fn,
+};
void
session_lookup_init (void)
{
+ session_lookup_main_t *slm = &sl_main;
+
+ clib_spinlock_init (&slm->st_alloc_lock);
+
/*
* Allocate default table and map it to fib_index 0
*/
diff --git a/src/vnet/session/session_lookup.h b/src/vnet/session/session_lookup.h
index c1037dff8c9..f9ffc15165a 100644
--- a/src/vnet/session/session_lookup.h
+++ b/src/vnet/session/session_lookup.h
@@ -29,6 +29,11 @@ typedef enum session_lookup_result_
SESSION_LOOKUP_RESULT_FILTERED
} session_lookup_result_t;
+typedef struct session_lookup_main_
+{
+ clib_spinlock_t st_alloc_lock;
+} session_lookup_main_t;
+
session_t *session_lookup_safe4 (u32 fib_index, ip4_address_t * lcl,
ip4_address_t * rmt, u16 lcl_port,
u16 rmt_port, u8 proto);
@@ -130,7 +135,7 @@ typedef struct _session_rule_add_del_args
u8 transport_proto;
} session_rule_add_del_args_t;
-int vnet_session_rule_add_del (session_rule_add_del_args_t * args);
+session_error_t vnet_session_rule_add_del (session_rule_add_del_args_t *args);
void session_lookup_set_tables_appns (app_namespace_t * app_ns);
void session_lookup_init (void);
diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c
index 1908a58f08f..0ec158fb429 100644
--- a/src/vnet/session/session_node.c
+++ b/src/vnet/session/session_node.c
@@ -139,13 +139,17 @@ session_mq_listen_handler (session_worker_t *wrk, session_evt_elt_t *elt)
a->sep_ext.ext_cfg = session_mq_get_ext_config (app, mp->ext_config);
if ((rv = vnet_listen (a)))
- clib_warning ("listen returned: %U", format_session_error, rv);
+ session_worker_stat_error_inc (wrk, rv, 1);
app_wrk = application_get_worker (app, mp->wrk_index);
- mq_send_session_bound_cb (app_wrk->wrk_index, mp->context, a->handle, rv);
+ app_worker_listened_notify (app_wrk, a->handle, mp->context, rv);
if (mp->ext_config)
session_mq_free_ext_config (app, mp->ext_config);
+
+ /* Make sure events are flushed before releasing barrier, to avoid
+ * potential race with accept. */
+ app_wrk_flush_wrk_events (app_wrk, 0);
}
static void
@@ -170,7 +174,8 @@ session_mq_listen_uri_handler (session_worker_t *wrk, session_evt_elt_t *elt)
rv = vnet_bind_uri (a);
app_wrk = application_get_worker (app, 0);
- mq_send_session_bound_cb (app_wrk->wrk_index, mp->context, a->handle, rv);
+ app_worker_listened_notify (app_wrk, a->handle, mp->context, rv);
+ app_wrk_flush_wrk_events (app_wrk, 0);
}
static void
@@ -178,6 +183,7 @@ session_mq_connect_one (session_connect_msg_t *mp)
{
vnet_connect_args_t _a, *a = &_a;
app_worker_t *app_wrk;
+ session_worker_t *wrk;
application_t *app;
int rv;
@@ -211,9 +217,10 @@ session_mq_connect_one (session_connect_msg_t *mp)
if ((rv = vnet_connect (a)))
{
- clib_warning ("connect returned: %U", format_session_error, rv);
+ wrk = session_main_get_worker (vlib_get_thread_index ());
+ session_worker_stat_error_inc (wrk, rv, 1);
app_wrk = application_get_worker (app, mp->wrk_index);
- mq_send_session_connected_cb (app_wrk->wrk_index, mp->context, 0, rv);
+ app_worker_connect_notify (app_wrk, 0, rv, mp->context);
}
if (mp->ext_config)
@@ -224,23 +231,20 @@ static void
session_mq_handle_connects_rpc (void *arg)
{
u32 max_connects = 32, n_connects = 0;
- vlib_main_t *vm = vlib_get_main ();
session_evt_elt_t *he, *elt, *next;
- session_worker_t *fwrk, *wrk;
+ session_worker_t *fwrk;
- ASSERT (vlib_get_thread_index () == 0);
+ ASSERT (session_vlib_thread_is_cl_thread ());
/* Pending connects on linked list pertaining to first worker */
- fwrk = session_main_get_worker (1);
+ fwrk = session_main_get_worker (transport_cl_thread ());
if (!fwrk->n_pending_connects)
- goto update_state;
-
- vlib_worker_thread_barrier_sync (vm);
+ return;
he = clib_llist_elt (fwrk->event_elts, fwrk->pending_connects);
elt = clib_llist_next (fwrk->event_elts, evt_list, he);
- /* Avoid holding the barrier for too long */
+ /* Avoid holding the worker for too long */
while (n_connects < max_connects && elt != he)
{
next = clib_llist_next (fwrk->event_elts, evt_list, elt);
@@ -254,45 +258,10 @@ session_mq_handle_connects_rpc (void *arg)
/* Decrement with worker barrier */
fwrk->n_pending_connects -= n_connects;
-
- vlib_worker_thread_barrier_release (vm);
-
-update_state:
-
- /* Switch worker to poll mode if it was in interrupt mode and had work or
- * back to interrupt if threshold of loops without a connect is passed.
- * While in poll mode, reprogram connects rpc */
- wrk = session_main_get_worker (0);
- if (wrk->state != SESSION_WRK_POLLING)
- {
- if (n_connects)
- {
- session_wrk_set_state (wrk, SESSION_WRK_POLLING);
- vlib_node_set_state (vm, session_queue_node.index,
- VLIB_NODE_STATE_POLLING);
- wrk->no_connect_loops = 0;
- }
- }
- else
+ if (fwrk->n_pending_connects > 0)
{
- if (!n_connects)
- {
- if (++wrk->no_connect_loops > 1e5)
- {
- session_wrk_set_state (wrk, SESSION_WRK_INTERRUPT);
- vlib_node_set_state (vm, session_queue_node.index,
- VLIB_NODE_STATE_INTERRUPT);
- }
- }
- else
- wrk->no_connect_loops = 0;
- }
-
- if (wrk->state == SESSION_WRK_POLLING)
- {
- elt = session_evt_alloc_ctrl (wrk);
- elt->evt.event_type = SESSION_CTRL_EVT_RPC;
- elt->evt.rpc_args.fp = session_mq_handle_connects_rpc;
+ session_send_rpc_evt_to_thread_force (fwrk->vm->thread_index,
+ session_mq_handle_connects_rpc, 0);
}
}
@@ -302,20 +271,28 @@ session_mq_connect_handler (session_worker_t *wrk, session_evt_elt_t *elt)
u32 thread_index = wrk - session_main.wrk;
session_evt_elt_t *he;
- /* No workers, so just deal with the connect now */
- if (PREDICT_FALSE (!thread_index))
+ if (PREDICT_FALSE (thread_index > transport_cl_thread ()))
{
- session_mq_connect_one (session_evt_ctrl_data (wrk, elt));
+ clib_warning ("Connect on wrong thread. Dropping");
return;
}
- if (PREDICT_FALSE (thread_index != 1))
+ /* If on worker, check if main has any pending messages. Avoids reordering
+ * with other control messages that need to be handled by main
+ */
+ if (thread_index)
{
- clib_warning ("Connect on wrong thread. Dropping");
- return;
+ he = clib_llist_elt (wrk->event_elts, wrk->evts_pending_main);
+
+ /* Events pending on main, postpone to avoid reordering */
+ if (!clib_llist_is_empty (wrk->event_elts, evt_list, he))
+ {
+ clib_llist_add_tail (wrk->event_elts, evt_list, elt, he);
+ return;
+ }
}
- /* Add to pending list to be handled by main thread */
+ /* Add to pending list to be handled by first worker */
he = clib_llist_elt (wrk->event_elts, wrk->pending_connects);
clib_llist_add_tail (wrk->event_elts, evt_list, elt, he);
@@ -323,9 +300,8 @@ session_mq_connect_handler (session_worker_t *wrk, session_evt_elt_t *elt)
wrk->n_pending_connects += 1;
if (wrk->n_pending_connects == 1)
{
- vlib_node_set_interrupt_pending (vlib_get_main_by_index (0),
- session_queue_node.index);
- session_send_rpc_evt_to_thread (0, session_mq_handle_connects_rpc, 0);
+ session_send_rpc_evt_to_thread_force (thread_index,
+ session_mq_handle_connects_rpc, 0);
}
}
@@ -351,9 +327,9 @@ session_mq_connect_uri_handler (session_worker_t *wrk, session_evt_elt_t *elt)
a->app_index = app->app_index;
if ((rv = vnet_connect_uri (a)))
{
- clib_warning ("connect_uri returned: %d", rv);
+ session_worker_stat_error_inc (wrk, rv, 1);
app_wrk = application_get_worker (app, 0 /* default wrk only */ );
- mq_send_session_connected_cb (app_wrk->wrk_index, mp->context, 0, rv);
+ app_worker_connect_notify (app_wrk, 0, rv, mp->context);
}
}
@@ -433,13 +409,13 @@ session_mq_unlisten_handler (session_worker_t *wrk, session_evt_elt_t *elt)
a->wrk_map_index = mp->wrk_index;
if ((rv = vnet_unlisten (a)))
- clib_warning ("unlisten returned: %d", rv);
+ session_worker_stat_error_inc (wrk, rv, 1);
app_wrk = application_get_worker (app, a->wrk_map_index);
if (!app_wrk)
return;
- mq_send_unlisten_reply (app_wrk, sh, mp->context, rv);
+ app_worker_unlisten_reply (app_wrk, sh, mp->context, rv);
}
static void
@@ -480,28 +456,29 @@ session_mq_accepted_reply_handler (session_worker_t *wrk,
a->app_index = mp->context;
a->handle = mp->handle;
vnet_disconnect_session (a);
+ s->app_wrk_index = SESSION_INVALID_INDEX;
return;
}
/* Special handling for cut-through sessions */
if (!session_has_transport (s))
{
- s->session_state = SESSION_STATE_READY;
+ session_set_state (s, SESSION_STATE_READY);
ct_session_connect_notify (s, SESSION_E_NONE);
return;
}
old_state = s->session_state;
- s->session_state = SESSION_STATE_READY;
+ session_set_state (s, SESSION_STATE_READY);
if (!svm_fifo_is_empty_prod (s->rx_fifo))
- app_worker_lock_and_send_event (app_wrk, s, SESSION_IO_EVT_RX);
+ app_worker_rx_notify (app_wrk, s);
/* Closed while waiting for app to reply. Resend disconnect */
if (old_state >= SESSION_STATE_TRANSPORT_CLOSING)
{
app_worker_close_notify (app_wrk, s);
- s->session_state = old_state;
+ session_set_state (s, old_state);
return;
}
}
@@ -514,15 +491,13 @@ session_mq_reset_reply_handler (void *data)
app_worker_t *app_wrk;
session_t *s;
application_t *app;
- u32 index, thread_index;
mp = (session_reset_reply_msg_t *) data;
app = application_lookup (mp->context);
if (!app)
return;
- session_parse_handle (mp->handle, &index, &thread_index);
- s = session_get_if_valid (index, thread_index);
+ s = session_get_from_handle_if_valid (mp->handle);
/* No session or not the right session */
if (!s || s->session_state < SESSION_STATE_TRANSPORT_CLOSING)
@@ -632,6 +607,7 @@ session_mq_worker_update_handler (void *data)
session_event_t *evt;
session_t *s;
application_t *app;
+ int rv;
app = application_lookup (mp->client_index);
if (!app)
@@ -668,7 +644,9 @@ session_mq_worker_update_handler (void *data)
return;
}
- app_worker_own_session (app_wrk, s);
+ rv = app_worker_own_session (app_wrk, s);
+ if (rv)
+ session_stat_error_inc (rv, 1);
/*
* Send reply
@@ -695,7 +673,7 @@ session_mq_worker_update_handler (void *data)
session_send_io_evt_to_thread (s->tx_fifo, SESSION_IO_EVT_TX);
if (s->rx_fifo && !svm_fifo_is_empty (s->rx_fifo))
- app_worker_lock_and_send_event (app_wrk, s, SESSION_IO_EVT_RX);
+ app_worker_rx_notify (app_wrk, s);
if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
app_worker_close_notify (app_wrk, s);
@@ -812,6 +790,9 @@ session_wrk_handle_evts_main_rpc (void *args)
case SESSION_CTRL_EVT_ACCEPTED_REPLY:
session_mq_accepted_reply_handler (fwrk, elt);
break;
+ case SESSION_CTRL_EVT_CONNECT:
+ session_mq_connect_handler (fwrk, elt);
+ break;
default:
clib_warning ("unhandled %u", elt->evt.event_type);
ALWAYS_ASSERT (0);
@@ -820,8 +801,11 @@ session_wrk_handle_evts_main_rpc (void *args)
/* Regrab element in case pool moved */
elt = clib_llist_elt (fwrk->event_elts, ei);
- session_evt_ctrl_data_free (fwrk, elt);
- clib_llist_put (fwrk->event_elts, elt);
+ if (!clib_llist_elt_is_linked (elt, evt_list))
+ {
+ session_evt_ctrl_data_free (fwrk, elt);
+ clib_llist_put (fwrk->event_elts, elt);
+ }
ei = next_ei;
}
@@ -1125,8 +1109,8 @@ session_tx_fill_buffer (session_worker_t *wrk, session_tx_context_t *ctx,
if (transport_connection_is_cless (ctx->tc))
{
- ip_copy (&ctx->tc->rmt_ip, &hdr->rmt_ip, ctx->tc->is_ip4);
- ctx->tc->rmt_port = hdr->rmt_port;
+ clib_memcpy_fast (data0 - sizeof (session_dgram_hdr_t), hdr,
+ sizeof (*hdr));
}
hdr->data_offset += n_bytes_read;
if (hdr->data_offset == hdr->data_length)
@@ -1188,6 +1172,11 @@ session_tx_not_ready (session_t * s, u8 peek_data)
return 2;
}
}
+ else
+ {
+ if (s->session_state == SESSION_STATE_TRANSPORT_DELETED)
+ return 2;
+ }
return 0;
}
@@ -1244,9 +1233,28 @@ session_tx_set_dequeue_params (vlib_main_t * vm, session_tx_context_t * ctx,
svm_fifo_peek (ctx->s->tx_fifo, 0, sizeof (ctx->hdr),
(u8 *) & ctx->hdr);
+ /* Zero length dgrams not supported */
+ if (PREDICT_FALSE (ctx->hdr.data_length == 0))
+ {
+ svm_fifo_dequeue_drop (ctx->s->tx_fifo, sizeof (ctx->hdr));
+ ctx->max_len_to_snd = 0;
+ return;
+ }
+ /* We cannot be sure apps have not enqueued incomplete dgrams */
+ if (PREDICT_FALSE (ctx->max_dequeue <
+ ctx->hdr.data_length + sizeof (ctx->hdr)))
+ {
+ ctx->max_len_to_snd = 0;
+ return;
+ }
ASSERT (ctx->hdr.data_length > ctx->hdr.data_offset);
len = ctx->hdr.data_length - ctx->hdr.data_offset;
+ if (ctx->hdr.gso_size)
+ {
+ ctx->sp.snd_mss = clib_min (ctx->sp.snd_mss, ctx->hdr.gso_size);
+ }
+
/* Process multiple dgrams if smaller than min (buf_space, mss).
* This avoids handling multiple dgrams if they require buffer
* chains */
@@ -1266,11 +1274,13 @@ session_tx_set_dequeue_params (vlib_main_t * vm, session_tx_context_t * ctx,
{
svm_fifo_peek (ctx->s->tx_fifo, offset, sizeof (ctx->hdr),
(u8 *) & hdr);
- ASSERT (hdr.data_length > hdr.data_offset);
dgram_len = hdr.data_length - hdr.data_offset;
- if (len + dgram_len > ctx->max_dequeue
- || first_dgram_len != dgram_len)
+ if (offset + sizeof (hdr) + hdr.data_length >
+ ctx->max_dequeue ||
+ first_dgram_len != dgram_len)
break;
+ /* Assert here to allow test above with zero length dgrams */
+ ASSERT (hdr.data_length > hdr.data_offset);
len += dgram_len;
offset += sizeof (hdr) + hdr.data_length;
}
@@ -1408,9 +1418,12 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
ctx->sp.max_burst_size = max_burst;
n_custom_tx = ctx->transport_vft->custom_tx (ctx->tc, &ctx->sp);
*n_tx_packets += n_custom_tx;
- if (PREDICT_FALSE
- (ctx->s->session_state >= SESSION_STATE_TRANSPORT_CLOSED))
- return SESSION_TX_OK;
+ if (PREDICT_FALSE (ctx->s->session_state >=
+ SESSION_STATE_TRANSPORT_CLOSED))
+ {
+ svm_fifo_unset_event (ctx->s->tx_fifo);
+ return SESSION_TX_OK;
+ }
max_burst -= n_custom_tx;
if (!max_burst || (ctx->s->flags & SESSION_F_CUSTOM_TX))
{
@@ -1552,7 +1565,7 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
*n_tx_packets += ctx->n_segs_per_evt;
SESSION_EVT (SESSION_EVT_DEQ, ctx->s, ctx->max_len_to_snd, ctx->max_dequeue,
- ctx->s->tx_fifo->has_event, wrk->last_vlib_time);
+ ctx->s->tx_fifo->shr->has_event, wrk->last_vlib_time);
ASSERT (ctx->left_to_snd == 0);
@@ -1597,9 +1610,12 @@ session_tx_fifo_dequeue_internal (session_worker_t * wrk,
{
transport_send_params_t *sp = &wrk->ctx.sp;
session_t *s = wrk->ctx.s;
+ clib_llist_index_t ei;
u32 n_packets;
- if (PREDICT_FALSE (s->session_state >= SESSION_STATE_TRANSPORT_CLOSED))
+ if (PREDICT_FALSE ((s->session_state >= SESSION_STATE_TRANSPORT_CLOSED) ||
+ (s->session_state == SESSION_STATE_CONNECTING &&
+ (s->flags & SESSION_F_HALF_OPEN))))
return 0;
/* Clear custom-tx flag used to request reschedule for tx */
@@ -1610,9 +1626,14 @@ session_tx_fifo_dequeue_internal (session_worker_t * wrk,
sp->max_burst_size = clib_min (SESSION_NODE_FRAME_SIZE - *n_tx_packets,
TRANSPORT_PACER_MAX_BURST_PKTS);
+ /* Grab elt index since app transports can enqueue events on tx */
+ ei = clib_llist_entry_index (wrk->event_elts, elt);
+
n_packets = transport_custom_tx (session_get_transport_proto (s), s, sp);
*n_tx_packets += n_packets;
+ elt = clib_llist_elt (wrk->event_elts, ei);
+
if (s->flags & SESSION_F_CUSTOM_TX)
{
session_evt_add_old (wrk, elt);
@@ -1767,7 +1788,7 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node,
break;
case SESSION_IO_EVT_RX:
s = session_event_get_session (wrk, e);
- if (!s)
+ if (!s || s->session_state >= SESSION_STATE_TRANSPORT_CLOSED)
break;
transport_app_rx_evt (session_get_transport_proto (s),
s->connection_index, s->thread_index);
@@ -1778,19 +1799,21 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node,
break;
svm_fifo_unset_event (s->rx_fifo);
app_wrk = app_worker_get (s->app_wrk_index);
- app_worker_builtin_rx (app_wrk, s);
+ app_worker_rx_notify (app_wrk, s);
break;
- case SESSION_IO_EVT_BUILTIN_TX:
- s = session_get_from_handle_if_valid (e->session_handle);
+ case SESSION_IO_EVT_TX_MAIN:
+ s = session_get_if_valid (e->session_index, 0 /* main thread */);
+ if (PREDICT_FALSE (!s))
+ break;
wrk->ctx.s = s;
if (PREDICT_TRUE (s != 0))
- session_tx_fifo_dequeue_internal (wrk, node, elt, n_tx_packets);
+ (smm->session_tx_fns[s->session_type]) (wrk, node, elt, n_tx_packets);
break;
default:
clib_warning ("unhandled event type %d", e->event_type);
}
- SESSION_EVT (SESSION_IO_EVT_COUNTS, e->event_type, 1, wrk);
+ SESSION_EVT (SESSION_EVT_IO_EVT_COUNTS, e->event_type, 1, wrk);
/* Regrab elements in case pool moved */
elt = clib_llist_elt (wrk->event_elts, ei);
@@ -1798,14 +1821,12 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node,
clib_llist_put (wrk->event_elts, elt);
}
-/* *INDENT-OFF* */
static const u32 session_evt_msg_sizes[] = {
#define _(symc, sym) \
[SESSION_CTRL_EVT_ ## symc] = sizeof (session_ ## sym ##_msg_t),
foreach_session_ctrl_evt
#undef _
};
-/* *INDENT-ON* */
always_inline void
session_update_time_subscribers (session_main_t *smm, clib_time_type_t now,
@@ -1882,7 +1903,7 @@ session_wrk_update_state (session_worker_t *wrk)
if (wrk->state == SESSION_WRK_POLLING)
{
- if (clib_llist_elts (wrk->event_elts) == 4 &&
+ if (clib_llist_elts (wrk->event_elts) == 5 &&
vlib_last_vectors_per_main_loop (vm) < 1)
{
session_wrk_set_state (wrk, SESSION_WRK_INTERRUPT);
@@ -1892,7 +1913,7 @@ session_wrk_update_state (session_worker_t *wrk)
}
else if (wrk->state == SESSION_WRK_INTERRUPT)
{
- if (clib_llist_elts (wrk->event_elts) > 4 ||
+ if (clib_llist_elts (wrk->event_elts) > 5 ||
vlib_last_vectors_per_main_loop (vm) > 1)
{
session_wrk_set_state (wrk, SESSION_WRK_POLLING);
@@ -1940,6 +1961,8 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
if (wrk->trans_head == ((wrk->trans_tail + 1) & (wrk->trans_size - 1)))
return 0;
wrk->batch = vlib_dma_batch_new (vm, wrk->config_index);
+ if (!wrk->batch)
+ return 0;
}
/*
@@ -2041,7 +2064,6 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return n_tx_packets;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (session_queue_node) = {
.function = session_queue_node_fn,
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
@@ -2052,7 +2074,6 @@ VLIB_REGISTER_NODE (session_queue_node) = {
.error_counters = session_error_counters,
.state = VLIB_NODE_STATE_DISABLED,
};
-/* *INDENT-ON* */
static clib_error_t *
session_wrk_tfd_read_ready (clib_file_t *cf)
@@ -2156,7 +2177,6 @@ session_queue_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (session_queue_process_node) =
{
.function = session_queue_process,
@@ -2164,7 +2184,6 @@ VLIB_REGISTER_NODE (session_queue_process_node) =
.name = "session-queue-process",
.state = VLIB_NODE_STATE_DISABLED,
};
-/* *INDENT-ON* */
static_always_inline uword
session_queue_pre_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
@@ -2177,7 +2196,6 @@ session_queue_pre_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
return session_queue_node_fn (vm, node, frame);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (session_queue_pre_input_node) =
{
.function = session_queue_pre_input_inline,
@@ -2185,7 +2203,6 @@ VLIB_REGISTER_NODE (session_queue_pre_input_node) =
.name = "session-queue-main",
.state = VLIB_NODE_STATE_DISABLED,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/session/session_rules_table.c b/src/vnet/session/session_rules_table.c
index 5108c00d728..70a702cf55c 100644
--- a/src/vnet/session/session_rules_table.c
+++ b/src/vnet/session/session_rules_table.c
@@ -386,11 +386,11 @@ session_rules_table_lookup6 (session_rules_table_t * srt,
* @param srt table where rule should be added
* @param args rule arguments
*
- * @return 0 if success, clib_error_t error otherwise
+ * @return 0 if success, session_error_t error otherwise
*/
-int
-session_rules_table_add_del (session_rules_table_t * srt,
- session_rule_table_add_del_args_t * args)
+session_error_t
+session_rules_table_add_del (session_rules_table_t *srt,
+ session_rule_table_add_del_args_t *args)
{
u8 fib_proto = args->rmt.fp_proto, *rt;
u32 ri_from_tag, ri;
@@ -398,7 +398,7 @@ session_rules_table_add_del (session_rules_table_t * srt,
ri_from_tag = session_rules_table_rule_for_tag (srt, args->tag);
if (args->is_add && ri_from_tag != SESSION_RULES_TABLE_INVALID_INDEX)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if (fib_proto == FIB_PROTOCOL_IP4)
{
@@ -509,7 +509,7 @@ session_rules_table_add_del (session_rules_table_t * srt,
}
}
else
- return VNET_API_ERROR_INVALID_VALUE_2;
+ return SESSION_E_INVALID;
return 0;
}
@@ -605,11 +605,9 @@ session_rules_table_cli_dump (vlib_main_t * vm, session_rules_table_t * srt,
srt4 = &srt->session_rules_tables_16;
vlib_cli_output (vm, "IP4 rules");
- /* *INDENT-OFF* */
pool_foreach (sr4, srt4->rules) {
vlib_cli_output (vm, "%U", format_session_rule4, srt, sr4);
}
- /* *INDENT-ON* */
}
else if (fib_proto == FIB_PROTOCOL_IP6)
@@ -619,11 +617,9 @@ session_rules_table_cli_dump (vlib_main_t * vm, session_rules_table_t * srt,
srt6 = &srt->session_rules_tables_40;
vlib_cli_output (vm, "IP6 rules");
- /* *INDENT-OFF* */
pool_foreach (sr6, srt6->rules) {
vlib_cli_output (vm, "%U", format_session_rule6, srt, sr6);
}
- /* *INDENT-ON* */
}
}
diff --git a/src/vnet/session/session_rules_table.h b/src/vnet/session/session_rules_table.h
index 206ef2f380f..010d50a6398 100644
--- a/src/vnet/session/session_rules_table.h
+++ b/src/vnet/session/session_rules_table.h
@@ -18,11 +18,11 @@
#include <vnet/vnet.h>
#include <vnet/fib/fib.h>
+#include <vnet/session/session_types.h>
#include <vnet/session/transport.h>
#include <vnet/session/mma_16.h>
#include <vnet/session/mma_40.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
union
@@ -52,7 +52,6 @@ typedef CLIB_PACKED (struct
u64 as_u64[5];
};
}) session_mask_or_match_6_t;
-/* *INDENT-ON* */
#define SESSION_RULE_TAG_MAX_LEN 64
#define SESSION_RULES_TABLE_INVALID_INDEX MMA_TABLE_INVALID_INDEX
@@ -111,8 +110,9 @@ void session_rules_table_show_rule (vlib_main_t * vm,
ip46_address_t * lcl_ip, u16 lcl_port,
ip46_address_t * rmt_ip, u16 rmt_port,
u8 is_ip4);
-int session_rules_table_add_del (session_rules_table_t * srt,
- session_rule_table_add_del_args_t * args);
+session_error_t
+session_rules_table_add_del (session_rules_table_t *srt,
+ session_rule_table_add_del_args_t *args);
u8 *session_rules_table_rule_tag (session_rules_table_t * srt, u32 ri,
u8 is_ip4);
void session_rules_table_init (session_rules_table_t * srt);
diff --git a/src/vnet/session/session_table.c b/src/vnet/session/session_table.c
index 9af8ae6a584..dbbe771979c 100644
--- a/src/vnet/session/session_table.c
+++ b/src/vnet/session/session_table.c
@@ -185,7 +185,66 @@ ip4_session_table_walk (clib_bihash_16_8_t * hash,
&ctx);
}
-/* *INDENT-ON* */
+u32
+session_table_memory_size (session_table_t *st)
+{
+ u64 total_size = 0;
+
+ if (clib_bihash_is_initialised_16_8 (&st->v4_session_hash))
+ {
+ clib_bihash_alloc_chunk_16_8_t *c = st->v4_session_hash.chunks;
+ while (c)
+ {
+ total_size += c->size;
+ c = c->next;
+ }
+ c = st->v4_half_open_hash.chunks;
+ while (c)
+ {
+ total_size += c->size;
+ c = c->next;
+ }
+ }
+
+ if (clib_bihash_is_initialised_48_8 (&st->v6_session_hash))
+ {
+ clib_bihash_alloc_chunk_48_8_t *c = st->v6_session_hash.chunks;
+ while (c)
+ {
+ total_size += c->size;
+ c = c->next;
+ }
+ c = st->v6_half_open_hash.chunks;
+ while (c)
+ {
+ total_size += c->size;
+ c = c->next;
+ }
+ }
+
+ return total_size;
+}
+
+u8 *
+format_session_table (u8 *s, va_list *args)
+{
+ session_table_t *st = va_arg (*args, session_table_t *);
+
+ if (clib_bihash_is_initialised_16_8 (&st->v4_session_hash))
+ {
+ s = format (s, "%U", format_bihash_16_8, &st->v4_session_hash, 0);
+ s = format (s, "%U", format_bihash_16_8, &st->v4_half_open_hash, 0);
+ }
+
+ if (clib_bihash_is_initialised_48_8 (&st->v6_session_hash))
+ {
+ s = format (s, "%U", format_bihash_48_8, &st->v6_session_hash, 0);
+ s = format (s, "%U", format_bihash_48_8, &st->v6_half_open_hash, 0);
+ }
+
+ return s;
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/session/session_table.h b/src/vnet/session/session_table.h
index 2127ea45d01..636b8d77bee 100644
--- a/src/vnet/session/session_table.h
+++ b/src/vnet/session/session_table.h
@@ -69,6 +69,9 @@ u32 session_table_index (session_table_t * slt);
void session_table_init (session_table_t * slt, u8 fib_proto);
void session_table_free (session_table_t *slt, u8 fib_proto);
+u32 session_table_memory_size (session_table_t *st);
+u8 *format_session_table (u8 *s, va_list *args);
+
/* Internal, try not to use it! */
session_table_t *_get_session_tables ();
@@ -76,7 +79,6 @@ session_table_t *_get_session_tables ();
pool_foreach (VAR, _get_session_tables ()) BODY
#endif /* SRC_VNET_SESSION_SESSION_TABLE_H_ */
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/session/session_test.c b/src/vnet/session/session_test.c
index a8a9327b892..770e7263024 100644
--- a/src/vnet/session/session_test.c
+++ b/src/vnet/session/session_test.c
@@ -277,12 +277,6 @@ api_app_worker_add_del (vat_main_t *vat)
}
static int
-api_application_tls_key_add (vat_main_t *vat)
-{
- return -1;
-}
-
-static int
api_app_namespace_add_del (vat_main_t *vam)
{
vl_api_app_namespace_add_del_t *mp;
@@ -330,8 +324,14 @@ api_app_namespace_add_del (vat_main_t *vam)
return ret;
}
+static void
+vl_api_app_namespace_add_del_v4_reply_t_handler (
+ vl_api_app_namespace_add_del_v4_reply_t *mp)
+{
+}
+
static int
-api_application_tls_cert_add (vat_main_t *vat)
+api_app_namespace_add_del_v4 (vat_main_t *vat)
{
return -1;
}
diff --git a/src/vnet/session/session_types.h b/src/vnet/session/session_types.h
index 95a88c5ab6e..5e650727d61 100644
--- a/src/vnet/session/session_types.h
+++ b/src/vnet/session/session_types.h
@@ -25,6 +25,19 @@
#define SESSION_CTRL_MSG_TX_MAX_SIZE 160
#define SESSION_NODE_FRAME_SIZE 128
+typedef u8 session_type_t;
+typedef u64 session_handle_t;
+
+typedef union session_handle_tu_
+{
+ session_handle_t handle;
+ struct
+ {
+ u32 session_index;
+ u32 thread_index;
+ };
+} __attribute__ ((__transparent_union__)) session_handle_tu_t;
+
#define foreach_session_endpoint_fields \
foreach_transport_endpoint_cfg_fields \
_(u8, transport_proto) \
@@ -125,9 +138,6 @@ session_endpoint_is_zero (session_endpoint_t * sep)
return ip_is_zero (&sep->ip, sep->is_ip4);
}
-typedef u8 session_type_t;
-typedef u64 session_handle_t;
-
typedef enum
{
SESSION_CLEANUP_TRANSPORT,
@@ -144,19 +154,19 @@ typedef enum session_ft_action_
/*
* Session states
*/
-#define foreach_session_state \
- _(CREATED, "created") \
- _(LISTENING, "listening") \
- _(CONNECTING, "connecting") \
- _(ACCEPTING, "accepting") \
- _(READY, "ready") \
- _(OPENED, "opened") \
- _(TRANSPORT_CLOSING, "transport-closing") \
- _(CLOSING, "closing") \
- _(APP_CLOSED, "app-closed") \
- _(TRANSPORT_CLOSED, "transport-closed") \
- _(CLOSED, "closed") \
- _(TRANSPORT_DELETED, "transport-deleted") \
+#define foreach_session_state \
+ _ (CREATED, "created") \
+ _ (LISTENING, "listening") \
+ _ (CONNECTING, "connecting") \
+ _ (ACCEPTING, "accepting") \
+ _ (READY, "ready") \
+ _ (OPENED, "opened") \
+ _ (TRANSPORT_CLOSING, "transport-closing") \
+ _ (CLOSING, "closing") \
+ _ (APP_CLOSED, "app-closed") \
+ _ (TRANSPORT_CLOSED, "transport-closed") \
+ _ (CLOSED, "closed") \
+ _ (TRANSPORT_DELETED, "transport-deleted")
typedef enum
{
@@ -164,7 +174,7 @@ typedef enum
foreach_session_state
#undef _
SESSION_N_STATES,
-} session_state_t;
+} __clib_packed session_state_t;
#define foreach_session_flag \
_ (RX_EVT, "rx-event") \
@@ -173,7 +183,9 @@ typedef enum
_ (IS_MIGRATING, "migrating") \
_ (UNIDIRECTIONAL, "unidirectional") \
_ (CUSTOM_FIFO_TUNING, "custom-fifo-tuning") \
- _ (HALF_OPEN, "half-open")
+ _ (HALF_OPEN, "half-open") \
+ _ (APP_CLOSED, "app-closed") \
+ _ (IS_CLESS, "connectionless")
typedef enum session_flags_bits_
{
@@ -196,38 +208,42 @@ typedef struct session_
svm_fifo_t *rx_fifo;
svm_fifo_t *tx_fifo;
+ union
+ {
+ session_handle_t handle;
+ struct
+ {
+ /** Index in thread pool where session was allocated */
+ u32 session_index;
+
+ /** Index of the thread that allocated the session */
+ u32 thread_index;
+ };
+ };
+
/** Type built from transport and network protocol types */
session_type_t session_type;
/** State in session layer state machine. See @ref session_state_t */
- volatile u8 session_state;
-
- /** Index in thread pool where session was allocated */
- u32 session_index;
+ volatile session_state_t session_state;
/** Index of the app worker that owns the session */
u32 app_wrk_index;
- /** Index of the thread that allocated the session */
- u8 thread_index;
-
/** Session flags. See @ref session_flags_t */
- u32 flags;
+ session_flags_t flags;
/** Index of the transport connection associated to the session */
u32 connection_index;
- /** Index of application that owns the listener. Set only if a listener */
- u32 app_index;
+ /** App listener index in app's listener pool if a listener */
+ u32 al_index;
union
{
/** Parent listener session index if the result of an accept */
session_handle_t listener_handle;
- /** App listener index in app's listener pool if a listener */
- u32 al_index;
-
/** Index in app worker's half-open table if a half-open */
u32 ho_index;
};
@@ -300,45 +316,35 @@ session_tx_is_dgram (session_t * s)
always_inline session_handle_t
session_handle (session_t * s)
{
- return ((u64) s->thread_index << 32) | (u64) s->session_index;
+ return s->handle;
}
always_inline u32
-session_index_from_handle (session_handle_t handle)
+session_index_from_handle (session_handle_tu_t handle)
{
- return handle & 0xFFFFFFFF;
+ return handle.session_index;
}
always_inline u32
-session_thread_from_handle (session_handle_t handle)
+session_thread_from_handle (session_handle_tu_t handle)
{
- return handle >> 32;
+ return handle.thread_index;
}
always_inline void
-session_parse_handle (session_handle_t handle, u32 * index,
- u32 * thread_index)
+session_parse_handle (session_handle_tu_t handle, u32 *index,
+ u32 *thread_index)
{
- *index = session_index_from_handle (handle);
- *thread_index = session_thread_from_handle (handle);
+ *index = handle.session_index;
+ *thread_index = handle.thread_index;
}
static inline session_handle_t
session_make_handle (u32 session_index, u32 data)
{
- return (((u64) data << 32) | (u64) session_index);
-}
-
-always_inline u32
-session_handle_index (session_handle_t ho_handle)
-{
- return (ho_handle & 0xffffffff);
-}
-
-always_inline u32
-session_handle_data (session_handle_t ho_handle)
-{
- return (ho_handle >> 32);
+ return ((session_handle_tu_t){ .session_index = session_index,
+ .thread_index = data })
+ .handle;
}
typedef enum
@@ -347,7 +353,7 @@ typedef enum
SESSION_IO_EVT_TX,
SESSION_IO_EVT_TX_FLUSH,
SESSION_IO_EVT_BUILTIN_RX,
- SESSION_IO_EVT_BUILTIN_TX,
+ SESSION_IO_EVT_TX_MAIN,
SESSION_CTRL_EVT_RPC,
SESSION_CTRL_EVT_HALF_CLOSE,
SESSION_CTRL_EVT_CLOSE,
@@ -378,6 +384,8 @@ typedef enum
SESSION_CTRL_EVT_APP_WRK_RPC,
SESSION_CTRL_EVT_TRANSPORT_ATTR,
SESSION_CTRL_EVT_TRANSPORT_ATTR_REPLY,
+ SESSION_CTRL_EVT_TRANSPORT_CLOSED,
+ SESSION_CTRL_EVT_HALF_CLEANUP,
} session_evt_type_t;
#define foreach_session_ctrl_evt \
@@ -412,7 +420,6 @@ typedef enum
#define FIFO_EVENT_APP_TX SESSION_IO_EVT_TX
#define FIFO_EVENT_DISCONNECT SESSION_CTRL_EVT_CLOSE
#define FIFO_EVENT_BUILTIN_RX SESSION_IO_EVT_BUILTIN_RX
-#define FIFO_EVENT_BUILTIN_TX SESSION_IO_EVT_BUILTIN_TX
typedef enum
{
@@ -437,6 +444,7 @@ typedef struct
session_handle_t session_handle;
session_rpc_args_t rpc_args;
u32 ctrl_data_index;
+ u64 as_u64[2];
struct
{
u8 data[0];
@@ -461,12 +469,12 @@ typedef struct session_dgram_header_
u16 rmt_port;
u16 lcl_port;
u8 is_ip4;
+ u16 gso_size;
} __clib_packed session_dgram_hdr_t;
#define SESSION_CONN_ID_LEN 37
-#define SESSION_CONN_HDR_LEN 45
-
-STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8),
+#define SESSION_CONN_HDR_LEN 47
+STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 10),
"session conn id wrong length");
#define foreach_session_error \
@@ -484,9 +492,11 @@ STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8),
_ (NOLISTEN, "not listening") \
_ (NOSESSION, "session does not exist") \
_ (NOAPP, "app not attached") \
+ _ (APP_ATTACHED, "app already attached") \
_ (PORTINUSE, "lcl port in use") \
_ (IPINUSE, "ip in use") \
_ (ALREADY_LISTENING, "ip port pair already listened on") \
+ _ (ADDR_NOT_IN_USE, "address not in use") \
_ (INVALID, "invalid value") \
_ (INVALID_RMT_IP, "invalid remote ip") \
_ (INVALID_APPWRK, "invalid app worker") \
@@ -506,6 +516,8 @@ STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8),
_ (NOCRYPTOENG, "no crypto engine") \
_ (NOCRYPTOCKP, "cert key pair not found ") \
_ (LOCAL_CONNECT, "could not connect with local scope") \
+ _ (WRONG_NS_SECRET, "wrong ns secret") \
+ _ (SYSCALL, "system call error") \
_ (TRANSPORT_NO_REG, "transport was not registered")
typedef enum session_error_p_
diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c
index 192a201612a..1c2a9261d3c 100644
--- a/src/vnet/session/transport.c
+++ b/src/vnet/session/transport.c
@@ -17,36 +17,31 @@
#include <vnet/session/session.h>
#include <vnet/fib/fib.h>
-typedef struct local_endpoint_
-{
- transport_endpoint_t ep;
- int refcnt;
-} local_endpoint_t;
-
/**
* Per-type vector of transport protocol virtual function tables
*/
transport_proto_vft_t *tp_vfts;
-/*
- * Port allocator seed
- */
-static u32 port_allocator_seed;
+typedef struct local_endpoint_
+{
+ transport_endpoint_t ep;
+ transport_proto_t proto;
+ int refcnt;
+} local_endpoint_t;
-/*
- * Local endpoints table
- */
-static transport_endpoint_table_t local_endpoints_table;
+typedef struct transport_main_
+{
+ transport_endpoint_table_t local_endpoints_table;
+ local_endpoint_t *local_endpoints;
+ u32 *lcl_endpts_freelist;
+ u32 port_allocator_seed;
+ u16 port_allocator_min_src_port;
+ u16 port_allocator_max_src_port;
+ u8 lcl_endpts_cleanup_pending;
+ clib_spinlock_t local_endpoints_lock;
+} transport_main_t;
-/*
- * Pool of local endpoints
- */
-static local_endpoint_t *local_endpoints;
-
-/*
- * Local endpoints pool lock
- */
-static clib_spinlock_t local_endpoints_lock;
+static transport_main_t tp_main;
u8 *
format_transport_proto (u8 * s, va_list * args)
@@ -76,6 +71,35 @@ format_transport_proto_short (u8 * s, va_list * args)
return s;
}
+const char *transport_flags_str[] = {
+#define _(sym, str) str,
+ foreach_transport_connection_flag
+#undef _
+};
+
+u8 *
+format_transport_flags (u8 *s, va_list *args)
+{
+ transport_connection_flags_t flags;
+ int i, last = -1;
+
+ flags = va_arg (*args, transport_connection_flags_t);
+
+ for (i = 0; i < TRANSPORT_CONNECTION_N_FLAGS; i++)
+ if (flags & (1 << i))
+ last = i;
+
+ for (i = 0; i < last; i++)
+ {
+ if (flags & (1 << i))
+ s = format (s, "%s, ", transport_flags_str[i]);
+ }
+ if (last >= 0)
+ s = format (s, "%s", transport_flags_str[last]);
+
+ return s;
+}
+
u8 *
format_transport_connection (u8 * s, va_list * args)
{
@@ -100,8 +124,8 @@ format_transport_connection (u8 * s, va_list * args)
if (transport_connection_is_tx_paced (tc))
s = format (s, "%Upacer: %U\n", format_white_space, indent,
format_transport_pacer, &tc->pacer, tc->thread_index);
- s = format (s, "%Utransport: flags 0x%x\n", format_white_space, indent,
- tc->flags);
+ s = format (s, "%Utransport: flags: %U\n", format_white_space, indent,
+ format_transport_flags, tc->flags);
}
return s;
}
@@ -124,14 +148,13 @@ u8 *
format_transport_half_open_connection (u8 * s, va_list * args)
{
u32 transport_proto = va_arg (*args, u32);
- u32 ho_index = va_arg (*args, u32);
transport_proto_vft_t *tp_vft;
tp_vft = transport_protocol_get_vft (transport_proto);
if (!tp_vft)
return s;
- s = format (s, "%U", tp_vft->format_half_open, ho_index);
+ s = (tp_vft->format_half_open) (s, args);
return s;
}
@@ -426,52 +449,115 @@ transport_connection_attribute (transport_proto_t tp, u32 conn_index,
void
transport_endpoint_free (u32 tepi)
{
- pool_put_index (local_endpoints, tepi);
+ transport_main_t *tm = &tp_main;
+ pool_put_index (tm->local_endpoints, tepi);
}
always_inline local_endpoint_t *
transport_endpoint_alloc (void)
{
+ transport_main_t *tm = &tp_main;
local_endpoint_t *lep;
ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
- pool_get_aligned_safe (local_endpoints, lep, 0);
+
+ pool_get_aligned_safe (tm->local_endpoints, lep, 0);
return lep;
}
+static void
+transport_cleanup_freelist (void)
+{
+ transport_main_t *tm = &tp_main;
+ local_endpoint_t *lep;
+ u32 *lep_indexp;
+
+ clib_spinlock_lock (&tm->local_endpoints_lock);
+
+ vec_foreach (lep_indexp, tm->lcl_endpts_freelist)
+ {
+ lep = pool_elt_at_index (tm->local_endpoints, *lep_indexp);
+
+ /* Port re-shared after attempt to cleanup */
+ if (lep->refcnt > 0)
+ continue;
+
+ transport_endpoint_table_del (&tm->local_endpoints_table, lep->proto,
+ &lep->ep);
+ transport_endpoint_free (*lep_indexp);
+ }
+
+ vec_reset_length (tm->lcl_endpts_freelist);
+
+ tm->lcl_endpts_cleanup_pending = 0;
+
+ clib_spinlock_unlock (&tm->local_endpoints_lock);
+}
+
void
-transport_endpoint_cleanup (u8 proto, ip46_address_t * lcl_ip, u16 port)
+transport_program_endpoint_cleanup (u32 lepi)
+{
+ transport_main_t *tm = &tp_main;
+ u8 flush_fl = 0;
+
+ /* All workers can free connections. Synchronize access to freelist */
+ clib_spinlock_lock (&tm->local_endpoints_lock);
+
+ vec_add1 (tm->lcl_endpts_freelist, lepi);
+
+ /* Avoid accumulating lots of endpoints for cleanup */
+ if (!tm->lcl_endpts_cleanup_pending &&
+ vec_len (tm->lcl_endpts_freelist) > 32)
+ {
+ tm->lcl_endpts_cleanup_pending = 1;
+ flush_fl = 1;
+ }
+
+ clib_spinlock_unlock (&tm->local_endpoints_lock);
+
+ if (flush_fl)
+ session_send_rpc_evt_to_thread_force (transport_cl_thread (),
+ transport_cleanup_freelist, 0);
+}
+
+int
+transport_release_local_endpoint (u8 proto, ip46_address_t *lcl_ip, u16 port)
{
+ transport_main_t *tm = &tp_main;
local_endpoint_t *lep;
u32 lepi;
- /* Cleanup local endpoint if this was an active connect */
- lepi = transport_endpoint_lookup (&local_endpoints_table, proto, lcl_ip,
- clib_net_to_host_u16 (port));
+ lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto, lcl_ip,
+ port);
if (lepi == ENDPOINT_INVALID_INDEX)
- return;
+ return -1;
+
+ /* First worker may be cleaning up ports so avoid touching free bitmap */
+ lep = &tm->local_endpoints[lepi];
+ ASSERT (lep->refcnt >= 1);
- lep = pool_elt_at_index (local_endpoints, lepi);
+ /* Local endpoint no longer in use, program cleanup */
if (!clib_atomic_sub_fetch (&lep->refcnt, 1))
{
- transport_endpoint_table_del (&local_endpoints_table, proto, &lep->ep);
-
- /* All workers can free connections. Synchronize access to pool */
- clib_spinlock_lock (&local_endpoints_lock);
- transport_endpoint_free (lepi);
- clib_spinlock_unlock (&local_endpoints_lock);
+ transport_program_endpoint_cleanup (lepi);
+ return 0;
}
+
+  /* Not an error, just an indication that endpoint was not cleaned up */
+ return -1;
}
static int
transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port)
{
+ transport_main_t *tm = &tp_main;
local_endpoint_t *lep;
u32 tei;
ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
- tei = transport_endpoint_lookup (&local_endpoints_table, proto, ip, port);
+ tei =
+ transport_endpoint_lookup (&tm->local_endpoints_table, proto, ip, port);
if (tei != ENDPOINT_INVALID_INDEX)
return SESSION_E_PORTINUSE;
@@ -479,10 +565,11 @@ transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port)
lep = transport_endpoint_alloc ();
clib_memcpy_fast (&lep->ep.ip, ip, sizeof (*ip));
lep->ep.port = port;
+ lep->proto = proto;
lep->refcnt = 1;
- transport_endpoint_table_add (&local_endpoints_table, proto, &lep->ep,
- lep - local_endpoints);
+ transport_endpoint_table_add (&tm->local_endpoints_table, proto, &lep->ep,
+ lep - tm->local_endpoints);
return 0;
}
@@ -490,14 +577,18 @@ transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port)
void
transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, u16 port)
{
+ transport_main_t *tm = &tp_main;
local_endpoint_t *lep;
u32 lepi;
- lepi = transport_endpoint_lookup (&local_endpoints_table, proto, lcl_ip,
- clib_net_to_host_u16 (port));
+  /* Active opens should call this only from control threads, which are also
+   * used to allocate and free ports. So, the pool has only one writer and
+   * potentially many readers. Listeners are allocated with a barrier */
+ lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto, lcl_ip,
+ port);
if (lepi != ENDPOINT_INVALID_INDEX)
{
- lep = pool_elt_at_index (local_endpoints, lepi);
+ lep = pool_elt_at_index (tm->local_endpoints, lepi);
clib_atomic_add_fetch (&lep->refcnt, 1);
}
}
@@ -505,11 +596,16 @@ transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, u16 port)
/**
* Allocate local port and add if successful add entry to local endpoint
* table to mark the pair as used.
+ *
+ * @return port in net order or -1 if port cannot be allocated
*/
int
-transport_alloc_local_port (u8 proto, ip46_address_t * ip)
+transport_alloc_local_port (u8 proto, ip46_address_t *lcl_addr,
+ transport_endpoint_cfg_t *rmt)
{
- u16 min = 1024, max = 65535; /* XXX configurable ? */
+ transport_main_t *tm = &tp_main;
+ u16 min = tm->port_allocator_min_src_port;
+ u16 max = tm->port_allocator_max_src_port;
int tries, limit;
limit = max - min;
@@ -525,13 +621,26 @@ transport_alloc_local_port (u8 proto, ip46_address_t * ip)
/* Find a port in the specified range */
while (1)
{
- port = random_u32 (&port_allocator_seed) & PORT_MASK;
+ port = random_u32 (&tm->port_allocator_seed) & PORT_MASK;
if (PREDICT_TRUE (port >= min && port < max))
- break;
+ {
+ port = clib_host_to_net_u16 (port);
+ break;
+ }
}
- if (!transport_endpoint_mark_used (proto, ip, port))
+ if (!transport_endpoint_mark_used (proto, lcl_addr, port))
return port;
+
+ /* IP:port pair already in use, check if 6-tuple available */
+ if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip, port,
+ rmt->port, proto, rmt->is_ip4))
+ continue;
+
+ /* 6-tuple is available so increment lcl endpoint refcount */
+ transport_share_local_endpoint (proto, lcl_addr, port);
+
+ return port;
}
return -1;
}
@@ -594,6 +703,7 @@ transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg,
ip46_address_t * lcl_addr, u16 * lcl_port)
{
transport_endpoint_t *rmt = (transport_endpoint_t *) rmt_cfg;
+ transport_main_t *tm = &tp_main;
session_error_t error;
int port;
@@ -614,22 +724,37 @@ transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg,
sizeof (rmt_cfg->peer.ip));
}
+ /* Cleanup freelist if need be */
+ if (vec_len (tm->lcl_endpts_freelist))
+ transport_cleanup_freelist ();
+
/*
* Allocate source port
*/
if (rmt_cfg->peer.port == 0)
{
- port = transport_alloc_local_port (proto, lcl_addr);
+ port = transport_alloc_local_port (proto, lcl_addr, rmt_cfg);
if (port < 1)
return SESSION_E_NOPORT;
*lcl_port = port;
}
else
{
- port = clib_net_to_host_u16 (rmt_cfg->peer.port);
- *lcl_port = port;
+ *lcl_port = rmt_cfg->peer.port;
+
+ if (!transport_endpoint_mark_used (proto, lcl_addr, rmt_cfg->peer.port))
+ return 0;
+
+ /* IP:port pair already in use, check if 6-tuple available */
+ if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip,
+ rmt_cfg->peer.port, rmt->port, proto,
+ rmt->is_ip4))
+ return SESSION_E_PORTINUSE;
+
+ /* 6-tuple is available so increment lcl endpoint refcount */
+ transport_share_local_endpoint (proto, lcl_addr, rmt_cfg->peer.port);
- return transport_endpoint_mark_used (proto, lcl_addr, port);
+ return 0;
}
return 0;
@@ -846,6 +971,7 @@ transport_init (void)
{
vlib_thread_main_t *vtm = vlib_get_thread_main ();
session_main_t *smm = vnet_get_session_main ();
+ transport_main_t *tm = &tp_main;
u32 num_threads;
if (smm->local_endpoints_table_buckets == 0)
@@ -854,12 +980,14 @@ transport_init (void)
smm->local_endpoints_table_memory = 512 << 20;
/* Initialize [port-allocator] random number seed */
- port_allocator_seed = (u32) clib_cpu_time_now ();
+ tm->port_allocator_seed = (u32) clib_cpu_time_now ();
+ tm->port_allocator_min_src_port = smm->port_allocator_min_src_port;
+ tm->port_allocator_max_src_port = smm->port_allocator_max_src_port;
- clib_bihash_init_24_8 (&local_endpoints_table, "local endpoints table",
+ clib_bihash_init_24_8 (&tm->local_endpoints_table, "local endpoints table",
smm->local_endpoints_table_buckets,
smm->local_endpoints_table_memory);
- clib_spinlock_init (&local_endpoints_lock);
+ clib_spinlock_init (&tm->local_endpoints_lock);
num_threads = 1 /* main thread */ + vtm->n_threads;
if (num_threads > 1)
diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h
index 633bb1ecfd0..e6ba1ecbc5f 100644
--- a/src/vnet/session/transport.h
+++ b/src/vnet/session/transport.h
@@ -66,7 +66,6 @@ typedef struct transport_send_params_
/*
* Transport protocol virtual function table
*/
-/* *INDENT-OFF* */
typedef struct _transport_proto_vft
{
/*
@@ -125,7 +124,6 @@ typedef struct _transport_proto_vft
*/
transport_options_t transport_options;
} transport_proto_vft_t;
-/* *INDENT-ON* */
extern transport_proto_vft_t *tp_vfts;
@@ -246,13 +244,14 @@ transport_register_new_protocol (const transport_proto_vft_t * vft,
transport_proto_vft_t *transport_protocol_get_vft (transport_proto_t tp);
void transport_update_time (clib_time_type_t time_now, u8 thread_index);
-int transport_alloc_local_port (u8 proto, ip46_address_t * ip);
-int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt,
- ip46_address_t * lcl_addr,
- u16 * lcl_port);
+int transport_alloc_local_port (u8 proto, ip46_address_t *ip,
+ transport_endpoint_cfg_t *rmt);
+int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t *rmt,
+ ip46_address_t *lcl_addr, u16 *lcl_port);
void transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip,
u16 port);
-void transport_endpoint_cleanup (u8 proto, ip46_address_t * lcl_ip, u16 port);
+int transport_release_local_endpoint (u8 proto, ip46_address_t *lcl_ip,
+ u16 port);
void transport_enable_disable (vlib_main_t * vm, u8 is_en);
void transport_init (void);
diff --git a/src/vnet/session/transport_types.h b/src/vnet/session/transport_types.h
index adf5e59e6c0..b3469fa9fdb 100644
--- a/src/vnet/session/transport_types.h
+++ b/src/vnet/session/transport_types.h
@@ -40,24 +40,35 @@ typedef enum transport_service_type_
TRANSPORT_N_SERVICES
} transport_service_type_t;
+/*
+ * IS_TX_PACED : Connection sending is paced
+ * NO_LOOKUP: Don't register connection in lookup. Does not apply to local
+ * apps and transports using the network layer (udp/tcp)
+ * DESCHED: Connection descheduled by the session layer
+ * CLESS: Connection is "connection less". Some important implications of that
+ * are that connections are not pinned to workers and listeners will
+ * have fifos associated to them
+ */
+#define foreach_transport_connection_flag \
+ _ (IS_TX_PACED, "tx_paced") \
+ _ (NO_LOOKUP, "no_lookup") \
+ _ (DESCHED, "descheduled") \
+ _ (CLESS, "connectionless")
+
+typedef enum transport_connection_flags_bits_
+{
+#define _(sym, str) TRANSPORT_CONNECTION_F_BIT_##sym,
+ foreach_transport_connection_flag
+#undef _
+ TRANSPORT_CONNECTION_N_FLAGS
+} transport_connection_flags_bits_t;
+
typedef enum transport_connection_flags_
{
- TRANSPORT_CONNECTION_F_IS_TX_PACED = 1 << 0,
- /**
- * Don't register connection in lookup. Does not apply to local apps
- * and transports using the network layer (udp/tcp)
- */
- TRANSPORT_CONNECTION_F_NO_LOOKUP = 1 << 1,
- /**
- * Connection descheduled by the session layer.
- */
- TRANSPORT_CONNECTION_F_DESCHED = 1 << 2,
- /**
- * Connection is "connection less". Some important implications of that
- * are that connections are not pinned to workers and listeners will
- * have fifos associated to them
- */
- TRANSPORT_CONNECTION_F_CLESS = 1 << 3,
+#define _(sym, str) \
+ TRANSPORT_CONNECTION_F_##sym = 1 << TRANSPORT_CONNECTION_F_BIT_##sym,
+ foreach_transport_connection_flag
+#undef _
} transport_connection_flags_t;
typedef struct _spacer
@@ -113,7 +124,7 @@ typedef struct _transport_connection
#if TRANSPORT_DEBUG
elog_track_t elog_track; /**< Event logging */
- u32 cc_stat_tstamp; /**< CC stats timestamp */
+ f64 cc_stat_tstamp; /**< CC stats timestamp */
#endif
/**
@@ -176,6 +187,7 @@ typedef enum _transport_proto
u8 *format_transport_proto (u8 * s, va_list * args);
u8 *format_transport_proto_short (u8 * s, va_list * args);
+u8 *format_transport_flags (u8 *s, va_list *args);
u8 *format_transport_connection (u8 * s, va_list * args);
u8 *format_transport_listen_connection (u8 * s, va_list * args);
u8 *format_transport_half_open_connection (u8 * s, va_list * args);
diff --git a/src/vnet/snap/node.c b/src/vnet/snap/node.c
index 2a42907321c..ad88b2b3a90 100644
--- a/src/vnet/snap/node.c
+++ b/src/vnet/snap/node.c
@@ -261,7 +261,6 @@ static char *snap_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (snap_input_node) = {
.function = snap_input,
.name = "snap-input",
@@ -282,7 +281,6 @@ VLIB_REGISTER_NODE (snap_input_node) = {
.format_trace = format_snap_input_trace,
.unformat_buffer = unformat_snap_header,
};
-/* *INDENT-ON* */
static void
snap_setup_node (vlib_main_t *vm, u32 node_index)
diff --git a/src/vnet/snap/snap.h b/src/vnet/snap/snap.h
index f6b3be1847f..028df4ede66 100644
--- a/src/vnet/snap/snap.h
+++ b/src/vnet/snap/snap.h
@@ -75,7 +75,6 @@ typedef enum
typedef union
{
- /* *INDENT-OFF* */
CLIB_PACKED (struct {
/* OUI: organization unique identifier. */
u8 oui[3];
@@ -83,7 +82,6 @@ typedef union
/* Per-OUI protocol. */
u16 protocol;
});
- /* *INDENT-ON* */
u8 as_u8[5];
} snap_header_t;
diff --git a/src/vnet/span/node.c b/src/vnet/span/node.c
index ca5ea68ae90..56977b58dc2 100644
--- a/src/vnet/span/node.c
+++ b/src/vnet/span/node.c
@@ -84,7 +84,6 @@ span_mirror (vlib_main_t * vm, vlib_node_runtime_t * node, u32 sw_if_index0,
if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_SPAN_CLONE))
return;
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, sm0->mirror_ports)
{
if (mirror_frames[i] == 0)
@@ -122,7 +121,6 @@ span_mirror (vlib_main_t * vm, vlib_node_runtime_t * node, u32 sw_if_index0,
}
}
}
- /* *INDENT-ON* */
}
static_always_inline uword
@@ -304,7 +302,6 @@ VLIB_NODE_FN (span_l2_output_node) (vlib_main_t * vm,
[0] = "error-drop" \
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (span_input_node) = {
span_node_defs,
.name = "span-input",
@@ -349,7 +346,6 @@ clib_error_t *span_init (vlib_main_t * vm)
}
VLIB_INIT_FUNCTION (span_init);
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
#undef span_node_defs
diff --git a/src/vnet/span/span.c b/src/vnet/span/span.c
index ec47920504a..bf5e20f4d14 100644
--- a/src/vnet/span/span.c
+++ b/src/vnet/span/span.c
@@ -87,6 +87,9 @@ span_add_delete_entry (vlib_main_t * vm,
if (enable_rx || disable_rx)
vnet_feature_enable_disable ("device-input", "span-input",
src_sw_if_index, rx, 0, 0);
+ if (enable_rx || disable_rx)
+ vnet_feature_enable_disable ("port-rx-eth", "span-input",
+ src_sw_if_index, rx, 0, 0);
if (enable_tx || disable_tx)
vnet_feature_enable_disable ("interface-output", "span-output",
src_sw_if_index, tx, 0, 0);
@@ -163,13 +166,11 @@ set_interface_span_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_span_command, static) = {
.path = "set interface span",
.short_help = "set interface span <if-name> [l2] {disable | destination <if-name> [both|rx|tx]}",
.function = set_interface_span_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_interfaces_span_command_fn (vlib_main_t * vm,
@@ -188,7 +189,6 @@ show_interfaces_span_command_fn (vlib_main_t * vm,
};
u8 *s = 0;
- /* *INDENT-OFF* */
vec_foreach (si, sm->interfaces)
{
span_mirror_t * drxm = &si->mirror_rxtx[SPAN_FEAT_DEVICE][VLIB_RX];
@@ -229,18 +229,15 @@ show_interfaces_span_command_fn (vlib_main_t * vm,
clib_bitmap_free (d);
}
}
- /* *INDENT-ON* */
vec_free (s);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_interfaces_span_command, static) = {
.path = "show interface span",
.short_help = "Shows SPAN mirror table",
.function = show_interfaces_span_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/span/span_api.c b/src/vnet/span/span_api.c
index 300f619934e..f5b24bdf214 100644
--- a/src/vnet/span/span_api.c
+++ b/src/vnet/span/span_api.c
@@ -61,7 +61,6 @@ vl_api_sw_interface_span_dump_t_handler (vl_api_sw_interface_span_dump_t * mp)
return;
span_feat_t sf = mp->is_l2 ? SPAN_FEAT_L2 : SPAN_FEAT_DEVICE;
- /* *INDENT-OFF* */
vec_foreach (si, sm->interfaces)
{
span_mirror_t * rxm = &si->mirror_rxtx[sf][VLIB_RX];
@@ -90,7 +89,6 @@ vl_api_sw_interface_span_dump_t_handler (vl_api_sw_interface_span_dump_t * mp)
clib_bitmap_free (b);
}
}
- /* *INDENT-ON* */
}
#include <vnet/span/span.api.c>
diff --git a/src/vnet/srmpls/sr_mpls_api.c b/src/vnet/srmpls/sr_mpls_api.c
index 45107f08ab1..920856acff6 100644
--- a/src/vnet/srmpls/sr_mpls_api.c
+++ b/src/vnet/srmpls/sr_mpls_api.c
@@ -29,7 +29,6 @@
#include <vnet/srmpls/sr_mpls.api_enum.h>
#include <vnet/srmpls/sr_mpls.api_types.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define vl_api_version(n, v) static u32 api_version = v;
#include <vnet/srmpls/sr_mpls.api.h>
@@ -194,12 +193,18 @@ sr_mpls_api_hookup (vlib_main_t * vm)
vec_free (name);
#define _(N, n) \
- vl_msg_api_set_handlers ( \
- REPLY_MSG_ID_BASE + VL_API_##N, #n, vl_api_##n##_t_handler, \
- vl_noop_handler, vl_api_##n##_t_endian, vl_api_##n##_t_print, \
- sizeof (vl_api_##n##_t), 1, vl_api_##n##_t_print_json, \
- vl_api_##n##_t_tojson, vl_api_##n##_t_fromjson, \
- vl_api_##n##_t_calc_size);
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){ \
+ .id = REPLY_MSG_ID_BASE + VL_API_##N, \
+ .name = #n, \
+ .handler = vl_api_##n##_t_handler, \
+ .endian = vl_api_##n##_t_endian, \
+ .format_fn = vl_api_##n##_t_format, \
+ .size = sizeof (vl_api_##n##_t), \
+ .traced = 1, \
+ .tojson = vl_api_##n##_t_tojson, \
+ .fromjson = vl_api_##n##_t_fromjson, \
+ .calc_size = vl_api_##n##_t_calc_size, \
+ });
foreach_vpe_api_msg;
#undef _
@@ -207,25 +212,34 @@ sr_mpls_api_hookup (vlib_main_t * vm)
* Manually register the sr policy add msg, so we trace enough bytes
* to capture a typical segment list
*/
- vl_msg_api_set_handlers (
- REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_ADD, "sr_mpls_policy_add",
- vl_api_sr_mpls_policy_add_t_handler, vl_noop_handler,
- vl_api_sr_mpls_policy_add_t_endian, vl_api_sr_mpls_policy_add_t_print, 256,
- 1, vl_api_sr_mpls_policy_add_t_print_json,
- vl_api_sr_mpls_policy_add_t_tojson, vl_api_sr_mpls_policy_add_t_fromjson,
- vl_api_sr_mpls_policy_add_t_calc_size);
-
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){
+ .id = REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_ADD,
+ .name = "sr_mpls_policy_add",
+ .handler = vl_api_sr_mpls_policy_add_t_handler,
+ .endian = vl_api_sr_mpls_policy_add_t_endian,
+ .format_fn = vl_api_sr_mpls_policy_add_t_format,
+ .size = 256,
+ .traced = 1,
+ .tojson = vl_api_sr_mpls_policy_add_t_tojson,
+ .fromjson = vl_api_sr_mpls_policy_add_t_fromjson,
+ .calc_size = vl_api_sr_mpls_policy_add_t_calc_size,
+ });
/*
* Manually register the sr policy mod msg, so we trace enough bytes
* to capture a typical segment list
*/
- vl_msg_api_set_handlers (
- REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_MOD, "sr_mpls_policy_mod",
- vl_api_sr_mpls_policy_mod_t_handler, vl_noop_handler,
- vl_api_sr_mpls_policy_mod_t_endian, vl_api_sr_mpls_policy_mod_t_print, 256,
- 1, vl_api_sr_mpls_policy_mod_t_print_json,
- vl_api_sr_mpls_policy_mod_t_tojson, vl_api_sr_mpls_policy_mod_t_fromjson,
- vl_api_sr_mpls_policy_mod_t_calc_size);
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){
+ .id = REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_MOD,
+ .name = "sr_mpls_policy_mod",
+ .handler = vl_api_sr_mpls_policy_mod_t_handler,
+ .endian = vl_api_sr_mpls_policy_mod_t_endian,
+ .format_fn = vl_api_sr_mpls_policy_mod_t_format,
+ .size = 256,
+ .traced = 1,
+ .tojson = vl_api_sr_mpls_policy_mod_t_tojson,
+ .fromjson = vl_api_sr_mpls_policy_mod_t_fromjson,
+ .calc_size = vl_api_sr_mpls_policy_mod_t_calc_size,
+ });
/*
* Set up the (msg_name, crc, message-id) table
diff --git a/src/vnet/srmpls/sr_mpls_policy.c b/src/vnet/srmpls/sr_mpls_policy.c
index 8f0804850f1..41cb71601e9 100644
--- a/src/vnet/srmpls/sr_mpls_policy.c
+++ b/src/vnet/srmpls/sr_mpls_policy.c
@@ -108,7 +108,6 @@ create_sl (mpls_sr_policy_t * sr_policy, mpls_label_t * sl, u32 weight)
fib_route_path_t *paths = NULL;
vec_add1 (paths, path);
- /* *INDENT-OFF* */
fib_prefix_t pfx = {
.fp_len = 21,
.fp_proto = FIB_PROTOCOL_MPLS,
@@ -116,7 +115,6 @@ create_sl (mpls_sr_policy_t * sr_policy, mpls_label_t * sl, u32 weight)
.fp_eos = eos,
.fp_payload_proto = DPO_PROTO_MPLS,
};
- /* *INDENT-ON* */
fib_table_entry_path_add2 (0,
&pfx,
@@ -245,7 +243,6 @@ sr_mpls_policy_del (mpls_label_t bsid)
/* remove each of the MPLS routes */
FOR_EACH_MPLS_EOS_BIT (eos)
{
- /* *INDENT-OFF* */
fib_prefix_t pfx = {
.fp_len = 21,
.fp_proto = FIB_PROTOCOL_MPLS,
@@ -253,7 +250,6 @@ sr_mpls_policy_del (mpls_label_t bsid)
.fp_eos = eos,
.fp_payload_proto = DPO_PROTO_MPLS,
};
- /* *INDENT-ON* */
fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
}
@@ -359,7 +355,6 @@ sr_mpls_policy_mod (mpls_label_t bsid, u8 operation,
FOR_EACH_MPLS_EOS_BIT (eos)
{
- /* *INDENT-OFF* */
fib_prefix_t pfx = {
.fp_len = 21,
.fp_proto = FIB_PROTOCOL_MPLS,
@@ -367,7 +362,6 @@ sr_mpls_policy_mod (mpls_label_t bsid, u8 operation,
.fp_eos = eos,
.fp_payload_proto = DPO_PROTO_MPLS,
};
- /* *INDENT-ON* */
fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
}
@@ -411,7 +405,6 @@ sr_mpls_policy_mod (mpls_label_t bsid, u8 operation,
FOR_EACH_MPLS_EOS_BIT (eos)
{
- /* *INDENT-OFF* */
fib_prefix_t pfx = {
.fp_len = 21,
.fp_proto = FIB_PROTOCOL_MPLS,
@@ -419,7 +412,6 @@ sr_mpls_policy_mod (mpls_label_t bsid, u8 operation,
.fp_eos = eos,
.fp_payload_proto = DPO_PROTO_MPLS,
};
- /* *INDENT-ON* */
fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
}
@@ -434,7 +426,6 @@ sr_mpls_policy_mod (mpls_label_t bsid, u8 operation,
FOR_EACH_MPLS_EOS_BIT (eos)
{
- /* *INDENT-OFF* */
fib_prefix_t pfx = {
.fp_len = 21,
.fp_proto = FIB_PROTOCOL_MPLS,
@@ -442,7 +433,6 @@ sr_mpls_policy_mod (mpls_label_t bsid, u8 operation,
.fp_eos = eos,
.fp_payload_proto = DPO_PROTO_MPLS,
};
- /* *INDENT-ON* */
fib_table_entry_path_add2 (0,
&pfx,
@@ -568,7 +558,6 @@ sr_mpls_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(sr_mpls_policy_command, static)=
{
.path = "sr mpls policy",
@@ -577,7 +566,6 @@ VLIB_CLI_COMMAND(sr_mpls_policy_command, static)=
.long_help = "TBD.\n",
.function = sr_mpls_policy_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief CLI to display onscreen all the SR MPLS policies
@@ -597,11 +585,9 @@ show_sr_mpls_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "SR MPLS policies:");
- /* *INDENT-OFF* */
pool_foreach (sr_policy, sm->sr_policies) {
vec_add1(vec_policies, sr_policy);
}
- /* *INDENT-ON* */
vec_foreach_index (i, vec_policies)
{
@@ -647,14 +633,12 @@ show_sr_mpls_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(show_sr_mpls_policies_command, static)=
{
.path = "show sr mpls policies",
.short_help = "show sr mpls policies",
.function = show_sr_mpls_policies_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief Update the Endpoint,Color tuple of an SR policy
@@ -888,14 +872,12 @@ cli_sr_mpls_policy_ec_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(cli_sr_mpls_policy_ec_command, static)=
{
.path = "sr mpls policy te",
.short_help = "sr mpls policy te bsid xxxxx endpoint x.x.x.x color 12341234",
.function = cli_sr_mpls_policy_ec_command_fn,
};
-/* *INDENT-ON* */
/********************* SR MPLS Policy initialization ***********************/
/**
diff --git a/src/vnet/srmpls/sr_mpls_steering.c b/src/vnet/srmpls/sr_mpls_steering.c
index b12e78d2755..e8920df542b 100644
--- a/src/vnet/srmpls/sr_mpls_steering.c
+++ b/src/vnet/srmpls/sr_mpls_steering.c
@@ -770,7 +770,6 @@ sr_mpls_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(sr_mpls_steer_policy_command, static)=
{
.path = "sr mpls steer",
@@ -785,7 +784,6 @@ VLIB_CLI_COMMAND(sr_mpls_steer_policy_command, static)=
"\t\tsr steer l3 2001::/64 via next-hop 2001::1 color 1234 co 2 vpn-label 500\n",
.function = sr_mpls_steer_policy_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_sr_mpls_steering_policies_command_fn (vlib_main_t * vm,
@@ -799,11 +797,9 @@ show_sr_mpls_steering_policies_command_fn (vlib_main_t * vm,
int i;
vlib_cli_output (vm, "SR MPLS steering policies:");
- /* *INDENT-OFF* */
pool_foreach (steer_pl, sm->steer_policies) {
vec_add1(steer_policies, steer_pl);
}
- /* *INDENT-ON* */
for (i = 0; i < vec_len (steer_policies); i++)
{
vlib_cli_output (vm, "==========================");
@@ -871,14 +867,12 @@ show_sr_mpls_steering_policies_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(show_sr_mpls_steering_policies_command, static)=
{
.path = "show sr mpls steering policies",
.short_help = "show sr mpls steering policies",
.function = show_sr_mpls_steering_policies_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
sr_mpls_steering_init (vlib_main_t * vm)
@@ -894,9 +888,7 @@ sr_mpls_steering_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION(sr_mpls_steering_init);
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/srv6/sr.api b/src/vnet/srv6/sr.api
index 6190a8c7ff5..4766ce3ba11 100644
--- a/src/vnet/srv6/sr.api
+++ b/src/vnet/srv6/sr.api
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-option version = "2.0.0";
+option version = "2.1.0";
import "vnet/interface_types.api";
import "vnet/ip/ip_types.api";
@@ -109,6 +109,65 @@ autoreply define sr_policy_mod
vl_api_srv6_sid_list_t sids;
};
+enum sr_policy_type : u8
+{
+ SR_API_POLICY_TYPE_DEFAULT = 0,
+ SR_API_POLICY_TYPE_SPRAY = 1,
+ SR_API_POLICY_TYPE_TEF = 2,
+};
+
+/** \brief IPv6 SR policy add
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid is the bindingSID of the SR Policy
+ @param weight is the weight of the sid list. optional.
+ @param is_encap is the behavior of the SR policy. (0.SRH insert // 1.Encapsulation)
+ @param type is the SR policy type. (0.Default // 1.Spray // 2.TEF)
+ @param fib_table is the VRF where to install the FIB entry for the BSID
+ @param sids is a srv6_sid_list object
+ @param encap_src is an encapsulation IPv6 source address. optional.
+*/
+autoreply define sr_policy_add_v2
+{
+ u32 client_index;
+ u32 context;
+ vl_api_ip6_address_t bsid_addr;
+ u32 weight;
+ bool is_encap;
+ vl_api_sr_policy_type_t type [default=0x0];
+ u32 fib_table;
+ vl_api_srv6_sid_list_t sids;
+ vl_api_ip6_address_t encap_src;
+ option status="in_progress";
+};
+
+/** \brief IPv6 SR policy modification
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid is the bindingSID of the SR Policy
+ @param sr_policy_index is the index of the SR policy
+ @param fib_table is the VRF where to install the FIB entry for the BSID
+ @param operation is the operation to perform (among the top ones)
+ @param sl_index is the index of the Segment List to modify/delete
+ @param weight is the weight of the sid list. optional.
+ @param sids is a srv6_sid_list object
+ @param encap_src is an encapsulation IPv6 source address. optional.
+*/
+autoreply define sr_policy_mod_v2
+{
+ u32 client_index;
+ u32 context;
+ vl_api_ip6_address_t bsid_addr;
+ u32 sr_policy_index;
+ u32 fib_table;
+ vl_api_sr_policy_op_t operation;
+ u32 sl_index;
+ u32 weight;
+ vl_api_srv6_sid_list_t sids;
+ vl_api_ip6_address_t encap_src;
+ option status="in_progress";
+};
+
/** \brief IPv6 SR policy deletion
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -195,12 +254,45 @@ define sr_localsids_details
u32 xconnect_iface_or_vrf_table;
};
+
+/** \brief Dump the list of SR LocalSIDs along with packet statistics
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define sr_localsids_with_packet_stats_dump
+{
+ u32 client_index;
+ u32 context;
+ option status="in_progress";
+};
+
+define sr_localsids_with_packet_stats_details
+{
+ u32 context;
+ vl_api_ip6_address_t addr;
+ bool end_psp;
+ vl_api_sr_behavior_t behavior;
+ u32 fib_table;
+ u32 vlan_index;
+ vl_api_address_t xconnect_nh_addr;
+ u32 xconnect_iface_or_vrf_table;
+ u64 good_traffic_bytes;
+ u64 good_traffic_pkt_count;
+ u64 bad_traffic_bytes;
+ u64 bad_traffic_pkt_count;
+ option status="in_progress";
+};
+
+
+
/** \brief Dump the list of SR policies
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
*/
define sr_policies_dump
{
+ option deprecated;
+
u32 client_index;
u32 context;
};
@@ -217,6 +309,28 @@ define sr_policies_details
vl_api_srv6_sid_list_t sid_lists[num_sid_lists];
};
+/** \brief Dump the list of SR policies v2
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define sr_policies_v2_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+define sr_policies_v2_details
+{
+ u32 context;
+ vl_api_ip6_address_t bsid;
+ vl_api_ip6_address_t encap_src;
+ vl_api_sr_policy_type_t type;
+ bool is_encap;
+ u32 fib_table;
+ u8 num_sid_lists;
+ vl_api_srv6_sid_list_t sid_lists[num_sid_lists];
+};
+
/** \brief Dump the list of SR policies along with actual segment list index on VPP
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/vnet/srv6/sr.h b/src/vnet/srv6/sr.h
index 02cceade682..c2867eb7508 100644
--- a/src/vnet/srv6/sr.h
+++ b/src/vnet/srv6/sr.h
@@ -56,13 +56,11 @@
#define SR_SEGMENT_LIST_WEIGHT_DEFAULT 1
-/* *INDENT-OFF* */
typedef struct
{
ip6_header_t ip;
ip6_sr_header_t sr;
} __attribute__ ((packed)) ip6srv_combo_header_t;
-/* *INDENT-ON* */
/**
* @brief SR Segment List (SID list)
@@ -112,6 +110,8 @@ typedef struct
u8 is_encap; /**< Mode (0 is SRH insert, 1 Encaps) */
+ ip6_address_t encap_src;
+
u16 plugin;
void *plugin_mem;
} ip6_sr_policy_t;
@@ -345,11 +345,12 @@ sr_policy_register_function (vlib_main_t * vm, u8 * fn_name,
sr_p_plugin_callback_t * removal_fn);
extern int sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments,
- u32 weight, u8 type, u32 fib_table, u8 is_encap,
- u16 plugin, void *plugin_mem);
-extern int sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table,
- u8 operation, ip6_address_t * segments,
- u32 sl_index, u32 weight);
+ ip6_address_t *encap_src, u32 weight, u8 type,
+ u32 fib_table, u8 is_encap, u16 plugin,
+ void *plugin_mem);
+extern int sr_policy_mod (ip6_address_t *bsid, u32 index, u32 fib_table,
+ u8 operation, ip6_address_t *segments,
+ ip6_address_t *encap_src, u32 sl_index, u32 weight);
extern int sr_policy_del (ip6_address_t * bsid, u32 index);
extern int
diff --git a/src/vnet/srv6/sr_api.c b/src/vnet/srv6/sr_api.c
index 71924981841..a44c3098112 100644
--- a/src/vnet/srv6/sr_api.c
+++ b/src/vnet/srv6/sr_api.c
@@ -82,17 +82,16 @@ vl_api_sr_policy_add_t_handler (vl_api_sr_policy_add_t * mp)
ip6_address_decode (mp->bsid_addr, &bsid_addr);
-/*
- * sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments,
- * u32 weight, u8 behavior, u32 fib_table, u8 is_encap,
- * u16 behavior, void *plugin_mem)
- */
+ /*
+ * sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments,
+ * ip6_address_t *encap_src,
+ * u32 weight, u8 behavior, u32 fib_table, u8 is_encap,
+ * u16 behavior, void *plugin_mem)
+ */
int rv = 0;
- rv = sr_policy_add (&bsid_addr,
- segments,
- ntohl (mp->sids.weight),
- mp->is_spray, ntohl (mp->fib_table), mp->is_encap, 0,
- NULL);
+ rv =
+ sr_policy_add (&bsid_addr, segments, NULL, ntohl (mp->sids.weight),
+ mp->is_spray, ntohl (mp->fib_table), mp->is_encap, 0, NULL);
vec_free (segments);
REPLY_MACRO (VL_API_SR_POLICY_ADD_REPLY);
@@ -115,18 +114,93 @@ vl_api_sr_policy_mod_t_handler (vl_api_sr_policy_mod_t * mp)
ip6_address_decode (mp->bsid_addr, &bsid_addr);
int rv = 0;
-/*
- * int
- * sr_policy_mod(ip6_address_t *bsid, u32 index, u32 fib_table,
- * u8 operation, ip6_address_t *segments, u32 sl_index,
- * u32 weight, u8 is_encap)
- */
- rv = sr_policy_mod (&bsid_addr,
- ntohl (mp->sr_policy_index),
- ntohl (mp->fib_table),
- mp->operation,
- segments, ntohl (mp->sl_index),
- ntohl (mp->sids.weight));
+ /*
+ * int
+ * sr_policy_mod(ip6_address_t *bsid, u32 index, u32 fib_table,
+ * u8 operation, ip6_address_t *segments,
+ * ip6_address_t *encap_src, u32 sl_index,
+ * u32 weight, u8 is_encap)
+ */
+ rv = sr_policy_mod (&bsid_addr, ntohl (mp->sr_policy_index),
+ ntohl (mp->fib_table), mp->operation, segments, NULL,
+ ntohl (mp->sl_index), ntohl (mp->sids.weight));
+ vec_free (segments);
+
+ REPLY_MACRO (VL_API_SR_POLICY_MOD_REPLY);
+}
+
+static void
+vl_api_sr_policy_add_v2_t_handler (vl_api_sr_policy_add_v2_t *mp)
+{
+ vl_api_sr_policy_add_v2_reply_t *rmp;
+ ip6_address_t *segments = 0, *seg;
+ ip6_address_t bsid_addr;
+ ip6_address_t encap_src;
+
+ int i;
+ for (i = 0; i < mp->sids.num_sids; i++)
+ {
+ vec_add2 (segments, seg, 1);
+ ip6_address_decode (mp->sids.sids[i], seg);
+ }
+
+ ip6_address_decode (mp->bsid_addr, &bsid_addr);
+ ip6_address_decode (mp->encap_src, &encap_src);
+
+ if (ip6_address_is_zero (&encap_src))
+ {
+ encap_src = *sr_get_encaps_source ();
+ }
+ /*
+ * sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments,
+ * ip6_address_t *encap_src,
+ * u32 weight, u8 behavior, u32 fib_table, u8 is_encap,
+ * u16 behavior, void *plugin_mem)
+ */
+ int rv = 0;
+ rv =
+ sr_policy_add (&bsid_addr, segments, &encap_src, ntohl (mp->sids.weight),
+ mp->type, ntohl (mp->fib_table), mp->is_encap, 0, NULL);
+ vec_free (segments);
+
+ REPLY_MACRO (VL_API_SR_POLICY_ADD_V2_REPLY);
+}
+
+static void
+vl_api_sr_policy_mod_v2_t_handler (vl_api_sr_policy_mod_v2_t *mp)
+{
+ vl_api_sr_policy_mod_v2_reply_t *rmp;
+ ip6_address_t *segments = 0, *seg;
+ ip6_address_t bsid_addr;
+ ip6_address_t encap_src;
+
+ int i;
+ for (i = 0; i < mp->sids.num_sids; i++)
+ {
+ vec_add2 (segments, seg, 1);
+ ip6_address_decode (mp->sids.sids[i], seg);
+ }
+
+ ip6_address_decode (mp->bsid_addr, &bsid_addr);
+ ip6_address_decode (mp->encap_src, &encap_src);
+
+ if (ip6_address_is_zero (&encap_src))
+ {
+ encap_src = *sr_get_encaps_source ();
+ }
+
+ int rv = 0;
+ /*
+ * int
+ * sr_policy_mod(ip6_address_t *bsid, u32 index, u32 fib_table,
+ * u8 operation, ip6_address_t *segments,
+ * ip6_address_t *encap_src, u32 sl_index,
+ * u32 weight, u8 is_encap)
+ */
+ rv =
+ sr_policy_mod (&bsid_addr, ntohl (mp->sr_policy_index),
+ ntohl (mp->fib_table), mp->operation, segments, &encap_src,
+ ntohl (mp->sl_index), ntohl (mp->sids.weight));
vec_free (segments);
REPLY_MACRO (VL_API_SR_POLICY_MOD_REPLY);
@@ -247,12 +321,77 @@ static void vl_api_sr_localsids_dump_t_handler
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (t, sm->localsids)
{
send_sr_localsid_details(t, reg, mp->context);
}
- /* *INDENT-ON* */
+}
+
+static void
+send_sr_localsid_with_packet_stats_details (int local_sid_index,
+ ip6_sr_localsid_t *t,
+ vl_api_registration_t *reg,
+ u32 context)
+{
+ vl_api_sr_localsids_with_packet_stats_details_t *rmp;
+ vlib_counter_t good_traffic, bad_traffic;
+ ip6_sr_main_t *sm = &sr_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ clib_memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (REPLY_MSG_ID_BASE + VL_API_SR_LOCALSIDS_WITH_PACKET_STATS_DETAILS);
+ ip6_address_encode (&t->localsid, rmp->addr);
+ rmp->end_psp = t->end_psp;
+ rmp->behavior = t->behavior;
+ rmp->fib_table = htonl (t->fib_table);
+ rmp->vlan_index = htonl (t->vlan_index);
+ ip_address_encode (&t->next_hop, IP46_TYPE_ANY, &rmp->xconnect_nh_addr);
+
+ if (t->behavior == SR_BEHAVIOR_T || t->behavior == SR_BEHAVIOR_DT6)
+ rmp->xconnect_iface_or_vrf_table =
+ htonl (fib_table_get_table_id (t->sw_if_index, FIB_PROTOCOL_IP6));
+ else if (t->behavior == SR_BEHAVIOR_DT4)
+ rmp->xconnect_iface_or_vrf_table =
+ htonl (fib_table_get_table_id (t->sw_if_index, FIB_PROTOCOL_IP4));
+ else
+ rmp->xconnect_iface_or_vrf_table = htonl (t->sw_if_index);
+
+ rmp->context = context;
+ vlib_get_combined_counter (&(sm->sr_ls_valid_counters), local_sid_index,
+ &good_traffic);
+ vlib_get_combined_counter (&(sm->sr_ls_invalid_counters), local_sid_index,
+ &bad_traffic);
+ rmp->good_traffic_bytes = clib_host_to_net_u64 (good_traffic.bytes);
+ rmp->good_traffic_pkt_count = clib_host_to_net_u64 (good_traffic.packets);
+ rmp->bad_traffic_bytes = clib_host_to_net_u64 (bad_traffic.bytes);
+ rmp->bad_traffic_pkt_count = clib_host_to_net_u64 (bad_traffic.packets);
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
+vl_api_sr_localsids_with_packet_stats_dump_t_handler (
+ vl_api_sr_localsids_with_packet_stats_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_localsid_t **localsid_list = 0;
+ ip6_sr_localsid_t *t;
+ int i;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ pool_foreach (t, sm->localsids)
+ {
+ vec_add1 (localsid_list, t);
+ }
+ for (i = 0; i < vec_len (localsid_list); i++)
+ {
+ t = localsid_list[i];
+ send_sr_localsid_with_packet_stats_details (i, t, reg, mp->context);
+ }
}
static void send_sr_policies_details
@@ -312,15 +451,74 @@ vl_api_sr_policies_dump_t_handler (vl_api_sr_policies_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (t, sm->sr_policies)
{
send_sr_policies_details(t, reg, mp->context);
}
- /* *INDENT-ON* */
}
+static void
+send_sr_policies_v2_details (ip6_sr_policy_t *t, vl_api_registration_t *reg,
+ u32 context)
+{
+ vl_api_sr_policies_v2_details_t *rmp;
+ ip6_sr_main_t *sm = &sr_main;
+ u32 *sl_index, slidx = 0;
+ ip6_sr_sl_t *segment_list = 0;
+ ip6_address_t *segment;
+ vl_api_srv6_sid_list_t *api_sid_list;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp) + vec_len (t->segments_lists) *
+ sizeof (vl_api_srv6_sid_list_t));
+ clib_memset (rmp, 0,
+ (sizeof (*rmp) + vec_len (t->segments_lists) *
+ sizeof (vl_api_srv6_sid_list_t)));
+
+ rmp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_SR_POLICIES_V2_DETAILS);
+ ip6_address_encode (&t->bsid, rmp->bsid);
+ ip6_address_encode (&t->encap_src, rmp->encap_src);
+ rmp->is_encap = t->is_encap;
+ rmp->type = t->type;
+ rmp->fib_table = htonl (t->fib_table);
+ rmp->num_sid_lists = vec_len (t->segments_lists);
+
+ /* Fill in all the segments lists */
+ vec_foreach (sl_index, t->segments_lists)
+ {
+ segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+
+ api_sid_list = &rmp->sid_lists[sl_index - t->segments_lists];
+
+ api_sid_list->num_sids = vec_len (segment_list->segments);
+ api_sid_list->weight = htonl (segment_list->weight);
+ slidx = 0;
+ vec_foreach (segment, segment_list->segments)
+ {
+ ip6_address_encode (segment, api_sid_list->sids[slidx++]);
+ }
+ }
+
+ rmp->context = context;
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
+vl_api_sr_policies_v2_dump_t_handler (vl_api_sr_policies_v2_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_policy_t *t;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ pool_foreach (t, sm->sr_policies)
+ {
+ send_sr_policies_v2_details (t, reg, mp->context);
+ }
+}
static void send_sr_policies_details_with_sl_index
(ip6_sr_policy_t * t, vl_api_registration_t * reg, u32 context)
@@ -381,12 +579,10 @@ static void
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (t, sm->sr_policies)
{
send_sr_policies_details_with_sl_index(t, reg, mp->context);
}
- /* *INDENT-ON* */
}
static void send_sr_steering_pol_details
@@ -428,12 +624,10 @@ static void vl_api_sr_steering_pol_dump_t_handler
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (t, sm->steer_policies)
{
send_sr_steering_pol_details(t, reg, mp->context);
}
- /* *INDENT-ON* */
}
#include <vnet/srv6/sr.api.c>
diff --git a/src/vnet/srv6/sr_localsid.c b/src/vnet/srv6/sr_localsid.c
index a055c923be9..12349bb95e8 100644
--- a/src/vnet/srv6/sr_localsid.c
+++ b/src/vnet/srv6/sr_localsid.c
@@ -396,12 +396,10 @@ sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
sr_localsid_fn_registration_t **plugin_it = 0;
/* Create a vector out of the plugin pool as recommended */
- /* *INDENT-OFF* */
pool_foreach (plugin, sm->plugin_functions)
{
vec_add1 (vec_plugins, plugin);
}
- /* *INDENT-ON* */
vec_foreach (plugin_it, vec_plugins)
{
@@ -506,7 +504,6 @@ sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (sr_localsid_command, static) = {
.path = "sr localsid",
.short_help = "sr localsid (del) address XX:XX::YY:YY"
@@ -534,7 +531,6 @@ VLIB_CLI_COMMAND (sr_localsid_command, static) = {
"\t\tParameters: '<ip4_fib_table>'\n",
.function = sr_cli_localsid_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief CLI function to 'show' all SR LocalSIDs on console.
@@ -551,9 +547,7 @@ show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "SRv6 - My LocalSID Table:");
vlib_cli_output (vm, "=========================");
- /* *INDENT-OFF* */
pool_foreach (ls, sm->localsids) { vec_add1 (localsid_list, ls); }
- /* *INDENT-ON* */
for (i = 0; i < vec_len (localsid_list); i++)
{
ls = localsid_list[i];
@@ -676,13 +670,11 @@ show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_localsid_command, static) = {
.path = "show sr localsids",
.short_help = "show sr localsids",
.function = show_sr_localsid_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief Function to 'clear' ALL SR localsid counters
@@ -700,13 +692,11 @@ clear_sr_localsid_counters_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_sr_localsid_counters_command, static) = {
.path = "clear sr localsid-counters",
.short_help = "clear sr localsid-counters",
.function = clear_sr_localsid_counters_command_fn,
};
-/* *INDENT-ON* */
/************************ SR LocalSID graphs node ****************************/
/**
@@ -1438,7 +1428,6 @@ sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_localsid_d_node) = {
.function = sr_localsid_d_fn,
.name = "sr-localsid-d",
@@ -1454,7 +1443,6 @@ VLIB_REGISTER_NODE (sr_localsid_d_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/**
* @brief SR LocalSID graph node. Supports all default SR Endpoint without decaps
@@ -1748,7 +1736,6 @@ sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_localsid_node) = {
.function = sr_localsid_fn,
.name = "sr-localsid",
@@ -1764,7 +1751,6 @@ VLIB_REGISTER_NODE (sr_localsid_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/**
* @brief SR LocalSID uN graph node. Supports all default SR Endpoint without decaps
@@ -2058,7 +2044,6 @@ sr_localsid_un_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_localsid_un_node) = {
.function = sr_localsid_un_fn,
.name = "sr-localsid-un",
@@ -2074,7 +2059,6 @@ VLIB_REGISTER_NODE (sr_localsid_un_node) = {
#undef _
},
};
-/* *INDENT-ON* */
static uword
sr_localsid_un_perf_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
@@ -2270,7 +2254,6 @@ sr_localsid_un_perf_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_localsid_un_perf_node) = {
.function = sr_localsid_un_perf_fn,
.name = "sr-localsid-un-perf",
@@ -2286,7 +2269,6 @@ VLIB_REGISTER_NODE (sr_localsid_un_perf_node) = {
#undef _
},
};
-/* *INDENT-ON* */
static u8 *
format_sr_dpo (u8 * s, va_list * args)
@@ -2406,10 +2388,8 @@ show_sr_localsid_behaviors_command_fn (vlib_main_t * vm,
vlib_cli_output (vm,
"SR LocalSIDs behaviors:\n-----------------------\n\n");
- /* *INDENT-OFF* */
pool_foreach (plugin, sm->plugin_functions)
{ vec_add1 (plugins_vec, plugin); }
- /* *INDENT-ON* */
/* Print static behaviors */
vlib_cli_output (vm, "Default behaviors:\n"
@@ -2439,13 +2419,11 @@ show_sr_localsid_behaviors_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_localsid_behaviors_command, static) = {
.path = "show sr localsids behaviors",
.short_help = "show sr localsids behaviors",
.function = show_sr_localsid_behaviors_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief SR LocalSID initialization
diff --git a/src/vnet/srv6/sr_policy_rewrite.c b/src/vnet/srv6/sr_policy_rewrite.c
index 12e7f084c8c..0aa88cc273e 100644
--- a/src/vnet/srv6/sr_policy_rewrite.c
+++ b/src/vnet/srv6/sr_policy_rewrite.c
@@ -49,6 +49,7 @@
#include <vnet/dpo/replicate_dpo.h>
#include <vnet/srv6/sr_pt.h>
+#include <vppinfra/byte_order.h>
#include <vppinfra/error.h>
#include <vppinfra/elog.h>
@@ -141,13 +142,11 @@ set_sr_src_command_fn (vlib_main_t * vm, unformat_input_t * input,
return clib_error_return (0, "No address specified");
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_sr_src_command, static) = {
.path = "set sr encaps source",
.short_help = "set sr encaps source addr <ip6_addr>",
.function = set_sr_src_command_fn,
};
-/* *INDENT-ON* */
/******************** SR rewrite set encaps IPv6 hop-limit ********************/
@@ -179,24 +178,23 @@ set_sr_hop_limit_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_sr_hop_limit_command, static) = {
.path = "set sr encaps hop-limit",
.short_help = "set sr encaps hop-limit <value>",
.function = set_sr_hop_limit_command_fn,
};
-/* *INDENT-ON* */
/*********************** SR rewrite string computation ************************/
/**
* @brief SR rewrite string computation for IPv6 encapsulation (inline)
*
* @param sl is a vector of IPv6 addresses composing the Segment List
+ * @param src_v6addr is a encaps IPv6 source addr
*
* @return precomputed rewrite string for encapsulation
*/
static inline u8 *
-compute_rewrite_encaps (ip6_address_t *sl, u8 type)
+compute_rewrite_encaps (ip6_address_t *sl, ip6_address_t *src_v6addr, u8 type)
{
ip6_header_t *iph;
ip6_sr_header_t *srh;
@@ -224,8 +222,8 @@ compute_rewrite_encaps (ip6_address_t *sl, u8 type)
iph = (ip6_header_t *) rs;
iph->ip_version_traffic_class_and_flow_label =
clib_host_to_net_u32 (0 | ((6 & 0xF) << 28));
- iph->src_address.as_u64[0] = sr_pr_encaps_src.as_u64[0];
- iph->src_address.as_u64[1] = sr_pr_encaps_src.as_u64[1];
+ iph->src_address.as_u64[0] = src_v6addr->as_u64[0];
+ iph->src_address.as_u64[1] = src_v6addr->as_u64[1];
iph->payload_length = header_length - IPv6_DEFAULT_HEADER_LENGTH;
iph->protocol = IP_PROTOCOL_IPV6;
iph->hop_limit = sr_pr_encaps_hop_limit;
@@ -369,18 +367,20 @@ compute_rewrite_bsid (ip6_address_t * sl)
*
* @param sr_policy is the SR policy where the SL will be added
* @param sl is a vector of IPv6 addresses composing the Segment List
+ * @param encap_src is a encaps IPv6 source addr. optional.
* @param weight is the weight of the SegmentList (for load-balancing purposes)
* @param is_encap represents the mode (SRH insertion vs Encapsulation)
*
* @return pointer to the just created segment list
*/
static inline ip6_sr_sl_t *
-create_sl (ip6_sr_policy_t * sr_policy, ip6_address_t * sl, u32 weight,
- u8 is_encap)
+create_sl (ip6_sr_policy_t *sr_policy, ip6_address_t *sl,
+ ip6_address_t *encap_src, u32 weight, u8 is_encap)
{
ip6_sr_main_t *sm = &sr_main;
ip6_sr_sl_t *segment_list;
sr_policy_fn_registration_t *plugin = 0;
+ ip6_address_t encap_srcv6 = sr_pr_encaps_src;
pool_get (sm->sid_lists, segment_list);
clib_memset (segment_list, 0, sizeof (*segment_list));
@@ -399,8 +399,14 @@ create_sl (ip6_sr_policy_t * sr_policy, ip6_address_t * sl, u32 weight,
if (is_encap)
{
- segment_list->rewrite = compute_rewrite_encaps (sl, sr_policy->type);
+ if (encap_src)
+ {
+ clib_memcpy_fast (&encap_srcv6, encap_src, sizeof (ip6_address_t));
+ }
+ segment_list->rewrite =
+ compute_rewrite_encaps (sl, &encap_srcv6, sr_policy->type);
segment_list->rewrite_bsid = segment_list->rewrite;
+ sr_policy->encap_src = encap_srcv6;
}
else
{
@@ -659,17 +665,19 @@ update_replicate (ip6_sr_policy_t * sr_policy)
*
* @param bsid is the bindingSID of the SR Policy
* @param segments is a vector of IPv6 address composing the segment list
+ * @param encap_src is a encaps IPv6 source addr. optional.
* @param weight is the weight of the sid list. optional.
* @param behavior is the behavior of the SR policy. (default//spray)
* @param fib_table is the VRF where to install the FIB entry for the BSID
- * @param is_encap (bool) whether SR policy should behave as Encap/SRH Insertion
+ * @param is_encap (bool) whether SR policy should behave as Encap/SRH
+ * Insertion
*
* @return 0 if correct, else error
*/
int
-sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments, u32 weight,
- u8 type, u32 fib_table, u8 is_encap, u16 plugin,
- void *ls_plugin_mem)
+sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments,
+ ip6_address_t *encap_src, u32 weight, u8 type, u32 fib_table,
+ u8 is_encap, u16 plugin, void *ls_plugin_mem)
{
ip6_sr_main_t *sm = &sr_main;
ip6_sr_policy_t *sr_policy = 0;
@@ -725,7 +733,7 @@ sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments, u32 weight,
NULL);
/* Create a segment list and add the index to the SR policy */
- create_sl (sr_policy, segments, weight, is_encap);
+ create_sl (sr_policy, segments, encap_src, weight, is_encap);
/* If FIB doesnt exist, create them */
if (sm->fib_table_ip6 == (u32) ~ 0)
@@ -855,6 +863,7 @@ sr_policy_del (ip6_address_t * bsid, u32 index)
* @param fib_table is the VRF where to install the FIB entry for the BSID
* @param operation is the operation to perform (among the top ones)
* @param segments is a vector of IPv6 address composing the segment list
+ * @param encap_src is a encaps IPv6 source addr. optional.
* @param sl_index is the index of the Segment List to modify/delete
* @param weight is the weight of the sid list. optional.
* @param is_encap Mode. Encapsulation or SRH insertion.
@@ -862,8 +871,8 @@ sr_policy_del (ip6_address_t * bsid, u32 index)
* @return 0 if correct, else error
*/
int
-sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table,
- u8 operation, ip6_address_t * segments, u32 sl_index,
+sr_policy_mod (ip6_address_t *bsid, u32 index, u32 fib_table, u8 operation,
+ ip6_address_t *segments, ip6_address_t *encap_src, u32 sl_index,
u32 weight)
{
ip6_sr_main_t *sm = &sr_main;
@@ -888,8 +897,8 @@ sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table,
if (operation == 1) /* Add SR List to an existing SR policy */
{
/* Create the new SL */
- segment_list =
- create_sl (sr_policy, segments, weight, sr_policy->is_encap);
+ segment_list = create_sl (sr_policy, segments, encap_src, weight,
+ sr_policy->is_encap);
/* Create a new LB DPO */
if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
@@ -962,7 +971,7 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
int rv = -1;
char is_del = 0, is_add = 0, is_mod = 0;
char policy_set = 0;
- ip6_address_t bsid, next_address;
+ ip6_address_t bsid, next_address, src_v6addr;
u32 sr_policy_index = (u32) ~ 0, sl_index = (u32) ~ 0;
u32 weight = (u32) ~ 0, fib_table = (u32) ~ 0;
ip6_address_t *segments = 0, *this_seg;
@@ -971,6 +980,7 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
u8 type = SR_POLICY_TYPE_DEFAULT;
u16 behavior = 0;
void *ls_plugin_mem = 0;
+ ip6_address_t *encap_src = 0;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
@@ -994,6 +1004,10 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
clib_memcpy_fast (this_seg->as_u8, next_address.as_u8,
sizeof (*this_seg));
}
+ else if (unformat (input, "v6src %U", unformat_ip6_address, &src_v6addr))
+ {
+ encap_src = &src_v6addr;
+ }
else if (unformat (input, "add sl"))
operation = 1;
else if (unformat (input, "del sl index %d", &sl_index))
@@ -1015,12 +1029,10 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
sr_policy_fn_registration_t *plugin = 0, **vec_plugins = 0;
sr_policy_fn_registration_t **plugin_it = 0;
- /* *INDENT-OFF* */
pool_foreach (plugin, sm->policy_plugin_functions)
{
vec_add1 (vec_plugins, plugin);
}
- /* *INDENT-ON* */
vec_foreach (plugin_it, vec_plugins)
{
@@ -1058,8 +1070,8 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (vec_len (segments) == 0)
return clib_error_return (0, "No Segment List specified");
- rv = sr_policy_add (&bsid, segments, weight, type, fib_table, is_encap,
- behavior, ls_plugin_mem);
+ rv = sr_policy_add (&bsid, segments, encap_src, weight, type, fib_table,
+ is_encap, behavior, ls_plugin_mem);
vec_free (segments);
}
@@ -1077,9 +1089,9 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (operation == 3 && weight == (u32) ~ 0)
return clib_error_return (0, "No new weight for the SL specified");
- rv = sr_policy_mod ((sr_policy_index != (u32) ~ 0 ? NULL : &bsid),
+ rv = sr_policy_mod ((sr_policy_index != (u32) ~0 ? NULL : &bsid),
sr_policy_index, fib_table, operation, segments,
- sl_index, weight);
+ encap_src, sl_index, weight);
if (segments)
vec_free (segments);
@@ -1115,7 +1127,6 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (sr_policy_command, static) = {
.path = "sr policy",
.short_help = "sr policy [add||del||mod] [bsid 2001::1||index 5] "
@@ -1135,7 +1146,6 @@ VLIB_CLI_COMMAND (sr_policy_command, static) = {
"SID lists.\n",
.function = sr_policy_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief CLI to display onscreen all the SR policies
@@ -1155,10 +1165,8 @@ show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "SR policies:");
- /* *INDENT-OFF* */
pool_foreach (sr_policy, sm->sr_policies)
{vec_add1 (vec_policies, sr_policy); }
- /* *INDENT-ON* */
vec_foreach_index (i, vec_policies)
{
@@ -1169,6 +1177,11 @@ show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "\tBehavior: %s",
(sr_policy->is_encap ? "Encapsulation" :
"SRH insertion"));
+ if (sr_policy->is_encap)
+ {
+ vlib_cli_output (vm, "\tEncapSrcIP: %U", format_ip6_address,
+ &sr_policy->encap_src);
+ }
switch (sr_policy->type)
{
case SR_POLICY_TYPE_SPRAY:
@@ -1205,13 +1218,11 @@ show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_policies_command, static) = {
.path = "show sr policies",
.short_help = "show sr policies",
.function = show_sr_policies_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief CLI to display onscreen the SR encaps source addr
@@ -1226,13 +1237,11 @@ show_sr_encaps_source_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_encaps_source_command, static) = {
.path = "show sr encaps source addr",
.short_help = "show sr encaps source addr",
.function = show_sr_encaps_source_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief CLI to display onscreen the hop-limit value used for SRv6 encapsulation
@@ -1247,13 +1256,11 @@ show_sr_encaps_hop_limit_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_encaps_hop_limit_command, static) = {
.path = "show sr encaps hop-limit",
.short_help = "show sr encaps hop-limit",
.function = show_sr_encaps_hop_limit_command_fn,
};
-/* *INDENT-ON* */
/*************************** SR rewrite graph node ****************************/
/**
@@ -1293,14 +1300,14 @@ srv6_tef_behavior (vlib_node_runtime_t *node, vlib_buffer_t *b0,
sizeof (ip6_address_t) * (srh->last_entry + 1));
unix_time_now_nsec_fraction (&ts.sec, &ts.nsec);
- srh_pt_tlv->t64.sec = htobe32 (ts.sec);
- srh_pt_tlv->t64.nsec = htobe32 (ts.nsec);
+ srh_pt_tlv->t64.sec = clib_host_to_net_u32 (ts.sec);
+ srh_pt_tlv->t64.nsec = clib_host_to_net_u32 (ts.nsec);
ls = sr_pt_find_iface (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
if (ls)
{
id_ld = ls->id << 4;
id_ld |= ls->ingress_load;
- srh_pt_tlv->id_ld = htobe16 (id_ld);
+ srh_pt_tlv->id_ld = clib_host_to_net_u16 (id_ld);
}
}
@@ -1571,7 +1578,6 @@ sr_policy_rewrite_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_node) = {
.function = sr_policy_rewrite_encaps,
.name = "sr-pl-rewrite-encaps",
@@ -1587,7 +1593,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/**
* @brief IPv4 encapsulation processing as per RFC2473
@@ -1864,7 +1869,6 @@ sr_policy_rewrite_encaps_v4 (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_v4_node) = {
.function = sr_policy_rewrite_encaps_v4,
.name = "sr-pl-rewrite-encaps-v4",
@@ -1880,7 +1884,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_v4_node) = {
#undef _
},
};
-/* *INDENT-ON* */
always_inline u32
ip_flow_hash (void *data)
@@ -2306,7 +2309,6 @@ sr_policy_rewrite_encaps_l2 (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_l2_node) = {
.function = sr_policy_rewrite_encaps_l2,
.name = "sr-pl-rewrite-encaps-l2",
@@ -2322,7 +2324,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_l2_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/**
* @brief Graph node for applying a SR policy into a packet. SRH insertion.
@@ -2728,7 +2729,6 @@ sr_policy_rewrite_insert (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_policy_rewrite_insert_node) = {
.function = sr_policy_rewrite_insert,
.name = "sr-pl-rewrite-insert",
@@ -2744,7 +2744,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_insert_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/**
* @brief Graph node for applying a SR policy into a packet. BSID - SRH insertion.
@@ -3139,7 +3138,6 @@ sr_policy_rewrite_b_insert (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_policy_rewrite_b_insert_node) = {
.function = sr_policy_rewrite_b_insert,
.name = "sr-pl-rewrite-b-insert",
@@ -3155,7 +3153,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_b_insert_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/**
* @brief Function BSID encapsulation
@@ -3450,7 +3447,6 @@ sr_policy_rewrite_b_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_policy_rewrite_b_encaps_node) = {
.function = sr_policy_rewrite_b_encaps,
.name = "sr-pl-rewrite-b-encaps",
@@ -3466,7 +3462,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_b_encaps_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/*************************** SR Policy plugins ******************************/
/**
@@ -3534,10 +3529,8 @@ show_sr_policy_behaviors_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "SR Policy behaviors:\n-----------------------\n\n");
- /* *INDENT-OFF* */
pool_foreach (plugin, sm->policy_plugin_functions)
{ vec_add1 (plugins_vec, plugin); }
- /* *INDENT-ON* */
vlib_cli_output (vm, "Plugin behaviors:\n");
for (i = 0; i < vec_len (plugins_vec); i++)
@@ -3550,13 +3543,11 @@ show_sr_policy_behaviors_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_policy_behaviors_command, static) = {
.path = "show sr policy behaviors",
.short_help = "show sr policy behaviors",
.function = show_sr_policy_behaviors_command_fn,
};
-/* *INDENT-ON* */
/*************************** SR Segment Lists DPOs ****************************/
static u8 *
diff --git a/src/vnet/srv6/sr_pt.api b/src/vnet/srv6/sr_pt.api
new file mode 100644
index 00000000000..e86359b421f
--- /dev/null
+++ b/src/vnet/srv6/sr_pt.api
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+option version = "1.0.0";
+
+import "vnet/interface_types.api";
+
+/** \brief SR PT iface dump request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define sr_pt_iface_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+define sr_pt_iface_details
+{
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u16 id;
+ u8 ingress_load;
+ u8 egress_load;
+ u8 tts_template;
+};
+
+/** \brief SR PT iface add request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - index of the interface to add to SR PT
+ @param id - SR PT interface id
+ @param ingress_load - incoming interface load
+ @param egress_load - outgoing interface load
+ @param tts_template - truncated timestamp template to use
+*/
+autoreply define sr_pt_iface_add
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u16 id;
+ u8 ingress_load;
+ u8 egress_load;
+ u8 tts_template;
+};
+
+/** \brief SR PT iface del request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - index of the interface to delete from SR PT
+*/
+autoreply define sr_pt_iface_del
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+}; \ No newline at end of file
diff --git a/src/vnet/srv6/sr_pt.c b/src/vnet/srv6/sr_pt.c
index 5d907eaf639..6299faa84ab 100644
--- a/src/vnet/srv6/sr_pt.c
+++ b/src/vnet/srv6/sr_pt.c
@@ -69,6 +69,8 @@ sr_pt_add_iface (u32 iface, u16 id, u8 ingress_load, u8 egress_load,
if (tts_template > SR_PT_TTS_TEMPLATE_MAX)
return SR_PT_ERR_TTS_TEMPLATE_INVALID;
+ vnet_feature_enable_disable ("ip6-output", "pt", iface, 1, 0, 0);
+
/* Create a new sr_pt_iface */
pool_get_zero (sr_pt->sr_pt_iface, ls);
ls->iface = iface;
@@ -101,6 +103,7 @@ sr_pt_del_iface (u32 iface)
{
/* Retrieve sr_pt_iface */
ls = pool_elt_at_index (sr_pt->sr_pt_iface, p[0]);
+ vnet_feature_enable_disable ("ip6-output", "pt", iface, 0, 0, 0);
/* Delete sr_pt_iface */
pool_put (sr_pt->sr_pt_iface, ls);
mhash_unset (&sr_pt->sr_pt_iface_index_hash, &iface, NULL);
diff --git a/src/vnet/srv6/sr_pt.h b/src/vnet/srv6/sr_pt.h
index 87fdb68a36e..53001e10ac7 100644
--- a/src/vnet/srv6/sr_pt.h
+++ b/src/vnet/srv6/sr_pt.h
@@ -11,6 +11,8 @@
#ifndef included_vnet_sr_pt_h
#define included_vnet_sr_pt_h
+#define IP6_HBH_PT_TYPE 50
+
/*SR PT error codes*/
#define SR_PT_ERR_NOENT -1 /* No such entry*/
#define SR_PT_ERR_EXIST -2 /* Entry exists */
@@ -37,6 +39,11 @@
#define SR_PT_TTS_SHIFT_TEMPLATE_2 16
#define SR_PT_TTS_SHIFT_TEMPLATE_3 20
+/*PT node behaviors*/
+#define PT_BEHAVIOR_SRC 0
+#define PT_BEHAVIOR_MID 1
+#define PT_BEHAVIOR_SNK 2
+
typedef struct
{
u32 iface; /**< Interface */
@@ -46,6 +53,17 @@ typedef struct
u8 tts_template; /**< Interface TTS Template */
} sr_pt_iface_t;
+typedef struct
+{
+ u16 oif_oil;
+ u8 tts;
+} __clib_packed sr_pt_cmd_t;
+
+typedef struct
+{
+ sr_pt_cmd_t cmd_stack[12];
+} __clib_packed ip6_hop_by_hop_option_pt_t;
+
/**
* @brief SR Path Tracing main datastructure
*/
@@ -57,9 +75,12 @@ typedef struct
/* Hash table for sr_pt_iface parameters */
mhash_t sr_pt_iface_index_hash;
+ /* convenience */
+ u16 msg_id_base;
} sr_pt_main_t;
extern sr_pt_main_t sr_pt_main;
+extern vlib_node_registration_t sr_pt_node;
extern int sr_pt_add_iface (u32 iface, u16 id, u8 ingress_load, u8 egress_load,
u8 tts_template);
extern int sr_pt_del_iface (u32 iface);
diff --git a/src/vnet/srv6/sr_pt_api.c b/src/vnet/srv6/sr_pt_api.c
new file mode 100644
index 00000000000..b0b67a210fb
--- /dev/null
+++ b/src/vnet/srv6/sr_pt_api.c
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+#include <vnet/srv6/sr_pt.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+
+#include <vnet/srv6/sr_pt.api_enum.h>
+#include <vnet/srv6/sr_pt.api_types.h>
+
+#define REPLY_MSG_ID_BASE sr_pt_main.msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+/* Build and send one sr_pt_iface_details reply describing PT
+ * interface @t to client @reg, echoing @context. */
+static void
+send_sr_pt_iface_details (sr_pt_iface_t *t, vl_api_registration_t *reg,
+ u32 context)
+{
+ vl_api_sr_pt_iface_details_t *rmp;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ clib_memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_SR_PT_IFACE_DETAILS);
+
+ /* NOTE(review): ntohl/ntohs are used here for host->network
+  * conversion; the byte swap is symmetric so the wire result is
+  * correct, but htonl/htons would state the intent. */
+ rmp->sw_if_index = ntohl (t->iface);
+ rmp->id = ntohs (t->id);
+ rmp->ingress_load = t->ingress_load;
+ rmp->egress_load = t->egress_load;
+ rmp->tts_template = t->tts_template;
+
+ rmp->context = context;
+
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+/* Handler for sr_pt_iface_dump: stream one details reply per
+ * PT-enabled interface in the pool. */
+static void
+vl_api_sr_pt_iface_dump_t_handler (vl_api_sr_pt_iface_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ sr_pt_main_t *pt = &sr_pt_main;
+ sr_pt_iface_t *t;
+
+ /* Client may have disconnected; nothing to send in that case. */
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ pool_foreach (t, pt->sr_pt_iface)
+ {
+ send_sr_pt_iface_details (t, reg, mp->context);
+ }
+}
+
+/* Handler for sr_pt_iface_add: enable Path Tracing on an interface.
+ * rv carries 0 or a negative SR_PT_ERR_* code from sr_pt_add_iface. */
+static void
+vl_api_sr_pt_iface_add_t_handler (vl_api_sr_pt_iface_add_t *mp)
+{
+ vl_api_sr_pt_iface_add_reply_t *rmp;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ /* Wire fields are network order; convert before use. */
+ rv = sr_pt_add_iface (ntohl (mp->sw_if_index), ntohs (mp->id),
+ mp->ingress_load, mp->egress_load, mp->tts_template);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_SR_PT_IFACE_ADD_REPLY);
+}
+
+/* Handler for sr_pt_iface_del: disable Path Tracing on an interface.
+ * rv carries 0 or a negative SR_PT_ERR_* code from sr_pt_del_iface. */
+static void
+vl_api_sr_pt_iface_del_t_handler (vl_api_sr_pt_iface_del_t *mp)
+{
+ vl_api_sr_pt_iface_del_reply_t *rmp;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = sr_pt_del_iface (ntohl (mp->sw_if_index));
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_SR_PT_IFACE_DEL_REPLY);
+}
+
+#include <vnet/srv6/sr_pt.api.c>
+/* API init: register the sr_pt.* messages and record the allocated
+ * message-id base used by REPLY_MSG_ID_BASE. */
+static clib_error_t *
+sr_pt_api_hookup (vlib_main_t *vm)
+{
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ REPLY_MSG_ID_BASE = setup_message_id_table ();
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (sr_pt_api_hookup); \ No newline at end of file
diff --git a/src/vnet/srv6/sr_pt_node.c b/src/vnet/srv6/sr_pt_node.c
new file mode 100644
index 00000000000..fa8b1f69b57
--- /dev/null
+++ b/src/vnet/srv6/sr_pt_node.c
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/srv6/sr_pt.h>
+
+/**
+ * @brief PT node trace
+ */
+/* Per-packet trace record for the "pt" node. */
+typedef struct
+{
+ u32 iface;		/* outgoing sw_if_index */
+ u16 id;		/* PT interface id */
+ u8 load;		/* egress interface load */
+ timestamp_64_t t64;	/* full timestamp taken for the batch */
+ u8 tts_template;	/* truncated-timestamp template in use */
+ u8 tts;		/* truncated timestamp recorded */
+ u8 behavior;		/* PT_BEHAVIOR_* applied to the packet */
+} pt_trace_t;
+
+/* Trace formatter for the "pt" node; only the midpoint behavior
+ * produces output, other behaviors format as an empty string. */
+static u8 *
+format_pt_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ pt_trace_t *t = va_arg (*args, pt_trace_t *);
+ switch (t->behavior)
+ {
+ case PT_BEHAVIOR_MID:
+ /* NOTE(review): t64.sec/t64.nsec are byte-swapped before being
+  * printed with %u, so the network-order representation is shown
+  * rather than the host value — confirm this is intended. */
+ s = format (
+ s,
+ "Behavior Midpoint, outgoing interface %U, outgoing interface id %u, "
+ "outgoing interface load %u, t64_sec %u, t64_nsec %u, tts_template "
+ "%u, tts %u",
+ format_vnet_sw_if_index_name, vnet_get_main (), t->iface, t->id,
+ t->load, clib_host_to_net_u32 (t->t64.sec),
+ clib_host_to_net_u32 (t->t64.nsec), t->tts_template, t->tts);
+ break;
+ default:
+ break;
+ }
+ return s;
+}
+
+/* Path Tracing midpoint processing: if the packet carries a Hop-by-Hop
+ * PT option, shift the CMD stack down one slot (dropping the oldest of
+ * the 12 entries) and record this hop in slot 0: outgoing interface id
+ * (12 bits) | egress load (4 bits), plus a truncated timestamp taken
+ * from @t64 according to the interface's TTS template. */
+static_always_inline void
+pt_midpoint_processing (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t *b0, ip6_header_t *ip0,
+ sr_pt_iface_t *ls, timestamp_64_t t64)
+{
+ ip6_hop_by_hop_header_t *hbh;
+ ip6_hop_by_hop_option_t *hbh_opt;
+ ip6_hop_by_hop_option_pt_t *hbh_opt_pt;
+ u8 tts = 0;
+
+ if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ {
+ hbh = (void *) (ip0 + 1);
+ hbh_opt = (void *) (hbh + 1);
+ if (hbh_opt->type == IP6_HBH_PT_TYPE)
+ {
+ hbh_opt_pt = (void *) (hbh_opt + 1);
+ /* Source and destination overlap: memmove, not memcpy
+ * (overlapping memcpy is undefined behavior). Keep the
+ * first 11 of the 12 entries. */
+ memmove (&hbh_opt_pt->cmd_stack[1], &hbh_opt_pt->cmd_stack[0],
+ sizeof (sr_pt_cmd_t) * 11);
+ /* Combine id and load in host order first, then swap once.
+ * OR-ing the load after clib_host_to_net_u16 would place the
+ * load bits in the wrong wire byte on little-endian hosts
+ * (cf. srv6_tef_behavior, which combines before htobe16). */
+ hbh_opt_pt->cmd_stack[0].oif_oil =
+ clib_host_to_net_u16 ((ls->id << 4) | ls->egress_load);
+ switch (ls->tts_template)
+ {
+ case SR_PT_TTS_TEMPLATE_0:
+ tts = t64.nsec >> SR_PT_TTS_SHIFT_TEMPLATE_0;
+ break;
+ case SR_PT_TTS_TEMPLATE_1:
+ tts = t64.nsec >> SR_PT_TTS_SHIFT_TEMPLATE_1;
+ break;
+ case SR_PT_TTS_TEMPLATE_2:
+ tts = t64.nsec >> SR_PT_TTS_SHIFT_TEMPLATE_2;
+ break;
+ case SR_PT_TTS_TEMPLATE_3:
+ /* Fixed copy-paste bug: template 3 previously shifted by
+ * SR_PT_TTS_SHIFT_TEMPLATE_0. */
+ tts = t64.nsec >> SR_PT_TTS_SHIFT_TEMPLATE_3;
+ break;
+ default:
+ break;
+ }
+ hbh_opt_pt->cmd_stack[0].tts = tts;
+ }
+ }
+}
+
+/* ip6-output feature node "pt": for packets leaving a PT-enabled
+ * interface, record this midpoint hop in the packet's Hop-by-Hop PT
+ * option, then hand the packet to interface-output (next index 0). */
+VLIB_NODE_FN (sr_pt_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Getting the timestamp (one for each batch of packets) */
+ timestamp_64_t t64 = {};
+ unix_time_now_nsec_fraction (&t64.sec, &t64.nsec);
+
+ /* Single loop for potentially the last three packets */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ u32 iface;
+ vlib_buffer_t *b0;
+ u32 next0 = 0;
+ ethernet_header_t *en0;
+ ip6_header_t *ip0 = 0;
+ /* Per-packet state: previously these lived outside the loop, so a
+ * traced non-PT packet reported the previous packet's iface data
+ * and behavior. */
+ sr_pt_iface_t *ls = 0;
+ u8 pt_behavior = ~(u8) 0;
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ iface = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ ls = sr_pt_find_iface (iface);
+ if (ls)
+ {
+ en0 = vlib_buffer_get_current (b0);
+ ip0 = (void *) (en0 + 1);
+ pt_midpoint_processing (vm, node, b0, ip0, ls, t64);
+ pt_behavior = PT_BEHAVIOR_MID;
+ }
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ pt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->iface = iface;
+ /* ls is NULL when the TX interface is not PT-enabled;
+ * do not dereference it in that case. */
+ tr->id = ls ? ls->id : 0;
+ tr->load = ls ? ls->egress_load : 0;
+ tr->tts_template = ls ? ls->tts_template : 0;
+ tr->t64.sec = t64.sec;
+ tr->t64.nsec = t64.nsec;
+ tr->tts = t64.nsec >> 20;
+ tr->behavior = pt_behavior;
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+/* Graph node registration: single next node, packets always continue
+ * to interface-output. */
+VLIB_REGISTER_NODE (sr_pt_node) = {
+ .name = "pt",
+ .vector_size = sizeof (u32),
+ .format_trace = format_pt_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = 0,
+ .n_next_nodes = 1,
+ .next_nodes = { [0] = "interface-output" },
+};
+
+/* Place "pt" on the ip6-output feature arc; it is enabled/disabled per
+ * interface by sr_pt_add_iface()/sr_pt_del_iface(). */
+VNET_FEATURE_INIT (sr_pt_node, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "pt",
+}; \ No newline at end of file
diff --git a/src/vnet/srv6/sr_steering.c b/src/vnet/srv6/sr_steering.c
index 6e5c5e0e9f0..94c3d67a27a 100644
--- a/src/vnet/srv6/sr_steering.c
+++ b/src/vnet/srv6/sr_steering.c
@@ -456,7 +456,6 @@ sr_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (sr_steer_policy_command, static) = {
.path = "sr steer",
.short_help = "sr steer (del) [l3 <ip_addr/mask>|l2 <sf_if>] "
@@ -471,7 +470,6 @@ VLIB_CLI_COMMAND (sr_steer_policy_command, static) = {
"\t\tsr steer del l3 2001::/64 via sr_policy index 5\n",
.function = sr_steer_policy_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_sr_steering_policies_command_fn (vlib_main_t * vm,
@@ -488,9 +486,7 @@ show_sr_steering_policies_command_fn (vlib_main_t * vm,
int i;
vlib_cli_output (vm, "SR steering policies:");
- /* *INDENT-OFF* */
pool_foreach (steer_pl, sm->steer_policies) {vec_add1(steer_policies, steer_pl);}
- /* *INDENT-ON* */
vlib_cli_output (vm, "Traffic\t\tSR policy BSID");
for (i = 0; i < vec_len (steer_policies); i++)
{
@@ -523,13 +519,11 @@ show_sr_steering_policies_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_steering_policies_command, static) = {
.path = "show sr steering-policies",
.short_help = "show sr steering-policies",
.function = show_sr_steering_policies_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
sr_steering_init (vlib_main_t * vm)
@@ -547,18 +541,14 @@ sr_steering_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (sr_steering_init);
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (sr_pl_rewrite_encaps_l2, static) =
{
.arc_name = "device-input",
.node_name = "sr-pl-rewrite-encaps-l2",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/srv6/sr_test.c b/src/vnet/srv6/sr_test.c
index 85f64e1e230..be898599e96 100644
--- a/src/vnet/srv6/sr_test.c
+++ b/src/vnet/srv6/sr_test.c
@@ -80,6 +80,18 @@ api_sr_policy_add (vat_main_t *vam)
}
static int
+api_sr_policy_mod_v2 (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_policy_add_v2 (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
api_sr_localsids_dump (vat_main_t *vam)
{
return -1;
@@ -92,6 +104,12 @@ api_sr_policies_dump (vat_main_t *vam)
}
static int
+api_sr_policies_v2_dump (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
api_sr_policies_with_sl_index_dump (vat_main_t *vam)
{
return -1;
@@ -109,6 +127,11 @@ vl_api_sr_policies_details_t_handler (vl_api_sr_policies_details_t *mp)
}
static void
+vl_api_sr_policies_v2_details_t_handler (vl_api_sr_policies_v2_details_t *mp)
+{
+}
+
+static void
vl_api_sr_localsids_details_t_handler (vl_api_sr_localsids_details_t *mp)
{
}
diff --git a/src/vnet/syslog/syslog.c b/src/vnet/syslog/syslog.c
index 8f3313950e8..caa55830eb3 100644
--- a/src/vnet/syslog/syslog.c
+++ b/src/vnet/syslog/syslog.c
@@ -506,7 +506,6 @@ show_syslog_filter_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
/*?
* Set syslog sender configuration.
*
@@ -599,7 +598,6 @@ VLIB_CLI_COMMAND (show_syslog_filter_command, static) = {
.short_help = "show syslog filter",
.function = show_syslog_filter_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
syslog_init (vlib_main_t * vm)
diff --git a/src/vnet/syslog/syslog_api.c b/src/vnet/syslog/syslog_api.c
index 21e79c6e2bd..195a6e52eef 100644
--- a/src/vnet/syslog/syslog_api.c
+++ b/src/vnet/syslog/syslog_api.c
@@ -128,7 +128,6 @@ vl_api_syslog_get_sender_t_handler (vl_api_syslog_get_sender_t * mp)
syslog_main_t *sm = &syslog_main;
u32 vrf_id;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SYSLOG_GET_SENDER_REPLY,
({
clib_memcpy (&rmp->collector_address, &(sm->collector),
@@ -143,7 +142,6 @@ vl_api_syslog_get_sender_t_handler (vl_api_syslog_get_sender_t * mp)
rmp->vrf_id = vrf_id;
rmp->max_msg_size = htonl (sm->max_msg_size);
}))
- /* *INDENT-ON* */
}
static void
@@ -171,12 +169,10 @@ vl_api_syslog_get_filter_t_handler (vl_api_syslog_get_filter_t * mp)
vl_api_syslog_get_filter_reply_t *rmp;
syslog_main_t *sm = &syslog_main;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SYSLOG_GET_FILTER_REPLY,
({
rv = syslog_severity_encode (sm->severity_filter, &rmp->severity);
}))
- /* *INDENT-ON* */
}
#include <vnet/syslog/syslog.api.c>
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 09913fa1242..efc72a227e8 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -25,6 +25,8 @@
#include <vnet/dpo/load_balance.h>
#include <math.h>
+#include <vlib/stats/stats.h>
+
tcp_main_t tcp_main;
typedef struct
@@ -188,8 +190,7 @@ tcp_session_get_listener (u32 listener_index)
static tcp_connection_t *
tcp_half_open_connection_alloc (void)
{
- ASSERT (vlib_get_thread_index () == 0);
- return tcp_connection_alloc (0);
+ return tcp_connection_alloc (transport_cl_thread ());
}
/**
@@ -199,7 +200,8 @@ tcp_half_open_connection_alloc (void)
static void
tcp_half_open_connection_free (tcp_connection_t * tc)
{
- ASSERT (vlib_get_thread_index () == 0);
+ ASSERT (vlib_get_thread_index () == tc->c_thread_index ||
+ vlib_thread_is_main_w_barrier ());
return tcp_connection_free (tc);
}
@@ -240,8 +242,8 @@ tcp_connection_cleanup (tcp_connection_t * tc)
/* Cleanup local endpoint if this was an active connect */
if (!(tc->cfg_flags & TCP_CFG_F_NO_ENDPOINT))
- transport_endpoint_cleanup (TRANSPORT_PROTO_TCP, &tc->c_lcl_ip,
- tc->c_lcl_port);
+ transport_release_local_endpoint (TRANSPORT_PROTO_TCP, &tc->c_lcl_ip,
+ tc->c_lcl_port);
/* Check if connection is not yet fully established */
if (tc->state == TCP_STATE_SYN_SENT)
@@ -408,8 +410,8 @@ tcp_connection_close (tcp_connection_t * tc)
case TCP_STATE_CLOSE_WAIT:
if (!transport_max_tx_dequeue (&tc->connection))
{
- tcp_send_fin (tc);
tcp_connection_timers_reset (tc);
+ tcp_send_fin (tc);
tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
tcp_cfg.lastack_time);
@@ -489,6 +491,14 @@ tcp_session_reset (u32 conn_index, u32 thread_index)
{
tcp_connection_t *tc;
tc = tcp_connection_get (conn_index, thread_index);
+
+ /* For half-opens just cleanup */
+ if (tc->state == TCP_STATE_SYN_SENT)
+ {
+ tcp_connection_cleanup (tc);
+ return;
+ }
+
tcp_send_reset (tc);
tcp_connection_timers_reset (tc);
tcp_cong_recovery_off (tc);
@@ -764,11 +774,13 @@ tcp_connection_init_vars (tcp_connection_t * tc)
}
static int
-tcp_alloc_custom_local_endpoint (tcp_main_t * tm, ip46_address_t * lcl_addr,
- u16 * lcl_port, u8 is_ip4)
+tcp_alloc_custom_local_endpoint (ip46_address_t *lcl_addr, u16 *lcl_port,
+ transport_endpoint_cfg_t *rmt)
{
+ tcp_main_t *tm = vnet_get_tcp_main ();
int index, port;
- if (is_ip4)
+
+ if (rmt->is_ip4)
{
index = tm->last_v4_addr_rotor++;
if (tm->last_v4_addr_rotor >= vec_len (tcp_cfg.ip4_src_addrs))
@@ -784,7 +796,7 @@ tcp_alloc_custom_local_endpoint (tcp_main_t * tm, ip46_address_t * lcl_addr,
clib_memcpy_fast (&lcl_addr->ip6, &tcp_cfg.ip6_src_addrs[index],
sizeof (ip6_address_t));
}
- port = transport_alloc_local_port (TRANSPORT_PROTO_TCP, lcl_addr);
+ port = transport_alloc_local_port (TRANSPORT_PROTO_TCP, lcl_addr, rmt);
if (port < 1)
return SESSION_E_NOPORT;
*lcl_port = port;
@@ -794,7 +806,6 @@ tcp_alloc_custom_local_endpoint (tcp_main_t * tm, ip46_address_t * lcl_addr,
static int
tcp_session_open (transport_endpoint_cfg_t * rmt)
{
- tcp_main_t *tm = vnet_get_tcp_main ();
tcp_connection_t *tc;
ip46_address_t lcl_addr;
u16 lcl_port;
@@ -805,27 +816,13 @@ tcp_session_open (transport_endpoint_cfg_t * rmt)
*/
if ((rmt->is_ip4 && vec_len (tcp_cfg.ip4_src_addrs))
|| (!rmt->is_ip4 && vec_len (tcp_cfg.ip6_src_addrs)))
- rv = tcp_alloc_custom_local_endpoint (tm, &lcl_addr, &lcl_port,
- rmt->is_ip4);
+ rv = tcp_alloc_custom_local_endpoint (&lcl_addr, &lcl_port, rmt);
else
- rv = transport_alloc_local_endpoint (TRANSPORT_PROTO_TCP,
- rmt, &lcl_addr, &lcl_port);
+ rv = transport_alloc_local_endpoint (TRANSPORT_PROTO_TCP, rmt, &lcl_addr,
+ &lcl_port);
if (rv)
- {
- if (rv != SESSION_E_PORTINUSE)
- return rv;
-
- if (session_lookup_connection (rmt->fib_index, &lcl_addr, &rmt->ip,
- lcl_port, rmt->port, TRANSPORT_PROTO_TCP,
- rmt->is_ip4))
- return SESSION_E_PORTINUSE;
-
- /* 5-tuple is available so increase lcl endpoint refcount and proceed
- * with connection allocation */
- transport_share_local_endpoint (TRANSPORT_PROTO_TCP, &lcl_addr,
- lcl_port);
- }
+ return rv;
/*
* Create connection and send SYN
@@ -834,7 +831,7 @@ tcp_session_open (transport_endpoint_cfg_t * rmt)
ip_copy (&tc->c_rmt_ip, &rmt->ip, rmt->is_ip4);
ip_copy (&tc->c_lcl_ip, &lcl_addr, rmt->is_ip4);
tc->c_rmt_port = rmt->port;
- tc->c_lcl_port = clib_host_to_net_u16 (lcl_port);
+ tc->c_lcl_port = lcl_port;
tc->c_is_ip4 = rmt->is_ip4;
tc->c_proto = TRANSPORT_PROTO_TCP;
tc->c_fib_index = rmt->fib_index;
@@ -1226,7 +1223,6 @@ tcp_timer_waitclose_handler (tcp_connection_t * tc)
}
}
-/* *INDENT-OFF* */
static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] =
{
tcp_timer_retransmit_handler,
@@ -1234,7 +1230,6 @@ static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] =
tcp_timer_waitclose_handler,
tcp_timer_retransmit_syn_handler,
};
-/* *INDENT-ON* */
static void
tcp_dispatch_pending_timers (tcp_worker_ctx_t * wrk)
@@ -1342,7 +1337,6 @@ tcp_session_app_rx_evt (transport_connection_t *conn)
return 0;
}
-/* *INDENT-OFF* */
const static transport_proto_vft_t tcp_proto = {
.enable = vnet_tcp_enable_disable,
.start_listen = tcp_session_bind,
@@ -1373,7 +1367,6 @@ const static transport_proto_vft_t tcp_proto = {
.service_type = TRANSPORT_SERVICE_VC,
},
};
-/* *INDENT-ON* */
void
tcp_connection_tx_pacer_update (tcp_connection_t * tc)
@@ -1463,6 +1456,51 @@ tcp_initialize_iss_seed (tcp_main_t * tm)
tm->iss_seed.second = random_u64 (&time_now);
}
+static void
+tcp_stats_collector_fn (vlib_stats_collector_data_t *d)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ counter_t **counters = d->entry->data;
+ counter_t *cb = counters[0];
+ tcp_wrk_stats_t acc = {};
+ tcp_worker_ctx_t *wrk;
+
+ vec_foreach (wrk, tm->wrk_ctx)
+ {
+#define _(name, type, str) acc.name += wrk->stats.name;
+ foreach_tcp_wrk_stat
+#undef _
+ }
+
+#define _(name, type, str) cb[TCP_STAT_##name] = acc.name;
+ foreach_tcp_wrk_stat
+#undef _
+}
+
+static void
+tcp_counters_init (tcp_main_t *tm)
+{
+ vlib_stats_collector_reg_t r = {};
+ u32 idx;
+
+ if (tm->counters_init)
+ return;
+
+ r.entry_index = idx = vlib_stats_add_counter_vector ("/sys/tcp");
+ r.collect_fn = tcp_stats_collector_fn;
+ vlib_stats_validate (idx, 0, TCP_STAT_no_buffer);
+
+#define _(name, type, str) \
+ vlib_stats_add_symlink (idx, TCP_STAT_##name, "/sys/tcp/%s", \
+ CLIB_STRING_MACRO (name));
+ foreach_tcp_wrk_stat
+#undef _
+
+ vlib_stats_register_collector_fn (&r);
+
+ tm->counters_init = 1;
+}
+
static clib_error_t *
tcp_main_enable (vlib_main_t * vm)
{
@@ -1539,10 +1577,8 @@ tcp_main_enable (vlib_main_t * vm)
tm->bytes_per_buffer = vlib_buffer_get_default_data_size (vm);
tm->cc_last_type = TCP_CC_LAST;
- tm->ipl_next_node[0] = vlib_node_get_next (vm, session_queue_node.index,
- ip4_lookup_node.index);
- tm->ipl_next_node[1] = vlib_node_get_next (vm, session_queue_node.index,
- ip6_lookup_node.index);
+ tcp_counters_init (tm);
+
return error;
}
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index c5dd3172302..2362a8bb857 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -66,6 +66,13 @@ typedef struct tcp_wrk_stats_
#undef _
} tcp_wrk_stats_t;
+typedef enum
+{
+#define _(name, type, str) TCP_STAT_##name,
+ foreach_tcp_wrk_stat
+#undef _
+} tcp_wrk_stats_e;
+
typedef struct tcp_free_req_
{
clib_time_type_t free_time;
@@ -215,9 +222,6 @@ typedef struct _tcp_main
/** vlib buffer size */
u32 bytes_per_buffer;
- /** Session layer edge indices to ip lookup (syns, rst) */
- u32 ipl_next_node[2];
-
/** Dispatch table by state and flags */
tcp_lookup_dispatch_t dispatch_table[TCP_N_STATES][64];
@@ -236,6 +240,9 @@ typedef struct _tcp_main
/** Flag that indicates if stack is on or off */
u8 is_enabled;
+ /** Set if counters on stats segment initialized */
+ u8 counters_init;
+
/** Flag that indicates if v4 punting is enabled */
u8 punt_unknown4;
@@ -268,6 +275,10 @@ extern vlib_node_registration_t tcp4_rcv_process_node;
extern vlib_node_registration_t tcp6_rcv_process_node;
extern vlib_node_registration_t tcp4_listen_node;
extern vlib_node_registration_t tcp6_listen_node;
+extern vlib_node_registration_t tcp4_input_nolookup_node;
+extern vlib_node_registration_t tcp6_input_nolookup_node;
+extern vlib_node_registration_t tcp4_drop_node;
+extern vlib_node_registration_t tcp6_drop_node;
#define tcp_cfg tcp_main.cfg
#define tcp_node_index(node_id, is_ip4) \
diff --git a/src/vnet/tcp/tcp_bt.c b/src/vnet/tcp/tcp_bt.c
index 67e9a14ceda..3cb57a550de 100644
--- a/src/vnet/tcp/tcp_bt.c
+++ b/src/vnet/tcp/tcp_bt.c
@@ -638,11 +638,9 @@ tcp_bt_flush_samples (tcp_connection_t * tc)
vec_validate (samples, pool_elts (bt->samples) - 1);
vec_reset_length (samples);
- /* *INDENT-OFF* */
pool_foreach (bts, bt->samples) {
vec_add1 (samples, bts - bt->samples);
}
- /* *INDENT-ON* */
vec_foreach (si, samples)
{
diff --git a/src/vnet/tcp/tcp_cli.c b/src/vnet/tcp/tcp_cli.c
index f7d26ff79d0..b04c0bdc0cf 100644
--- a/src/vnet/tcp/tcp_cli.c
+++ b/src/vnet/tcp/tcp_cli.c
@@ -613,14 +613,12 @@ tcp_src_address_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tcp_src_address_command, static) =
{
.path = "tcp src-address",
.short_help = "tcp src-address <ip-addr> [- <ip-addr>] add src address range",
.function = tcp_src_address_fn,
};
-/* *INDENT-ON* */
static u8 *
tcp_scoreboard_dump_trace (u8 * s, sack_scoreboard_t * sb)
@@ -676,14 +674,12 @@ tcp_show_scoreboard_trace_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tcp_show_scoreboard_trace_command, static) =
{
.path = "show tcp scoreboard trace",
.short_help = "show tcp scoreboard trace <connection>",
.function = tcp_show_scoreboard_trace_fn,
};
-/* *INDENT-ON* */
u8 *
tcp_scoreboard_replay (u8 * s, tcp_connection_t * tc, u8 verbose)
@@ -801,14 +797,12 @@ tcp_scoreboard_trace_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tcp_replay_scoreboard_command, static) =
{
.path = "tcp replay scoreboard",
.short_help = "tcp replay scoreboard <connection>",
.function = tcp_scoreboard_trace_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_tcp_punt_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -824,14 +818,12 @@ show_tcp_punt_fn (vlib_main_t * vm, unformat_input_t * input,
tm->punt_unknown6 ? "enabled" : "disabled");
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_tcp_punt_command, static) =
{
.path = "show tcp punt",
.short_help = "show tcp punt",
.function = show_tcp_punt_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -863,14 +855,12 @@ show_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_tcp_stats_command, static) =
{
.path = "show tcp stats",
.short_help = "show tcp stats",
.function = show_tcp_stats_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
clear_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -893,14 +883,12 @@ clear_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_tcp_stats_command, static) =
{
.path = "clear tcp stats",
.short_help = "clear tcp stats",
.function = clear_tcp_stats_fn,
};
-/* *INDENT-ON* */
uword
unformat_tcp_cc_algo (unformat_input_t * input, va_list * va)
diff --git a/src/vnet/tcp/tcp_debug.c b/src/vnet/tcp/tcp_debug.c
index 309b6951559..ab466f30efb 100644
--- a/src/vnet/tcp/tcp_debug.c
+++ b/src/vnet/tcp/tcp_debug.c
@@ -134,14 +134,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tcp_debug_command, static) =
{
.path = "tcp debug",
.short_help = "tcp [show] [debug group <N> level <N>]",
.function = tcp_debug_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h
index 1202f7f44d3..04e921cd601 100644
--- a/src/vnet/tcp/tcp_debug.h
+++ b/src/vnet/tcp/tcp_debug.h
@@ -17,13 +17,18 @@
#define SRC_VNET_TCP_TCP_DEBUG_H_
#include <vlib/vlib.h>
+#include <vpp/vnet/config.h>
/**
* Build debugging infra unconditionally. Debug components controlled via
* debug configuration. Comes with some overhead so it's not recommended for
* production/performance scenarios. Takes priority over TCP_DEBUG_ENABLE.
*/
+#ifdef VPP_TCP_DEBUG_ALWAYS
+#define TCP_DEBUG_ALWAYS (1)
+#else
#define TCP_DEBUG_ALWAYS (0)
+#endif
/**
* Build debugging infra only if enabled. Debug components controlled via
* macros that follow.
@@ -867,11 +872,12 @@ if (TCP_DEBUG_CC > 1) \
*/
#if TCP_DEBUG_CS || TCP_DEBUG_ALWAYS
-#define STATS_INTERVAL 1
+#define STATS_INTERVAL 0.001
-#define tcp_cc_time_to_print_stats(_tc) \
- _tc->c_cc_stat_tstamp + STATS_INTERVAL < tcp_time_now() \
- || tcp_in_fastrecovery (_tc) \
+#define tcp_cc_time_to_print_stats(_tc) \
+ _tc->c_cc_stat_tstamp + STATS_INTERVAL < \
+ tcp_time_now_us (_tc->c_thread_index) || \
+ tcp_in_fastrecovery (_tc)
#define TCP_EVT_CC_RTO_STAT_PRINT(_tc) \
{ \
@@ -887,14 +893,14 @@ if (TCP_DEBUG_CC > 1) \
ed->data[3] = _tc->rttvar; \
}
-#define TCP_EVT_CC_RTO_STAT_HANDLER(_tc, ...) \
-{ \
-if (tcp_cc_time_to_print_stats (_tc)) \
-{ \
- TCP_EVT_CC_RTO_STAT_PRINT (_tc); \
- _tc->c_cc_stat_tstamp = tcp_time_now (); \
-} \
-}
+#define TCP_EVT_CC_RTO_STAT_HANDLER(_tc, ...) \
+ { \
+ if (tcp_cc_time_to_print_stats (_tc)) \
+ { \
+ TCP_EVT_CC_RTO_STAT_PRINT (_tc); \
+ _tc->c_cc_stat_tstamp = tcp_time_now_us (_tc->c_thread_index); \
+ } \
+ }
#define TCP_EVT_CC_SND_STAT_PRINT(_tc) \
{ \
@@ -911,14 +917,14 @@ if (tcp_cc_time_to_print_stats (_tc)) \
ed->data[3] = _tc->snd_rxt_bytes; \
}
-#define TCP_EVT_CC_SND_STAT_HANDLER(_tc, ...) \
-{ \
-if (tcp_cc_time_to_print_stats (_tc)) \
-{ \
- TCP_EVT_CC_SND_STAT_PRINT(_tc); \
- _tc->c_cc_stat_tstamp = tcp_time_now (); \
-} \
-}
+#define TCP_EVT_CC_SND_STAT_HANDLER(_tc, ...) \
+ { \
+ if (tcp_cc_time_to_print_stats (_tc)) \
+ { \
+ TCP_EVT_CC_SND_STAT_PRINT (_tc); \
+ _tc->c_cc_stat_tstamp = tcp_time_now_us (_tc->c_thread_index); \
+ } \
+ }
#define TCP_EVT_CC_STAT_PRINT(_tc) \
{ \
@@ -937,14 +943,14 @@ if (tcp_cc_time_to_print_stats (_tc)) \
TCP_EVT_CC_SND_STAT_PRINT (_tc); \
}
-#define TCP_EVT_CC_STAT_HANDLER(_tc, ...) \
-{ \
-if (tcp_cc_time_to_print_stats (_tc)) \
-{ \
- TCP_EVT_CC_STAT_PRINT (_tc); \
- _tc->c_cc_stat_tstamp = tcp_time_now(); \
-} \
-}
+#define TCP_EVT_CC_STAT_HANDLER(_tc, ...) \
+ { \
+ if (tcp_cc_time_to_print_stats (_tc)) \
+ { \
+ TCP_EVT_CC_STAT_PRINT (_tc); \
+ _tc->c_cc_stat_tstamp = tcp_time_now_us (_tc->c_thread_index); \
+ } \
+ }
#else
#define TCP_EVT_CC_STAT_HANDLER(_tc, ...)
#define TCP_EVT_CC_STAT_PRINT(_tc)
diff --git a/src/vnet/tcp/tcp_error.def b/src/vnet/tcp/tcp_error.def
index a6f0ce4b35f..87fdcc02615 100644
--- a/src/vnet/tcp/tcp_error.def
+++ b/src/vnet/tcp/tcp_error.def
@@ -49,3 +49,4 @@ tcp_error (RCV_WND, rcv_wnd, WARN, "Segment not in receive window")
tcp_error (FIN_RCVD, fin_rcvd, INFO, "FINs received")
tcp_error (LINK_LOCAL_RW, link_local_rw, ERROR, "No rewrite for link local connection")
tcp_error (ZERO_RWND, zero_rwnd, WARN, "Zero receive window")
+tcp_error (CONN_ACCEPTED, conn_accepted, INFO, "Connections accepted") \ No newline at end of file
diff --git a/src/vnet/tcp/tcp_format.c b/src/vnet/tcp/tcp_format.c
index a3245f2046a..4674f2cbaed 100644
--- a/src/vnet/tcp/tcp_format.c
+++ b/src/vnet/tcp/tcp_format.c
@@ -52,12 +52,68 @@ format_tcp_flags (u8 * s, va_list * args)
return s;
}
+u8 *
+format_tcp_options (u8 *s, va_list *args)
+{
+ tcp_options_t *opts = va_arg (*args, tcp_options_t *);
+ u32 indent, n_opts = 0;
+ int i;
+
+ if (!opts->flags)
+ return s;
+
+ indent = format_get_indent (s);
+ indent += 2;
+
+ s = format (s, "options:\n%U", format_white_space, indent);
+
+ if (tcp_opts_mss (opts))
+ {
+ s = format (s, "mss %d", opts->mss);
+ n_opts++;
+ }
+ if (tcp_opts_wscale (opts))
+ {
+ s = format (s, "%swindow scale %d", n_opts > 0 ? ", " : "",
+ format_white_space, indent, opts->wscale);
+ n_opts++;
+ }
+ if (tcp_opts_tstamp (opts))
+ {
+ s = format (s, "%stimestamp %d, echo/reflected timestamp",
+ n_opts > 0 ? ", " : "", format_white_space, indent,
+ opts->tsval, opts->tsecr);
+ n_opts++;
+ }
+ if (tcp_opts_sack_permitted (opts))
+ {
+ s = format (s, "%ssack permitted", n_opts > 0 ? ", " : "",
+ format_white_space, indent);
+ n_opts++;
+ }
+ if (tcp_opts_sack (opts))
+ {
+ s = format (s, "%ssacks:", n_opts > 0 ? ", " : "", format_white_space,
+ indent);
+ for (i = 0; i < opts->n_sack_blocks; ++i)
+ {
+ s = format (s, "\n%Ublock %d: start %d, end %d", format_white_space,
+ indent + 2, i + 1, opts->sacks[i].start,
+ opts->sacks[i].end);
+ }
+ n_opts++;
+ }
+
+ return s;
+}
+
/* Format TCP header. */
u8 *
format_tcp_header (u8 * s, va_list * args)
{
tcp_header_t *tcp = va_arg (*args, tcp_header_t *);
u32 max_header_bytes = va_arg (*args, u32);
+ tcp_options_t opts = { .flags = 0 };
u32 header_bytes;
u32 indent;
@@ -83,32 +139,13 @@ format_tcp_header (u8 * s, va_list * args)
clib_net_to_host_u16 (tcp->window),
clib_net_to_host_u16 (tcp->checksum));
-
-#if 0
- /* Format TCP options. */
- {
- u8 *o;
- u8 *option_start = (void *) (tcp + 1);
- u8 *option_end = (void *) tcp + header_bytes;
-
- for (o = option_start; o < option_end;)
- {
- u32 length = o[1];
- switch (o[0])
- {
- case TCP_OPTION_END:
- length = 1;
- o = option_end;
- break;
-
- case TCP_OPTION_NOOP:
- length = 1;
- break;
-
- }
- }
- }
-#endif
+ if (header_bytes > max_header_bytes)
+ s = format (s, "\n%Uoptions: truncated", format_white_space, indent);
+ else if (tcp_options_parse (tcp, &opts, tcp_is_syn (tcp)) < 0)
+ s = format (s, "\n%Uoptions: parsing failed", format_white_space, indent);
+ else
+ s = format (s, "\n%U%U", format_white_space, indent, format_tcp_options,
+ &opts);
/* Recurse into next protocol layer. */
if (max_header_bytes != 0 && header_bytes < max_header_bytes)
diff --git a/src/vnet/tcp/tcp_inlines.h b/src/vnet/tcp/tcp_inlines.h
index 69f8ce7ff27..ccd0e3fe3ee 100644
--- a/src/vnet/tcp/tcp_inlines.h
+++ b/src/vnet/tcp/tcp_inlines.h
@@ -18,6 +18,35 @@
#include <vnet/tcp/tcp.h>
+always_inline void
+tcp_node_inc_counter_i (vlib_main_t *vm, u32 tcp4_node, u32 tcp6_node,
+ u8 is_ip4, u32 evt, u32 val)
+{
+ if (is_ip4)
+ vlib_node_increment_counter (vm, tcp4_node, evt, val);
+ else
+ vlib_node_increment_counter (vm, tcp6_node, evt, val);
+}
+
+#define tcp_inc_counter(node_id, err, count) \
+ tcp_node_inc_counter_i (vm, tcp4_##node_id##_node.index, \
+ tcp6_##node_id##_node.index, is_ip4, err, count)
+#define tcp_maybe_inc_err_counter(cnts, err) \
+ { \
+ cnts[err] += (next0 != tcp_next_drop (is_ip4)); \
+ }
+#define tcp_inc_err_counter(cnts, err, val) \
+ { \
+ cnts[err] += val; \
+ }
+#define tcp_store_err_counters(node_id, cnts) \
+ { \
+ int i; \
+ for (i = 0; i < TCP_N_ERROR; i++) \
+ if (cnts[i]) \
+ tcp_inc_counter (node_id, i, cnts[i]); \
+ }
+
always_inline tcp_header_t *
tcp_buffer_hdr (vlib_buffer_t * b)
{
@@ -66,7 +95,7 @@ tcp_listener_get (u32 tli)
always_inline tcp_connection_t *
tcp_half_open_connection_get (u32 conn_index)
{
- return tcp_connection_get (conn_index, 0);
+ return tcp_connection_get (conn_index, transport_cl_thread ());
}
/**
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index a6d135812e1..70b5d28e0cc 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -27,59 +27,17 @@ static vlib_error_desc_t tcp_input_error_counters[] = {
#undef tcp_error
};
-/* All TCP nodes have the same outgoing arcs */
-#define foreach_tcp_state_next \
- _ (DROP4, "ip4-drop") \
- _ (DROP6, "ip6-drop") \
- _ (TCP4_OUTPUT, "tcp4-output") \
- _ (TCP6_OUTPUT, "tcp6-output")
-
-typedef enum _tcp_established_next
-{
-#define _(s,n) TCP_ESTABLISHED_NEXT_##s,
- foreach_tcp_state_next
-#undef _
- TCP_ESTABLISHED_N_NEXT,
-} tcp_established_next_t;
-
-typedef enum _tcp_rcv_process_next
-{
-#define _(s,n) TCP_RCV_PROCESS_NEXT_##s,
- foreach_tcp_state_next
-#undef _
- TCP_RCV_PROCESS_N_NEXT,
-} tcp_rcv_process_next_t;
-
-typedef enum _tcp_syn_sent_next
-{
-#define _(s,n) TCP_SYN_SENT_NEXT_##s,
- foreach_tcp_state_next
-#undef _
- TCP_SYN_SENT_N_NEXT,
-} tcp_syn_sent_next_t;
-
-typedef enum _tcp_listen_next
-{
-#define _(s,n) TCP_LISTEN_NEXT_##s,
- foreach_tcp_state_next
-#undef _
- TCP_LISTEN_N_NEXT,
-} tcp_listen_next_t;
-
-/* Generic, state independent indices */
-typedef enum _tcp_state_next
+typedef enum _tcp_input_next
{
-#define _(s,n) TCP_NEXT_##s,
- foreach_tcp_state_next
-#undef _
- TCP_STATE_N_NEXT,
-} tcp_state_next_t;
-
-#define tcp_next_output(is_ip4) (is_ip4 ? TCP_NEXT_TCP4_OUTPUT \
- : TCP_NEXT_TCP6_OUTPUT)
-
-#define tcp_next_drop(is_ip4) (is_ip4 ? TCP_NEXT_DROP4 \
- : TCP_NEXT_DROP6)
+ TCP_INPUT_NEXT_DROP,
+ TCP_INPUT_NEXT_LISTEN,
+ TCP_INPUT_NEXT_RCV_PROCESS,
+ TCP_INPUT_NEXT_SYN_SENT,
+ TCP_INPUT_NEXT_ESTABLISHED,
+ TCP_INPUT_NEXT_RESET,
+ TCP_INPUT_NEXT_PUNT,
+ TCP_INPUT_N_NEXT
+} tcp_input_next_t;
/**
* Validate segment sequence number. As per RFC793:
@@ -404,17 +362,10 @@ tcp_rcv_ack_no_cc (tcp_connection_t * tc, vlib_buffer_t * b, u32 * error)
if (!(seq_leq (tc->snd_una, vnet_buffer (b)->tcp.ack_number)
&& seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)))
{
- if (seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)
- && seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_una))
- {
- tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
- goto acceptable;
- }
*error = TCP_ERROR_ACK_INVALID;
return -1;
}
-acceptable:
tc->bytes_acked = vnet_buffer (b)->tcp.ack_number - tc->snd_una;
tc->snd_una = vnet_buffer (b)->tcp.ack_number;
*error = TCP_ERROR_ACK_OK;
@@ -981,15 +932,6 @@ tcp_rcv_ack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b,
/* If the ACK acks something not yet sent (SEG.ACK > SND.NXT) */
if (PREDICT_FALSE (seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)))
{
- /* We've probably entered recovery and the peer still has some
- * of the data we've sent. Update snd_nxt and accept the ack */
- if (seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)
- && seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_una))
- {
- tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
- goto process_ack;
- }
-
tc->errors.above_ack_wnd += 1;
*error = TCP_ERROR_ACK_FUTURE;
TCP_EVT (TCP_EVT_ACK_RCV_ERR, tc, 0, vnet_buffer (b)->tcp.ack_number);
@@ -1012,8 +954,6 @@ tcp_rcv_ack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b,
return 0;
}
-process_ack:
-
/*
* Looks okay, process feedback
*/
@@ -1356,9 +1296,13 @@ format_tcp_rx_trace (u8 * s, va_list * args)
tcp_connection_t *tc = &t->tcp_connection;
u32 indent = format_get_indent (s);
- s = format (s, "%U state %U\n%U%U", format_tcp_connection_id, tc,
- format_tcp_state, tc->state, format_white_space, indent,
- format_tcp_header, &t->tcp_header, 128);
+ if (!tc->c_lcl_port)
+ s = format (s, "no tcp connection\n%U%U", format_white_space, indent,
+ format_tcp_header, &t->tcp_header, 128);
+ else
+ s = format (s, "%U state %U\n%U%U", format_tcp_connection_id, tc,
+ format_tcp_state, tc->state, format_white_space, indent,
+ format_tcp_header, &t->tcp_header, 128);
return s;
}
@@ -1428,53 +1372,14 @@ tcp_established_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
}
}
-always_inline void
-tcp_node_inc_counter_i (vlib_main_t * vm, u32 tcp4_node, u32 tcp6_node,
- u8 is_ip4, u32 evt, u32 val)
-{
- if (is_ip4)
- vlib_node_increment_counter (vm, tcp4_node, evt, val);
- else
- vlib_node_increment_counter (vm, tcp6_node, evt, val);
-}
-
-#define tcp_maybe_inc_counter(node_id, err, count) \
-{ \
- if (next0 != tcp_next_drop (is_ip4)) \
- tcp_node_inc_counter_i (vm, tcp4_##node_id##_node.index, \
- tcp6_##node_id##_node.index, is_ip4, err, \
- 1); \
-}
-#define tcp_inc_counter(node_id, err, count) \
- tcp_node_inc_counter_i (vm, tcp4_##node_id##_node.index, \
- tcp6_##node_id##_node.index, is_ip4, \
- err, count)
-#define tcp_maybe_inc_err_counter(cnts, err) \
-{ \
- cnts[err] += (next0 != tcp_next_drop (is_ip4)); \
-}
-#define tcp_inc_err_counter(cnts, err, val) \
-{ \
- cnts[err] += val; \
-}
-#define tcp_store_err_counters(node_id, cnts) \
-{ \
- int i; \
- for (i = 0; i < TCP_N_ERROR; i++) \
- if (cnts[i]) \
- tcp_inc_counter(node_id, i, cnts[i]); \
-}
-
-
always_inline uword
tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame, int is_ip4)
{
- u32 thread_index = vm->thread_index, errors = 0;
+ u32 thread_index = vm->thread_index, n_left_from, *from;
tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 err_counters[TCP_N_ERROR] = { 0 };
- u32 n_left_from, *from;
if (node->flags & VLIB_NODE_FLAG_TRACE)
tcp_established_trace_frame (vm, node, frame, is_ip4);
@@ -1538,9 +1443,7 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
b += 1;
}
- errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP,
- thread_index);
- err_counters[TCP_ERROR_MSG_QUEUE_FULL] = errors;
+ session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, thread_index);
tcp_store_err_counters (established, err_counters);
tcp_handle_postponed_dequeues (wrk);
tcp_handle_disconnects (wrk);
@@ -1563,43 +1466,23 @@ VLIB_NODE_FN (tcp6_established_node) (vlib_main_t * vm,
return tcp46_established_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (tcp4_established_node) =
-{
+VLIB_REGISTER_NODE (tcp4_established_node) = {
.name = "tcp4-established",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
.error_counters = tcp_input_error_counters,
- .n_next_nodes = TCP_ESTABLISHED_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_ESTABLISHED_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (tcp6_established_node) =
-{
+VLIB_REGISTER_NODE (tcp6_established_node) = {
.name = "tcp6-established",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
.error_counters = tcp_input_error_counters,
- .n_next_nodes = TCP_ESTABLISHED_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_ESTABLISHED_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
static u8
@@ -1795,11 +1678,50 @@ tcp_check_tx_offload (tcp_connection_t * tc, int is_ipv4)
tc->cfg_flags |= TCP_CFG_F_TSO;
}
+static void
+tcp_input_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t **bs, u16 *nexts, u32 n_bufs, u8 is_ip4)
+{
+ tcp_connection_t *tc;
+ tcp_header_t *tcp;
+ tcp_rx_trace_t *t;
+ u8 flags;
+ int i;
+
+ for (i = 0; i < n_bufs; i++)
+ {
+ if (!(bs[i]->flags & VLIB_BUFFER_IS_TRACED))
+ continue;
+
+ t = vlib_add_trace (vm, node, bs[i], sizeof (*t));
+ if (nexts[i] == TCP_INPUT_NEXT_DROP || nexts[i] == TCP_INPUT_NEXT_PUNT ||
+ nexts[i] == TCP_INPUT_NEXT_RESET)
+ {
+ tc = 0;
+ }
+ else
+ {
+ flags = vnet_buffer (bs[i])->tcp.flags;
+
+ if (flags == TCP_STATE_LISTEN)
+ tc = tcp_listener_get (vnet_buffer (bs[i])->tcp.connection_index);
+ else if (flags == TCP_STATE_SYN_SENT)
+ tc = tcp_half_open_connection_get (
+ vnet_buffer (bs[i])->tcp.connection_index);
+ else
+ tc = tcp_connection_get (vnet_buffer (bs[i])->tcp.connection_index,
+ vm->thread_index);
+ }
+ tcp = tcp_buffer_hdr (bs[i]);
+ tcp_set_rx_trace_data (t, tc, tcp, bs[i], is_ip4);
+ }
+}
+
always_inline uword
tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame, int is_ip4)
{
- u32 n_left_from, *from, thread_index = vm->thread_index, errors = 0;
+ u32 n_left_from, *from, thread_index = vm->thread_index;
tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
@@ -1965,7 +1887,9 @@ tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
SESSION_E_NONE))
{
tcp_send_reset_w_pkt (new_tc, b[0], thread_index, is_ip4);
- tcp_connection_cleanup (new_tc);
+ tcp_program_cleanup (wrk, new_tc);
+ new_tc->state = TCP_STATE_CLOSED;
+ new_tc->c_s_index = ~0;
error = TCP_ERROR_CREATE_SESSION_FAIL;
goto cleanup_ho;
}
@@ -1986,8 +1910,10 @@ tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (session_stream_connect_notify (&new_tc->connection,
SESSION_E_NONE))
{
- tcp_connection_cleanup (new_tc);
tcp_send_reset_w_pkt (tc, b[0], thread_index, is_ip4);
+ tcp_program_cleanup (wrk, new_tc);
+ new_tc->state = TCP_STATE_CLOSED;
+ new_tc->c_s_index = ~0;
TCP_EVT (TCP_EVT_RST_SENT, tc);
error = TCP_ERROR_CREATE_SESSION_FAIL;
goto cleanup_ho;
@@ -2034,9 +1960,7 @@ tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp_inc_counter (syn_sent, error, 1);
}
- errors =
- session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, thread_index);
- tcp_inc_counter (syn_sent, TCP_ERROR_MSG_QUEUE_FULL, errors);
+ session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, thread_index);
vlib_buffer_free (vm, from, frame->n_vectors);
tcp_handle_disconnects (wrk);
@@ -2057,7 +1981,6 @@ VLIB_NODE_FN (tcp6_syn_sent_node) (vlib_main_t * vm,
return tcp46_syn_sent_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp4_syn_sent_node) =
{
.name = "tcp4-syn-sent",
@@ -2065,18 +1988,9 @@ VLIB_REGISTER_NODE (tcp4_syn_sent_node) =
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
.error_counters = tcp_input_error_counters,
- .n_next_nodes = TCP_SYN_SENT_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp6_syn_sent_node) =
{
.name = "tcp6-syn-sent",
@@ -2084,16 +1998,8 @@ VLIB_REGISTER_NODE (tcp6_syn_sent_node) =
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
.error_counters = tcp_input_error_counters,
- .n_next_nodes = TCP_SYN_SENT_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
static void
tcp46_rcv_process_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
@@ -2125,7 +2031,7 @@ always_inline uword
tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame, int is_ip4)
{
- u32 thread_index = vm->thread_index, errors, n_left_from, *from, max_deq;
+ u32 thread_index = vm->thread_index, n_left_from, *from, max_deq;
tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
@@ -2193,15 +2099,6 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
switch (tc->state)
{
case TCP_STATE_SYN_RCVD:
-
- /* Make sure the segment is exactly right */
- if (tc->rcv_nxt != vnet_buffer (b[0])->tcp.seq_number || is_fin)
- {
- tcp_send_reset_w_pkt (tc, b[0], thread_index, is_ip4);
- error = TCP_ERROR_SEGMENT_INVALID;
- goto drop;
- }
-
/*
* If the segment acknowledgment is not acceptable, form a
* reset segment,
@@ -2215,6 +2112,10 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
goto drop;
}
+ /* Avoid notifying app if connection is about to be closed */
+ if (PREDICT_FALSE (is_fin))
+ break;
+
/* Update rtt and rto */
tcp_estimate_initial_rtt (tc);
tcp_connection_tx_pacer_update (tc);
@@ -2243,7 +2144,7 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp_connection_cleanup (tc);
goto drop;
}
- error = TCP_ERROR_ACK_OK;
+ error = TCP_ERROR_CONN_ACCEPTED;
break;
case TCP_STATE_ESTABLISHED:
/* We can get packets in established state here because they
@@ -2322,8 +2223,8 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (max_deq > tc->burst_acked)
break;
- tcp_send_fin (tc);
tcp_connection_timers_reset (tc);
+ tcp_send_fin (tc);
tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
tcp_timer_set (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
tcp_cfg.lastack_time);
@@ -2435,15 +2336,15 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp_cfg.closewait_time);
break;
case TCP_STATE_SYN_RCVD:
- /* Send FIN-ACK, enter LAST-ACK and because the app was not
- * notified yet, set a cleanup timer instead of relying on
- * disconnect notify and the implicit close call. */
+ /* Send FIN-ACK and enter TIME-WAIT, as opposed to LAST-ACK,
+ * because the app was not notified yet and we want to avoid
+ * session state transitions to ensure cleanup does not
+ * propagate to app. */
tcp_connection_timers_reset (tc);
tc->rcv_nxt += 1;
tcp_send_fin (tc);
- tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
- tcp_timer_set (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
- tcp_cfg.lastack_time);
+ tcp_connection_set_state (tc, TCP_STATE_TIME_WAIT);
+ tcp_program_cleanup (wrk, tc);
break;
case TCP_STATE_CLOSE_WAIT:
case TCP_STATE_CLOSING:
@@ -2498,9 +2399,7 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp_inc_counter (rcv_process, error, 1);
}
- errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP,
- thread_index);
- tcp_inc_counter (rcv_process, TCP_ERROR_MSG_QUEUE_FULL, errors);
+ session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, thread_index);
tcp_handle_postponed_dequeues (wrk);
tcp_handle_disconnects (wrk);
vlib_buffer_free (vm, from, frame->n_vectors);
@@ -2522,43 +2421,23 @@ VLIB_NODE_FN (tcp6_rcv_process_node) (vlib_main_t * vm,
return tcp46_rcv_process_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (tcp4_rcv_process_node) =
-{
+VLIB_REGISTER_NODE (tcp4_rcv_process_node) = {
.name = "tcp4-rcv-process",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
.error_counters = tcp_input_error_counters,
- .n_next_nodes = TCP_RCV_PROCESS_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_RCV_PROCESS_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (tcp6_rcv_process_node) =
-{
+VLIB_REGISTER_NODE (tcp6_rcv_process_node) = {
.name = "tcp6-rcv-process",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
.error_counters = tcp_input_error_counters,
- .n_next_nodes = TCP_RCV_PROCESS_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_RCV_PROCESS_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
static void
tcp46_listen_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
@@ -2666,7 +2545,8 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
lc = tcp_listener_get (vnet_buffer (b[0])->tcp.connection_index);
}
- else /* We are in TimeWait state*/
+ /* Probably we are in time-wait or closed state */
+ else
{
tcp_connection_t *tc;
tc = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
@@ -2780,98 +2660,82 @@ VLIB_NODE_FN (tcp6_listen_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return tcp46_listen_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (tcp4_listen_node) =
-{
+VLIB_REGISTER_NODE (tcp4_listen_node) = {
.name = "tcp4-listen",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
.error_counters = tcp_input_error_counters,
- .n_next_nodes = TCP_LISTEN_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_LISTEN_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (tcp6_listen_node) =
-{
+VLIB_REGISTER_NODE (tcp6_listen_node) = {
.name = "tcp6-listen",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
.error_counters = tcp_input_error_counters,
- .n_next_nodes = TCP_LISTEN_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_LISTEN_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
-typedef enum _tcp_input_next
+always_inline uword
+tcp46_drop_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, int is_ip4)
{
- TCP_INPUT_NEXT_DROP,
- TCP_INPUT_NEXT_LISTEN,
- TCP_INPUT_NEXT_RCV_PROCESS,
- TCP_INPUT_NEXT_SYN_SENT,
- TCP_INPUT_NEXT_ESTABLISHED,
- TCP_INPUT_NEXT_RESET,
- TCP_INPUT_NEXT_PUNT,
- TCP_INPUT_N_NEXT
-} tcp_input_next_t;
+ u32 *from = vlib_frame_vector_args (frame);
-#define foreach_tcp4_input_next \
- _ (DROP, "ip4-drop") \
- _ (LISTEN, "tcp4-listen") \
- _ (RCV_PROCESS, "tcp4-rcv-process") \
- _ (SYN_SENT, "tcp4-syn-sent") \
- _ (ESTABLISHED, "tcp4-established") \
- _ (RESET, "tcp4-reset") \
- _ (PUNT, "ip4-punt")
-
-#define foreach_tcp6_input_next \
- _ (DROP, "ip6-drop") \
- _ (LISTEN, "tcp6-listen") \
- _ (RCV_PROCESS, "tcp6-rcv-process") \
- _ (SYN_SENT, "tcp6-syn-sent") \
- _ (ESTABLISHED, "tcp6-established") \
- _ (RESET, "tcp6-reset") \
- _ (PUNT, "ip6-punt")
+ /* Error counters must be incremented by previous nodes */
+ vlib_buffer_free (vm, from, frame->n_vectors);
-#define filter_flags (TCP_FLAG_SYN|TCP_FLAG_ACK|TCP_FLAG_RST|TCP_FLAG_FIN)
+ return frame->n_vectors;
+}
-static void
-tcp_input_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_buffer_t ** bs, u32 n_bufs, u8 is_ip4)
+VLIB_NODE_FN (tcp4_drop_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
{
- tcp_connection_t *tc;
- tcp_header_t *tcp;
- tcp_rx_trace_t *t;
- int i;
+ return tcp46_drop_inline (vm, node, from_frame, 1 /* is_ip4 */);
+}
- for (i = 0; i < n_bufs; i++)
- {
- if (bs[i]->flags & VLIB_BUFFER_IS_TRACED)
- {
- t = vlib_add_trace (vm, node, bs[i], sizeof (*t));
- tc = tcp_connection_get (vnet_buffer (bs[i])->tcp.connection_index,
- vm->thread_index);
- tcp = vlib_buffer_get_current (bs[i]);
- tcp_set_rx_trace_data (t, tc, tcp, bs[i], is_ip4);
- }
- }
+VLIB_NODE_FN (tcp6_drop_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return tcp46_drop_inline (vm, node, from_frame, 0 /* is_ip4 */);
}
+VLIB_REGISTER_NODE (tcp4_drop_node) = {
+ .name = "tcp4-drop",
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_counters = tcp_input_error_counters,
+};
+
+VLIB_REGISTER_NODE (tcp6_drop_node) = {
+ .name = "tcp6-drop",
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_counters = tcp_input_error_counters,
+};
+
+#define foreach_tcp4_input_next \
+ _ (DROP, "tcp4-drop") \
+ _ (LISTEN, "tcp4-listen") \
+ _ (RCV_PROCESS, "tcp4-rcv-process") \
+ _ (SYN_SENT, "tcp4-syn-sent") \
+ _ (ESTABLISHED, "tcp4-established") \
+ _ (RESET, "tcp4-reset") \
+ _ (PUNT, "ip4-punt")
+
+#define foreach_tcp6_input_next \
+ _ (DROP, "tcp6-drop") \
+ _ (LISTEN, "tcp6-listen") \
+ _ (RCV_PROCESS, "tcp6-rcv-process") \
+ _ (SYN_SENT, "tcp6-syn-sent") \
+ _ (ESTABLISHED, "tcp6-established") \
+ _ (RESET, "tcp6-reset") \
+ _ (PUNT, "ip6-punt")
+
+#define filter_flags (TCP_FLAG_SYN|TCP_FLAG_ACK|TCP_FLAG_RST|TCP_FLAG_FIN)
+
static void
tcp_input_set_error_next (tcp_main_t * tm, u16 * next, u32 * error, u8 is_ip4)
{
@@ -2892,9 +2756,8 @@ tcp_input_set_error_next (tcp_main_t * tm, u16 * next, u32 * error, u8 is_ip4)
}
static inline void
-tcp_input_dispatch_buffer (tcp_main_t * tm, tcp_connection_t * tc,
- vlib_buffer_t * b, u16 * next,
- vlib_node_runtime_t * error_node)
+tcp_input_dispatch_buffer (tcp_main_t *tm, tcp_connection_t *tc,
+ vlib_buffer_t *b, u16 *next, u16 *err_counters)
{
tcp_header_t *tcp;
u32 error;
@@ -2916,7 +2779,7 @@ tcp_input_dispatch_buffer (tcp_main_t * tm, tcp_connection_t * tc,
if (PREDICT_FALSE (error != TCP_ERROR_NONE))
{
- b->error = error_node->errors[error];
+ tcp_inc_err_counter (err_counters, error, 1);
if (error == TCP_ERROR_DISPATCH)
clib_warning ("tcp conn %u disp error state %U flags %U",
tc->c_c_index, format_tcp_state, tc->state,
@@ -2932,6 +2795,7 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tcp_main_t *tm = vnet_get_tcp_main ();
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 nexts[VLIB_FRAME_SIZE], *next;
+ u16 err_counters[TCP_N_ERROR] = { 0 };
tcp_update_time_now (tcp_get_worker (thread_index));
@@ -2970,8 +2834,8 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_buffer (b[0])->tcp.connection_index = tc0->c_c_index;
vnet_buffer (b[1])->tcp.connection_index = tc1->c_c_index;
- tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], node);
- tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1], node);
+ tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], err_counters);
+ tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1], err_counters);
}
else
{
@@ -2979,24 +2843,26 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
{
ASSERT (tcp_lookup_is_valid (tc0, b[0], tcp_buffer_hdr (b[0])));
vnet_buffer (b[0])->tcp.connection_index = tc0->c_c_index;
- tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], node);
+ tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0],
+ err_counters);
}
else
{
tcp_input_set_error_next (tm, &next[0], &error0, is_ip4);
- b[0]->error = node->errors[error0];
+ tcp_inc_err_counter (err_counters, error0, 1);
}
if (PREDICT_TRUE (tc1 != 0))
{
ASSERT (tcp_lookup_is_valid (tc1, b[1], tcp_buffer_hdr (b[1])));
vnet_buffer (b[1])->tcp.connection_index = tc1->c_c_index;
- tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1], node);
+ tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1],
+ err_counters);
}
else
{
tcp_input_set_error_next (tm, &next[1], &error1, is_ip4);
- b[1]->error = node->errors[error1];
+ tcp_inc_err_counter (err_counters, error1, 1);
}
}
@@ -3022,12 +2888,12 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
{
ASSERT (tcp_lookup_is_valid (tc0, b[0], tcp_buffer_hdr (b[0])));
vnet_buffer (b[0])->tcp.connection_index = tc0->c_c_index;
- tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], node);
+ tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], err_counters);
}
else
{
tcp_input_set_error_next (tm, &next[0], &error0, is_ip4);
- b[0]->error = node->errors[error0];
+ tcp_inc_err_counter (err_counters, error0, 1);
}
b += 1;
@@ -3036,8 +2902,9 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
- tcp_input_trace_frame (vm, node, bufs, frame->n_vectors, is_ip4);
+ tcp_input_trace_frame (vm, node, bufs, nexts, frame->n_vectors, is_ip4);
+ tcp_store_err_counters (input, err_counters);
vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
return frame->n_vectors;
}
@@ -3058,7 +2925,6 @@ VLIB_NODE_FN (tcp6_input_nolookup_node) (vlib_main_t * vm,
1 /* is_nolookup */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp4_input_nolookup_node) =
{
.name = "tcp4-input-nolookup",
@@ -3076,9 +2942,7 @@ VLIB_REGISTER_NODE (tcp4_input_nolookup_node) =
.format_buffer = format_tcp_header,
.format_trace = format_tcp_rx_trace,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp6_input_nolookup_node) =
{
.name = "tcp6-input-nolookup",
@@ -3096,7 +2960,6 @@ VLIB_REGISTER_NODE (tcp6_input_nolookup_node) =
.format_buffer = format_tcp_header,
.format_trace = format_tcp_rx_trace,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (tcp4_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * from_frame)
@@ -3112,7 +2975,6 @@ VLIB_NODE_FN (tcp6_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
0 /* is_nolookup */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp4_input_node) =
{
.name = "tcp4-input",
@@ -3130,9 +2992,7 @@ VLIB_REGISTER_NODE (tcp4_input_node) =
.format_buffer = format_tcp_header,
.format_trace = format_tcp_rx_trace,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp6_input_node) =
{
.name = "tcp6-input",
@@ -3150,7 +3010,6 @@ VLIB_REGISTER_NODE (tcp6_input_node) =
.format_buffer = format_tcp_header,
.format_trace = format_tcp_rx_trace,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
void
@@ -3336,6 +3195,8 @@ do { \
_(FIN_WAIT_2, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
TCP_ERROR_NONE);
_(FIN_WAIT_2, TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ _ (FIN_WAIT_2, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
+ TCP_ERROR_NONE);
_(CLOSE_WAIT, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
_(CLOSE_WAIT, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
TCP_ERROR_NONE);
@@ -3385,7 +3246,7 @@ do { \
_(CLOSED, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_DROP,
TCP_ERROR_CONNECTION_CLOSED);
_(CLOSED, TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET, TCP_ERROR_CONNECTION_CLOSED);
- _(CLOSED, TCP_FLAG_SYN, TCP_INPUT_NEXT_RESET, TCP_ERROR_CONNECTION_CLOSED);
+ _ (CLOSED, TCP_FLAG_SYN, TCP_INPUT_NEXT_LISTEN, TCP_ERROR_NONE);
_(CLOSED, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET,
TCP_ERROR_CONNECTION_CLOSED);
#undef _
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index f5035006822..78148cd5695 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -420,7 +420,7 @@ static inline void
tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state,
u8 flags)
{
- tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
+ tcp_options_t _snd_opts = {}, *snd_opts = &_snd_opts;
u8 tcp_opts_len, tcp_hdr_opts_len;
tcp_header_t *th;
u16 wnd;
@@ -656,8 +656,8 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
u8 tcp_hdr_len, flags = 0;
tcp_header_t *th, *pkt_th;
u32 seq, ack, bi;
- ip4_header_t *ih4, *pkt_ih4;
- ip6_header_t *ih6, *pkt_ih6;
+ ip4_header_t *pkt_ih4;
+ ip6_header_t *pkt_ih6;
if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
{
@@ -667,6 +667,7 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
b = vlib_get_buffer (vm, bi);
tcp_init_buffer (vm, b);
+ vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
/* Make and write options */
tcp_hdr_len = sizeof (tcp_header_t);
@@ -698,28 +699,7 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port,
seq, ack, tcp_hdr_len, flags, 0);
-
- /* Swap src and dst ip */
- if (is_ip4)
- {
- ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40);
- ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address,
- &pkt_ih4->src_address, IP_PROTOCOL_TCP,
- tcp_csum_offload (tc));
- th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
- }
- else
- {
- int bogus = ~0;
- ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) ==
- 0x60);
- ih6 = vlib_buffer_push_ip6_custom (vm, b, &pkt_ih6->dst_address,
- &pkt_ih6->src_address,
- IP_PROTOCOL_TCP,
- tc->ipv6_flow_label);
- th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
- ASSERT (!bogus);
- }
+ th->checksum = tcp_compute_checksum (tc, b);
tcp_enqueue_half_open (wrk, tc, b, bi);
TCP_EVT (TCP_EVT_RST_SENT, tc);
@@ -858,10 +838,9 @@ tcp_send_fin (tcp_connection_t * tc)
/* Out of buffers so program fin retransmit ASAP */
tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT,
tcp_cfg.alloc_err_timeout);
- if (fin_snt)
- tc->snd_nxt += 1;
- else
- /* Make sure retransmit retries a fin not data */
+ tc->snd_nxt += 1;
+ /* Make sure retransmit retries a fin not data with right snd_nxt */
+ if (!fin_snt)
tc->flags |= TCP_CONN_FINSNT;
tcp_worker_stats_inc (wrk, no_buffer, 1);
return;
@@ -1137,7 +1116,7 @@ tcp_prepare_segment (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
data = tcp_init_buffer (vm, *b);
n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
max_deq_bytes);
- ASSERT (n_bytes == max_deq_bytes);
+ ASSERT (n_bytes > 0);
b[0]->current_length = n_bytes;
tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0,
/* burst */ 0, /* update_snd_nxt */ 0);
@@ -1299,6 +1278,7 @@ tcp_cc_init_rxt_timeout (tcp_connection_t * tc)
tc->cwnd_acc_bytes = 0;
tc->tr_occurences += 1;
tc->sack_sb.reorder = TCP_DUPACK_THRESHOLD;
+ tc->sack_sb.rescue_rxt = tc->snd_una - 1;
tcp_recovery_on (tc);
}
@@ -1749,7 +1729,7 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
&& tc->rxt_head != tc->snd_una
&& tcp_retransmit_should_retry_head (tc, sb))
{
- max_bytes = clib_min (tc->snd_mss, tc->snd_congestion - tc->snd_una);
+ max_bytes = clib_min (tc->snd_mss, tc->snd_nxt - tc->snd_una);
n_written = tcp_prepare_retransmit_segment (wrk, tc, 0, max_bytes, &b);
if (!n_written)
{
@@ -1781,7 +1761,7 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
if (!hole)
{
/* We are out of lost holes to retransmit so send some new data. */
- if (max_deq > tc->snd_mss)
+ if (max_deq)
{
u32 n_segs_new;
int av_wnd;
@@ -1791,7 +1771,10 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
av_wnd = (int) tc->snd_wnd - (tc->snd_nxt - tc->snd_una);
av_wnd = clib_max (av_wnd - tc->snd_mss, 0);
snd_space = clib_min (snd_space, av_wnd);
- snd_space = clib_min (max_deq, snd_space);
+ /* Low bound max_deq to mss to be able to send a segment even
+ * when it is less than mss */
+ snd_space =
+ clib_min (clib_max (max_deq, tc->snd_mss), snd_space);
burst_size = clib_min (burst_size - n_segs,
snd_space / tc->snd_mss);
burst_size = clib_min (burst_size, TCP_RXT_MAX_BURST);
@@ -1803,8 +1786,7 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
goto done;
}
- if (tcp_in_recovery (tc) || !can_rescue
- || scoreboard_rescue_rxt_valid (sb, tc))
+ if (!can_rescue || scoreboard_rescue_rxt_valid (sb, tc))
break;
/* If rescue rxt undefined or less than snd_una then one segment of
@@ -1828,7 +1810,11 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
break;
}
- max_bytes = clib_min (hole->end - sb->high_rxt, snd_space);
+ max_bytes = hole->end - sb->high_rxt;
+ /* Avoid retransmitting segment less than mss if possible */
+ if (snd_space < tc->snd_mss && max_bytes > snd_space)
+ break;
+ max_bytes = clib_min (max_bytes, snd_space);
max_bytes = snd_limited ? clib_min (max_bytes, tc->snd_mss) : max_bytes;
if (max_bytes == 0)
break;
@@ -2191,6 +2177,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 n_left_from, *from, thread_index = vm->thread_index;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 nexts[VLIB_FRAME_SIZE], *next;
+ u16 err_counters[TCP_N_ERROR] = { 0 };
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -2241,7 +2228,8 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
else
{
- b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
+ tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION,
+ 1);
next[0] = TCP_OUTPUT_NEXT_DROP;
}
if (tc1 != 0)
@@ -2252,7 +2240,8 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
else
{
- b[1]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
+ tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION,
+ 1);
next[1] = TCP_OUTPUT_NEXT_DROP;
}
}
@@ -2282,7 +2271,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
else
{
- b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
+ tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION, 1);
next[0] = TCP_OUTPUT_NEXT_DROP;
}
@@ -2291,6 +2280,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
n_left_from -= 1;
}
+ tcp_store_err_counters (output, err_counters);
vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
vlib_node_increment_counter (vm, tcp_node_index (output, is_ip4),
TCP_ERROR_PKTS_SENT, frame->n_vectors);
@@ -2309,7 +2299,6 @@ VLIB_NODE_FN (tcp6_output_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp4_output_node) =
{
.name = "tcp4-output",
@@ -2327,9 +2316,7 @@ VLIB_REGISTER_NODE (tcp4_output_node) =
.format_buffer = format_tcp_header,
.format_trace = format_tcp_tx_trace,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp6_output_node) =
{
.name = "tcp6-output",
@@ -2347,7 +2334,6 @@ VLIB_REGISTER_NODE (tcp6_output_node) =
.format_buffer = format_tcp_header,
.format_trace = format_tcp_tx_trace,
};
-/* *INDENT-ON* */
typedef enum _tcp_reset_next
{
@@ -2458,7 +2444,6 @@ VLIB_NODE_FN (tcp6_reset_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return tcp46_reset_inline (vm, node, from_frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp4_reset_node) = {
.name = "tcp4-reset",
.vector_size = sizeof (u32),
@@ -2472,9 +2457,7 @@ VLIB_REGISTER_NODE (tcp4_reset_node) = {
},
.format_trace = format_tcp_tx_trace,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp6_reset_node) = {
.name = "tcp6-reset",
.vector_size = sizeof (u32),
@@ -2488,7 +2471,6 @@ VLIB_REGISTER_NODE (tcp6_reset_node) = {
},
.format_trace = format_tcp_tx_trace,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/tcp/tcp_pg.c b/src/vnet/tcp/tcp_pg.c
index 07bdb113fd0..9b98e3d8ee4 100644
--- a/src/vnet/tcp/tcp_pg.c
+++ b/src/vnet/tcp/tcp_pg.c
@@ -51,6 +51,13 @@
_ (ECE) \
_ (CWR)
+#define foreach_tcp_options \
+ _ (mss, TCP_OPTION_MSS, TCP_OPTION_LEN_MSS, 1) \
+ _ (timestamp, TCP_OPTION_TIMESTAMP, TCP_OPTION_LEN_TIMESTAMP, 2) \
+ _ (winscale, TCP_OPTION_WINDOW_SCALE, TCP_OPTION_LEN_WINDOW_SCALE, 1) \
+ _ (sackperm, TCP_OPTION_SACK_PERMITTED, TCP_OPTION_LEN_SACK_PERMITTED, 0) \
+ _ (sack, TCP_OPTION_SACK_BLOCK, TCP_OPTION_LEN_SACK_BLOCK, 0)
+
static void
tcp_pg_edit_function (pg_main_t * pg,
pg_stream_t * s,
@@ -150,82 +157,192 @@ uword
unformat_pg_tcp_header (unformat_input_t * input, va_list * args)
{
pg_stream_t *s = va_arg (*args, pg_stream_t *);
- pg_tcp_header_t *p;
- u32 group_index;
+ pg_tcp_header_t *pth;
+ u32 header_group_index, opt_group_index = ~0, noop_len, opts_len = 0;
- p = pg_create_edit_group (s, sizeof (p[0]), sizeof (tcp_header_t),
- &group_index);
- pg_tcp_header_init (p);
+ pth = pg_create_edit_group (s, sizeof (pth[0]), sizeof (tcp_header_t),
+ &header_group_index);
+ pg_tcp_header_init (pth);
/* Defaults. */
- pg_edit_set_fixed (&p->seq_number, 0);
- pg_edit_set_fixed (&p->ack_number, 0);
-
- pg_edit_set_fixed (&p->data_offset_and_reserved,
- sizeof (tcp_header_t) / sizeof (u32));
+ pg_edit_set_fixed (&pth->seq_number, 0);
+ pg_edit_set_fixed (&pth->ack_number, 0);
- pg_edit_set_fixed (&p->window, 4096);
- pg_edit_set_fixed (&p->urgent_pointer, 0);
+ pg_edit_set_fixed (&pth->window, 4096);
+ pg_edit_set_fixed (&pth->urgent_pointer, 0);
-#define _(f) pg_edit_set_fixed (&p->f##_flag, 0);
+#define _(f) pg_edit_set_fixed (&pth->f##_flag, 0);
foreach_tcp_flag
#undef _
- p->checksum.type = PG_EDIT_UNSPECIFIED;
+ pth->checksum.type = PG_EDIT_UNSPECIFIED;
- if (!unformat (input, "TCP: %U -> %U",
- unformat_pg_edit,
- unformat_tcp_udp_port, &p->src,
- unformat_pg_edit, unformat_tcp_udp_port, &p->dst))
+ if (!unformat (input, "TCP: %U -> %U", unformat_pg_edit,
+ unformat_tcp_udp_port, &pth->src, unformat_pg_edit,
+ unformat_tcp_udp_port, &pth->dst))
goto error;
/* Parse options. */
while (1)
{
- if (unformat (input, "window %U",
- unformat_pg_edit, unformat_pg_number, &p->window))
+ if (unformat (input, "window %U", unformat_pg_edit, unformat_pg_number,
+ &pth->window))
;
- else if (unformat (input, "checksum %U",
- unformat_pg_edit, unformat_pg_number, &p->checksum))
+ else if (unformat (input, "checksum %U", unformat_pg_edit,
+ unformat_pg_number, &pth->checksum))
;
else if (unformat (input, "seqnum %U", unformat_pg_edit,
- unformat_pg_number, &p->seq_number))
+ unformat_pg_number, &pth->seq_number))
;
else if (unformat (input, "acknum %U", unformat_pg_edit,
- unformat_pg_number, &p->ack_number))
+ unformat_pg_number, &pth->ack_number))
;
/* Flags. */
-#define _(f) else if (unformat (input, #f)) pg_edit_set_fixed (&p->f##_flag, 1);
+#define _(f) \
+ else if (unformat (input, #f)) pg_edit_set_fixed (&pth->f##_flag, 1);
foreach_tcp_flag
#undef _
- /* Can't parse input: try next protocol level. */
+ /* Can't parse input: try TCP options and next protocol level. */
+ else break;
+ }
+
+ while (unformat (input, "opt"))
+ {
+ int i;
+ pg_edit_t *opt_header, *opt_values;
+ u8 type, opt_len, n_values;
+
+ /* first allocate a new edit group for options */
+ if (opt_group_index == ~0)
+ (void) pg_create_edit_group (s, 0, 0, &opt_group_index);
+
+ if (false)
+ {
+ }
+#define _(n, t, l, k) \
+ else if (unformat (input, #n)) \
+ { \
+ type = (t); \
+ opt_len = (l); \
+ n_values = (k); \
+ }
+ foreach_tcp_options
+#undef _
else
+ {
+ /* unknown TCP option */
break;
+ }
+
+#define pg_tcp_option_init(e, o, b) \
+ do \
+ { \
+ *(o) += (b); \
+ (e)->lsb_bit_offset = *(o) > 0 ? (*(o) -1) * BITS (u8) : 0; \
+ (e)->n_bits = (b) *BITS (u8); \
+ } \
+ while (0);
+
+ /* if we don't know how many values to read, just ask */
+ if (n_values == 0 &&
+ unformat (input, "nvalues %D", sizeof (n_values), &n_values))
+ {
+ switch (type)
+ {
+ case TCP_OPTION_SACK_BLOCK:
+ /* each sack block is composed of 2 32-bits values */
+ n_values *= 2;
+ /*
+ opt_len contains the length of a single sack block,
+ it needs to be updated to contains the final number of bytes
+ for the sack options
+ */
+ opt_len = 2 + 2 * opt_len;
+ break;
+ default:
+ /* unknown variable options */
+ continue;
+ }
+ }
+
+ opt_header = pg_add_edits (s, sizeof (pg_edit_t) * (2 + n_values),
+ opt_len, opt_group_index);
+ pg_tcp_option_init (opt_header, &opts_len, 1);
+ pg_tcp_option_init (opt_header + 1, &opts_len, 1);
+ pg_edit_set_fixed (opt_header, type);
+ pg_edit_set_fixed (opt_header + 1, opt_len);
+ opt_values = opt_header + 2;
+
+ switch (type)
+ {
+ case TCP_OPTION_MSS:
+ pg_tcp_option_init (opt_values, &opts_len, 2);
+ break;
+ case TCP_OPTION_WINDOW_SCALE:
+ pg_tcp_option_init (opt_values, &opts_len, 1);
+ break;
+ case TCP_OPTION_TIMESTAMP:
+ case TCP_OPTION_SACK_BLOCK:
+ for (i = 0; i < n_values; ++i)
+ pg_tcp_option_init (opt_values + i, &opts_len, 4);
+ break;
+ default:
+ break;
+ }
+
+ for (i = 0; i < n_values; ++i)
+ {
+ if (!unformat (input, "%U", unformat_pg_edit, unformat_pg_number,
+ opt_values + i))
+ goto error;
+ }
}
+ /* add TCP NO-OP options to fill options up to a 4-bytes boundary */
+ noop_len = (TCP_OPTS_ALIGN - opts_len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
+ if (noop_len > 0)
+ {
+ pg_edit_t *noop_edit;
+ u8 *noops = 0;
+
+ vec_validate (noops, noop_len - 1);
+ clib_memset (noops, 1, noop_len);
+
+ noop_edit =
+ pg_add_edits (s, sizeof (noop_edit[0]), noop_len, opt_group_index);
+ pg_tcp_option_init (noop_edit, &opts_len, noop_len);
+ noop_edit->type = PG_EDIT_FIXED;
+ noop_edit->values[PG_EDIT_LO] = noops;
+ }
+#undef pg_tcp_option_init
+
+ /* set the data offset according to options */
+ pg_edit_set_fixed (&pth->data_offset_and_reserved,
+ (sizeof (tcp_header_t) + opts_len) / sizeof (u32));
+
{
ip_main_t *im = &ip_main;
u16 dst_port;
tcp_udp_port_info_t *pi;
pi = 0;
- if (p->dst.type == PG_EDIT_FIXED)
+ if (pth->dst.type == PG_EDIT_FIXED)
{
- dst_port = pg_edit_get_value (&p->dst, PG_EDIT_LO);
+ dst_port = pg_edit_get_value (&pth->dst, PG_EDIT_LO);
pi = ip_get_tcp_udp_port_info (im, dst_port);
}
- if (pi && pi->unformat_pg_edit
- && unformat_user (input, pi->unformat_pg_edit, s))
+ if (pi && pi->unformat_pg_edit &&
+ unformat_user (input, pi->unformat_pg_edit, s))
;
else if (!unformat_user (input, unformat_pg_payload, s))
goto error;
- if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ if (pth->checksum.type == PG_EDIT_UNSPECIFIED)
{
- pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ pg_edit_group_t *g = pg_stream_get_group (s, header_group_index);
g->edit_function = tcp_pg_edit_function;
g->edit_function_opaque = 0;
}
diff --git a/src/vnet/tcp/tcp_syn_filter4.c b/src/vnet/tcp/tcp_syn_filter4.c
index 1b003e04e51..6e867240ad6 100644
--- a/src/vnet/tcp/tcp_syn_filter4.c
+++ b/src/vnet/tcp/tcp_syn_filter4.c
@@ -399,7 +399,6 @@ VLIB_NODE_FN (syn_filter4_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (syn_filter4_node) =
{
.name = "syn-filter-4",
@@ -418,16 +417,13 @@ VLIB_REGISTER_NODE (syn_filter4_node) =
[SYN_FILTER_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (syn_filter_4, static) =
{
.arc_name = "ip4-local",
.node_name = "syn-filter-4",
.runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
int
@@ -525,14 +521,12 @@ syn_filter_enable_disable_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (sr_content_command, static) =
{
.path = "ip syn filter",
.short_help = "ip syn filter <interface-name> [disable]",
.function = syn_filter_enable_disable_command_fn,
};
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
/*
diff --git a/src/vnet/tcp/tcp_timer.h b/src/vnet/tcp/tcp_timer.h
index 7f7dbf193eb..c0907cae1cc 100644
--- a/src/vnet/tcp/tcp_timer.h
+++ b/src/vnet/tcp/tcp_timer.h
@@ -17,11 +17,18 @@
#include <vnet/tcp/tcp_types.h>
+static inline u8
+tcp_timer_thread_is_valid (tcp_connection_t *tc)
+{
+ return ((tc->c_thread_index == vlib_get_thread_index ()) ||
+ vlib_thread_is_main_w_barrier ());
+}
+
always_inline void
-tcp_timer_set (tcp_timer_wheel_t * tw, tcp_connection_t * tc, u8 timer_id,
+tcp_timer_set (tcp_timer_wheel_t *tw, tcp_connection_t *tc, u8 timer_id,
u32 interval)
{
- ASSERT (tc->c_thread_index == vlib_get_thread_index ());
+ ASSERT (tcp_timer_thread_is_valid (tc));
ASSERT (tc->timers[timer_id] == TCP_TIMER_HANDLE_INVALID);
tc->timers[timer_id] = tw_timer_start_tcp_twsl (tw, tc->c_c_index,
timer_id, interval);
@@ -30,7 +37,7 @@ tcp_timer_set (tcp_timer_wheel_t * tw, tcp_connection_t * tc, u8 timer_id,
always_inline void
tcp_timer_reset (tcp_timer_wheel_t * tw, tcp_connection_t * tc, u8 timer_id)
{
- ASSERT (tc->c_thread_index == vlib_get_thread_index ());
+ ASSERT (tcp_timer_thread_is_valid (tc));
tc->pending_timers &= ~(1 << timer_id);
if (tc->timers[timer_id] == TCP_TIMER_HANDLE_INVALID)
return;
@@ -43,7 +50,7 @@ always_inline void
tcp_timer_update (tcp_timer_wheel_t * tw, tcp_connection_t * tc, u8 timer_id,
u32 interval)
{
- ASSERT (tc->c_thread_index == vlib_get_thread_index ());
+ ASSERT (tcp_timer_thread_is_valid (tc));
if (tc->timers[timer_id] != TCP_TIMER_HANDLE_INVALID)
tw_timer_update_tcp_twsl (tw, tc->timers[timer_id], interval);
else
diff --git a/src/vnet/tcp/tcp_types.h b/src/vnet/tcp/tcp_types.h
index aacfd8f2fd4..f9a9ff9a4da 100644
--- a/src/vnet/tcp/tcp_types.h
+++ b/src/vnet/tcp/tcp_types.h
@@ -389,7 +389,6 @@ typedef struct _tcp_connection
#define rst_state snd_wl1
} tcp_connection_t;
-/* *INDENT-OFF* */
struct _tcp_cc_algorithm
{
const char *name;
@@ -406,7 +405,6 @@ struct _tcp_cc_algorithm
void (*event) (tcp_connection_t *tc, tcp_cc_event_t evt);
u64 (*get_pacing_rate) (tcp_connection_t *tc);
};
-/* *INDENT-ON* */
#define tcp_fastrecovery_on(tc) (tc)->flags |= TCP_CONN_FAST_RECOVERY
#define tcp_fastrecovery_off(tc) (tc)->flags &= ~TCP_CONN_FAST_RECOVERY
diff --git a/src/vnet/teib/teib.c b/src/vnet/teib/teib.c
index 44bbc7cfd89..a9234bbeb5e 100644
--- a/src/vnet/teib/teib.c
+++ b/src/vnet/teib/teib.c
@@ -34,7 +34,7 @@ struct teib_entry_t_
{
teib_key_t *te_key;
fib_prefix_t te_nh;
- u32 te_fib_index;
+ u32 te_nh_fib_index;
};
typedef struct teib_db_t_
@@ -83,7 +83,7 @@ teib_entry_get_af (const teib_entry_t * te)
u32
teib_entry_get_fib_index (const teib_entry_t * te)
{
- return (te->te_fib_index);
+ return (te->te_nh_fib_index);
}
const ip_address_t *
@@ -101,7 +101,7 @@ teib_entry_get_nh (const teib_entry_t * te)
void
teib_entry_adj_stack (const teib_entry_t * te, adj_index_t ai)
{
- adj_midchain_delegate_stack (ai, te->te_fib_index, &te->te_nh);
+ adj_midchain_delegate_stack (ai, te->te_nh_fib_index, &te->te_nh);
}
teib_entry_t *
@@ -139,7 +139,7 @@ teib_entry_find_46 (u32 sw_if_index,
}
static void
-teib_adj_fib_add (const ip_address_t * ip, u32 sw_if_index, u32 fib_index)
+teib_adj_fib_add (const ip_address_t *ip, u32 sw_if_index, u32 peer_fib_index)
{
if (AF_IP6 == ip_addr_version (ip) &&
ip6_address_is_link_local_unicast (&ip_addr_v6 (ip)))
@@ -155,21 +155,18 @@ teib_adj_fib_add (const ip_address_t * ip, u32 sw_if_index, u32 fib_index)
fib_prefix_t pfx;
ip_address_to_fib_prefix (ip, &pfx);
- fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
- FIB_ENTRY_FLAG_ATTACHED,
- fib_proto_to_dpo (pfx.fp_proto),
- &pfx.fp_addr,
- sw_if_index,
- ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
-
+ fib_table_entry_path_add (
+ peer_fib_index, &pfx, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED,
+ fib_proto_to_dpo (pfx.fp_proto), &pfx.fp_addr, sw_if_index, ~0, 1,
+ NULL, FIB_ROUTE_PATH_FLAG_NONE);
if (0 == teib_db.td_n_entries[ip_addr_version (ip)]++)
- fib_table_lock (fib_index, pfx.fp_proto, FIB_SOURCE_ADJ);
+ fib_table_lock (peer_fib_index, pfx.fp_proto, FIB_SOURCE_ADJ);
}
}
static void
-teib_adj_fib_remove (ip_address_t * ip, u32 sw_if_index, u32 fib_index)
+teib_adj_fib_remove (ip_address_t *ip, u32 sw_if_index, u32 peer_fib_index)
{
if (AF_IP6 == ip_addr_version (ip) &&
ip6_address_is_link_local_unicast (&ip_addr_v6 (ip)))
@@ -185,14 +182,12 @@ teib_adj_fib_remove (ip_address_t * ip, u32 sw_if_index, u32 fib_index)
fib_prefix_t pfx;
ip_address_to_fib_prefix (ip, &pfx);
- fib_table_entry_path_remove (fib_index, &pfx, FIB_SOURCE_ADJ,
- fib_proto_to_dpo (pfx.fp_proto),
- &pfx.fp_addr,
- sw_if_index,
- ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove (
+ peer_fib_index, &pfx, FIB_SOURCE_ADJ, fib_proto_to_dpo (pfx.fp_proto),
+ &pfx.fp_addr, sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
if (0 == --teib_db.td_n_entries[ip_addr_version (ip)])
- fib_table_unlock (fib_index, pfx.fp_proto, FIB_SOURCE_ADJ);
+ fib_table_unlock (peer_fib_index, pfx.fp_proto, FIB_SOURCE_ADJ);
}
}
@@ -203,15 +198,17 @@ teib_entry_add (u32 sw_if_index,
{
fib_protocol_t nh_proto;
teib_entry_t *te;
- u32 fib_index;
+ u32 nh_fib_index, peer_fib_index;
index_t tei;
nh_proto = (AF_IP4 == ip_addr_version (nh) ?
FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6);
- fib_index = fib_table_find (nh_proto, nh_table_id);
+ peer_fib_index = fib_table_get_index_for_sw_if_index (
+ ip_address_family_to_fib_proto (peer->version), sw_if_index);
+ nh_fib_index = fib_table_find (nh_proto, nh_table_id);
- if (~0 == fib_index)
+ if (~0 == nh_fib_index)
{
return (VNET_API_ERROR_NO_SUCH_FIB);
}
@@ -233,12 +230,12 @@ teib_entry_add (u32 sw_if_index,
clib_memcpy (te->te_key, &nk, sizeof (*te->te_key));
ip_address_to_fib_prefix (nh, &te->te_nh);
- te->te_fib_index = fib_index;
+ te->te_nh_fib_index = nh_fib_index;
hash_set_mem (teib_db.td_db, te->te_key, tei);
/* we how have a /32 in the overlay, add an adj-fib */
- teib_adj_fib_add (&te->te_key->tk_peer, sw_if_index, fib_index);
+ teib_adj_fib_add (&te->te_key->tk_peer, sw_if_index, peer_fib_index);
TEIB_NOTIFY (te, nv_added);
TEIB_TE_INFO (te, "created");
@@ -262,13 +259,12 @@ teib_entry_del (u32 sw_if_index, const ip_address_t * peer)
{
TEIB_TE_INFO (te, "removed");
- u32 fib_index;
+ u32 peer_fib_index;
- fib_index = fib_table_get_index_for_sw_if_index
- (ip_address_family_to_fib_proto (ip_addr_version (peer)),
- sw_if_index);
+ peer_fib_index = fib_table_get_index_for_sw_if_index (
+ ip_address_family_to_fib_proto (peer->version), sw_if_index);
- teib_adj_fib_remove (&te->te_key->tk_peer, sw_if_index, fib_index);
+ teib_adj_fib_remove (&te->te_key->tk_peer, sw_if_index, peer_fib_index);
hash_unset_mem (teib_db.td_db, te->te_key);
@@ -301,7 +297,7 @@ format_teib_entry (u8 * s, va_list * args)
s = format (s, "%U", format_ip_address,
&te->te_key->tk_peer, IP46_TYPE_ANY);
s = format (s, " via [%d]:%U",
- fib_table_get_table_id (te->te_fib_index, te->te_nh.fp_proto),
+ fib_table_get_table_id (te->te_nh_fib_index, te->te_nh.fp_proto),
format_fib_prefix, &te->te_nh);
return (s);
@@ -312,12 +308,10 @@ teib_walk (teib_walk_cb_t fn, void *ctx)
{
index_t tei;
- /* *INDENT-OFF* */
pool_foreach_index (tei, teib_pool)
{
fn(tei, ctx);
}
- /* *INDENT-ON* */
}
void
@@ -325,13 +319,11 @@ teib_walk_itf (u32 sw_if_index, teib_walk_cb_t fn, void *ctx)
{
index_t tei;
- /* *INDENT-OFF* */
pool_foreach_index (tei, teib_pool)
{
if (sw_if_index == teib_entry_get_sw_if_index(teib_entry_get(tei)))
fn(tei, ctx);
}
- /* *INDENT-ON* */
}
static void
@@ -340,20 +332,18 @@ teib_walk_itf_proto (u32 sw_if_index,
{
index_t tei;
- /* *INDENT-OFF* */
pool_foreach_index (tei, teib_pool)
{
if (sw_if_index == teib_entry_get_sw_if_index(teib_entry_get(tei)) &&
af == teib_entry_get_af(teib_entry_get(tei)))
fn(tei, ctx);
}
- /* *INDENT-ON* */
}
typedef struct teib_table_bind_ctx_t_
{
- u32 new_fib_index;
- u32 old_fib_index;
+ u32 new_peer_fib_index;
+ u32 old_peer_fib_index;
} teib_table_bind_ctx_t;
static walk_rc_t
@@ -364,12 +354,13 @@ teib_walk_table_bind (index_t tei, void *arg)
te = teib_entry_get (tei);
- TEIB_TE_INFO (te, "bind: %d -> %d", ctx->old_fib_index, ctx->new_fib_index);
+ TEIB_TE_INFO (te, "bind: %d -> %d", ctx->old_peer_fib_index,
+ ctx->new_peer_fib_index);
- teib_adj_fib_remove (&te->te_key->tk_peer,
- te->te_key->tk_sw_if_index, ctx->old_fib_index);
- teib_adj_fib_add (&te->te_key->tk_peer,
- te->te_key->tk_sw_if_index, ctx->new_fib_index);
+ teib_adj_fib_remove (&te->te_key->tk_peer, te->te_key->tk_sw_if_index,
+ ctx->old_peer_fib_index);
+ teib_adj_fib_add (&te->te_key->tk_peer, te->te_key->tk_sw_if_index,
+ ctx->new_peer_fib_index);
return (WALK_CONTINUE);
}
@@ -380,8 +371,8 @@ teib_table_bind_v4 (ip4_main_t * im,
u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
{
teib_table_bind_ctx_t ctx = {
- .old_fib_index = old_fib_index,
- .new_fib_index = new_fib_index,
+ .old_peer_fib_index = old_fib_index,
+ .new_peer_fib_index = new_fib_index,
};
teib_walk_itf_proto (sw_if_index, AF_IP4, teib_walk_table_bind, &ctx);
@@ -393,8 +384,8 @@ teib_table_bind_v6 (ip6_main_t * im,
u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
{
teib_table_bind_ctx_t ctx = {
- .old_fib_index = old_fib_index,
- .new_fib_index = new_fib_index,
+ .old_peer_fib_index = old_fib_index,
+ .new_peer_fib_index = new_fib_index,
};
teib_walk_itf_proto (sw_if_index, AF_IP6, teib_walk_table_bind, &ctx);
diff --git a/src/vnet/teib/teib_cli.c b/src/vnet/teib/teib_cli.c
index a23902e0f60..03cec15c7a1 100644
--- a/src/vnet/teib/teib_cli.c
+++ b/src/vnet/teib/teib_cli.c
@@ -85,13 +85,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (teib_create_command, static) = {
.path = "create teib",
.short_help = "create teib <interface> peer <addr> nh <addr> [nh-table-id <ID>]",
.function = teib_add,
};
-/* *INDENT-ON* */
static clib_error_t *
teib_del (vlib_main_t * vm,
@@ -150,13 +148,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (teib_delete_command, static) = {
.path = "delete teib",
.short_help = "delete teib <interface> peer <addr>",
.function = teib_del,
};
-/* *INDENT-ON* */
static walk_rc_t
teib_show_one (index_t nei, void *ctx)
@@ -175,13 +171,11 @@ teib_show (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (teib_show_command, static) = {
.path = "show teib",
.short_help = "show teib",
.function = teib_show,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/tls/tls.c b/src/vnet/tls/tls.c
index c1689954975..5f00e6e302d 100644
--- a/src/vnet/tls/tls.c
+++ b/src/vnet/tls/tls.c
@@ -61,8 +61,7 @@ tls_add_vpp_q_rx_evt (session_t * s)
int
tls_add_vpp_q_builtin_rx_evt (session_t * s)
{
- if (svm_fifo_set_event (s->rx_fifo))
- session_send_io_evt_to_thread (s->rx_fifo, SESSION_IO_EVT_BUILTIN_RX);
+ session_enqueue_notify (s);
return 0;
}
@@ -75,9 +74,10 @@ tls_add_vpp_q_tx_evt (session_t * s)
}
static inline int
-tls_add_app_q_evt (app_worker_t * app, session_t * app_session)
+tls_add_app_q_evt (app_worker_t *app_wrk, session_t *app_session)
{
- return app_worker_lock_and_send_event (app, app_session, SESSION_IO_EVT_RX);
+ app_worker_add_event (app_wrk, app_session, SESSION_IO_EVT_RX);
+ return 0;
}
u32
@@ -115,57 +115,74 @@ u32
tls_ctx_half_open_alloc (void)
{
tls_main_t *tm = &tls_main;
- u8 will_expand = pool_get_will_expand (tm->half_open_ctx_pool);
tls_ctx_t *ctx;
- u32 ctx_index;
- if (PREDICT_FALSE (will_expand && vlib_num_workers ()))
- {
- clib_rwlock_writer_lock (&tm->half_open_rwlock);
- pool_get_zero (tm->half_open_ctx_pool, ctx);
- ctx->c_c_index = ctx - tm->half_open_ctx_pool;
- ctx_index = ctx->c_c_index;
- clib_rwlock_writer_unlock (&tm->half_open_rwlock);
- }
- else
- {
- /* reader lock assumption: only main thread will call pool_get */
- clib_rwlock_reader_lock (&tm->half_open_rwlock);
- pool_get_zero (tm->half_open_ctx_pool, ctx);
- ctx->c_c_index = ctx - tm->half_open_ctx_pool;
- ctx_index = ctx->c_c_index;
- clib_rwlock_reader_unlock (&tm->half_open_rwlock);
- }
- return ctx_index;
+ if (vec_len (tm->postponed_ho_free))
+ tls_flush_postponed_ho_cleanups ();
+
+ pool_get_aligned_safe (tm->half_open_ctx_pool, ctx, CLIB_CACHE_LINE_BYTES);
+
+ clib_memset (ctx, 0, sizeof (*ctx));
+ ctx->c_c_index = ctx - tm->half_open_ctx_pool;
+ ctx->c_thread_index = transport_cl_thread ();
+
+ return ctx->c_c_index;
}
void
tls_ctx_half_open_free (u32 ho_index)
{
- tls_main_t *tm = &tls_main;
- clib_rwlock_writer_lock (&tm->half_open_rwlock);
pool_put_index (tls_main.half_open_ctx_pool, ho_index);
- clib_rwlock_writer_unlock (&tm->half_open_rwlock);
}
tls_ctx_t *
tls_ctx_half_open_get (u32 ctx_index)
{
tls_main_t *tm = &tls_main;
- clib_rwlock_reader_lock (&tm->half_open_rwlock);
return pool_elt_at_index (tm->half_open_ctx_pool, ctx_index);
}
void
-tls_ctx_half_open_reader_unlock ()
+tls_add_postponed_ho_cleanups (u32 ho_index)
{
- clib_rwlock_reader_unlock (&tls_main.half_open_rwlock);
+ tls_main_t *tm = &tls_main;
+ vec_add1 (tm->postponed_ho_free, ho_index);
}
-u32
-tls_ctx_half_open_index (tls_ctx_t * ctx)
+static void
+tls_ctx_ho_try_free (u32 ho_index)
{
- return (ctx - tls_main.half_open_ctx_pool);
+ tls_ctx_t *ctx;
+
+ ctx = tls_ctx_half_open_get (ho_index);
+ /* Probably tcp connected just before tcp establish timeout and
+ * worker that owns established session has not yet received
+ * @ref tls_session_connected_cb */
+ if (!(ctx->flags & TLS_CONN_F_HO_DONE))
+ {
+ ctx->tls_session_handle = SESSION_INVALID_HANDLE;
+ tls_add_postponed_ho_cleanups (ho_index);
+ return;
+ }
+ if (!(ctx->flags & TLS_CONN_F_NO_APP_SESSION))
+ session_half_open_delete_notify (&ctx->connection);
+ tls_ctx_half_open_free (ho_index);
+}
+
+void
+tls_flush_postponed_ho_cleanups ()
+{
+ tls_main_t *tm = &tls_main;
+ u32 *ho_indexp, *tmp;
+
+ tmp = tm->postponed_ho_free;
+ tm->postponed_ho_free = tm->ho_free_list;
+ tm->ho_free_list = tmp;
+
+ vec_foreach (ho_indexp, tm->ho_free_list)
+ tls_ctx_ho_try_free (*ho_indexp);
+
+ vec_reset_length (tm->ho_free_list);
}
void
@@ -188,17 +205,19 @@ tls_notify_app_accept (tls_ctx_t * ctx)
lctx = tls_listener_ctx_get (ctx->listener_ctx_index);
app_listener = listen_session_get_from_handle (lctx->app_session_handle);
- app_session = session_get (ctx->c_s_index, ctx->c_thread_index);
- app_session->app_wrk_index = ctx->parent_app_wrk_index;
- app_session->connection_index = ctx->tls_ctx_handle;
+ app_session = session_alloc (ctx->c_thread_index);
+ app_session->session_state = SESSION_STATE_ACCEPTING;
app_session->session_type = app_listener->session_type;
app_session->listener_handle = listen_session_get_handle (app_listener);
- app_session->session_state = SESSION_STATE_ACCEPTING;
+ app_session->app_wrk_index = ctx->parent_app_wrk_index;
+ app_session->connection_index = ctx->tls_ctx_handle;
+ ctx->c_s_index = app_session->session_index;
if ((rv = app_worker_init_accepted (app_session)))
{
TLS_DBG (1, "failed to allocate fifos");
session_free (app_session);
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
return rv;
}
ctx->app_session_handle = session_handle (app_session);
@@ -217,45 +236,44 @@ tls_notify_app_connected (tls_ctx_t * ctx, session_error_t err)
app_wrk = app_worker_get_if_valid (ctx->parent_app_wrk_index);
if (!app_wrk)
{
- if (ctx->tls_type == TRANSPORT_PROTO_TLS)
- session_free (session_get (ctx->c_s_index, ctx->c_thread_index));
- ctx->no_app_session = 1;
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
return -1;
}
if (err)
{
- /* Free app session pre-allocated when transport was established */
- if (ctx->tls_type == TRANSPORT_PROTO_TLS)
- session_free (session_get (ctx->c_s_index, ctx->c_thread_index));
- ctx->no_app_session = 1;
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
goto send_reply;
}
- /* For DTLS the app session is not preallocated because the underlying udp
- * session might migrate to a different worker during the handshake */
+ app_session = session_alloc (ctx->c_thread_index);
+ app_session->session_state = SESSION_STATE_CREATED;
+ app_session->connection_index = ctx->tls_ctx_handle;
+
if (ctx->tls_type == TRANSPORT_PROTO_DTLS)
{
- session_type_t st;
/* Cleanup half-open session as we don't get notification from udp */
session_half_open_delete_notify (&ctx->connection);
- app_session = session_alloc (ctx->c_thread_index);
- app_session->session_state = SESSION_STATE_CREATED;
- ctx->c_s_index = app_session->session_index;
- st =
+ app_session->session_type =
session_type_from_proto_and_ip (TRANSPORT_PROTO_DTLS, ctx->tcp_is_ip4);
- app_session->session_type = st;
- app_session->connection_index = ctx->tls_ctx_handle;
}
else
{
- app_session = session_get (ctx->c_s_index, ctx->c_thread_index);
+ app_session->session_type =
+ session_type_from_proto_and_ip (TRANSPORT_PROTO_TLS, ctx->tcp_is_ip4);
}
app_session->app_wrk_index = ctx->parent_app_wrk_index;
+ app_session->opaque = ctx->parent_app_api_context;
+ ctx->c_s_index = app_session->session_index;
if ((err = app_worker_init_connected (app_wrk, app_session)))
- goto failed;
+ {
+ app_worker_connect_notify (app_wrk, 0, err, ctx->parent_app_api_context);
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
+ session_free (app_session);
+ return -1;
+ }
app_session->session_state = SESSION_STATE_READY;
parent_app_api_ctx = ctx->parent_app_api_context;
@@ -266,15 +284,12 @@ tls_notify_app_connected (tls_ctx_t * ctx, session_error_t err)
{
TLS_DBG (1, "failed to notify app");
session_free (session_get (ctx->c_s_index, ctx->c_thread_index));
- ctx->no_app_session = 1;
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
return -1;
}
return 0;
-failed:
- ctx->no_app_session = 1;
- tls_disconnect (ctx->tls_ctx_handle, vlib_get_thread_index ());
send_reply:
return app_worker_connect_notify (app_wrk, 0, err,
ctx->parent_app_api_context);
@@ -385,6 +400,12 @@ tls_ctx_transport_close (tls_ctx_t * ctx)
}
static inline int
+tls_ctx_transport_reset (tls_ctx_t *ctx)
+{
+ return tls_vfts[ctx->tls_ctx_engine].ctx_transport_reset (ctx);
+}
+
+static inline int
tls_ctx_app_close (tls_ctx_t * ctx)
{
return tls_vfts[ctx->tls_ctx_engine].ctx_app_close (ctx);
@@ -419,43 +440,20 @@ tls_notify_app_io_error (tls_ctx_t *ctx)
}
void
-tls_session_reset_callback (session_t * s)
+tls_session_reset_callback (session_t *ts)
{
tls_ctx_t *ctx;
- transport_connection_t *tc;
- session_t *app_session;
- ctx = tls_ctx_get (s->opaque);
- ctx->is_passive_close = 1;
- tc = &ctx->connection;
- if (tls_ctx_handshake_is_over (ctx))
- {
- session_transport_reset_notify (tc);
- session_transport_closed_notify (tc);
- tls_disconnect_transport (ctx);
- }
- else
- if ((app_session =
- session_get_if_valid (ctx->c_s_index, ctx->c_thread_index)))
- {
- session_free (app_session);
- ctx->c_s_index = SESSION_INVALID_INDEX;
- tls_disconnect_transport (ctx);
- }
+ ctx = tls_ctx_get_w_thread (ts->opaque, ts->thread_index);
+ ctx->flags |= TLS_CONN_F_PASSIVE_CLOSE;
+ tls_ctx_transport_reset (ctx);
}
static void
tls_session_cleanup_ho (session_t *s)
{
- tls_ctx_t *ctx;
- u32 ho_index;
-
/* session opaque stores the opaque passed on connect */
- ho_index = s->opaque;
- ctx = tls_ctx_half_open_get (ho_index);
- session_half_open_delete_notify (&ctx->connection);
- tls_ctx_half_open_reader_unlock ();
- tls_ctx_half_open_free (ho_index);
+ tls_ctx_ho_try_free (s->opaque);
}
int
@@ -483,61 +481,69 @@ tls_session_disconnect_callback (session_t * tls_session)
|| vlib_thread_is_main_w_barrier ());
ctx = tls_ctx_get_w_thread (tls_session->opaque, tls_session->thread_index);
- ctx->is_passive_close = 1;
+ ctx->flags |= TLS_CONN_F_PASSIVE_CLOSE;
tls_ctx_transport_close (ctx);
}
int
-tls_session_accept_callback (session_t * tls_session)
+tls_session_accept_callback (session_t *ts)
{
- session_t *tls_listener, *app_session;
+ session_t *tls_listener;
tls_ctx_t *lctx, *ctx;
u32 ctx_handle;
- tls_listener =
- listen_session_get_from_handle (tls_session->listener_handle);
+ tls_listener = listen_session_get_from_handle (ts->listener_handle);
lctx = tls_listener_ctx_get (tls_listener->opaque);
ctx_handle = tls_ctx_alloc (lctx->tls_ctx_engine);
ctx = tls_ctx_get (ctx_handle);
- memcpy (ctx, lctx, sizeof (*lctx));
- ctx->c_thread_index = vlib_get_thread_index ();
+ clib_memcpy (ctx, lctx, sizeof (*lctx));
+ ctx->c_s_index = SESSION_INVALID_INDEX;
+ ctx->c_thread_index = ts->thread_index;
ctx->tls_ctx_handle = ctx_handle;
- tls_session->session_state = SESSION_STATE_READY;
- tls_session->opaque = ctx_handle;
- ctx->tls_session_handle = session_handle (tls_session);
+ ts->opaque = ctx_handle;
+ ctx->tls_session_handle = session_handle (ts);
ctx->listener_ctx_index = tls_listener->opaque;
ctx->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
ctx->ckpair_index = lctx->ckpair_index;
- /* Preallocate app session. Avoids allocating a session post handshake
- * on tls_session rx and potentially invalidating the session pool */
- app_session = session_alloc (ctx->c_thread_index);
- app_session->session_state = SESSION_STATE_CREATED;
- ctx->c_s_index = app_session->session_index;
-
TLS_DBG (1, "Accept on listener %u new connection [%u]%x",
tls_listener->opaque, vlib_get_thread_index (), ctx_handle);
- return tls_ctx_init_server (ctx);
+ if (tls_ctx_init_server (ctx))
+ {
+ /* Do not free ctx yet, in case we have pending rx events */
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
+ tls_disconnect_transport (ctx);
+ }
+
+ if (ts->session_state < SESSION_STATE_READY)
+ ts->session_state = SESSION_STATE_READY;
+
+ return 0;
}
int
-tls_app_rx_callback (session_t * tls_session)
+tls_app_rx_callback (session_t *ts)
{
tls_ctx_t *ctx;
/* DTLS session migrating, wait for next notification */
- if (PREDICT_FALSE (tls_session->flags & SESSION_F_IS_MIGRATING))
+ if (PREDICT_FALSE (ts->flags & SESSION_F_IS_MIGRATING))
return 0;
- ctx = tls_ctx_get (tls_session->opaque);
- if (PREDICT_FALSE (ctx->no_app_session))
+ /* Read rescheduled but underlying transport deleted now */
+ if (PREDICT_FALSE ((ts->session_state == SESSION_STATE_TRANSPORT_DELETED)))
+ return 0;
+
+ ctx = tls_ctx_get (ts->opaque);
+ if (PREDICT_FALSE ((ctx->flags & TLS_CONN_F_NO_APP_SESSION) ||
+ (ctx->flags & TLS_CONN_F_APP_CLOSED)))
{
TLS_DBG (1, "Local App closed");
return 0;
}
- tls_ctx_read (ctx, tls_session);
+ tls_ctx_read (ctx, ts);
return 0;
}
@@ -556,9 +562,7 @@ int
tls_session_connected_cb (u32 tls_app_index, u32 ho_ctx_index,
session_t *tls_session, session_error_t err)
{
- session_t *app_session;
tls_ctx_t *ho_ctx, *ctx;
- session_type_t st;
u32 ctx_handle;
ho_ctx = tls_ctx_half_open_get (ho_ctx_index);
@@ -566,8 +570,9 @@ tls_session_connected_cb (u32 tls_app_index, u32 ho_ctx_index,
ctx_handle = tls_ctx_alloc (ho_ctx->tls_ctx_engine);
ctx = tls_ctx_get (ctx_handle);
clib_memcpy_fast (ctx, ho_ctx, sizeof (*ctx));
+
/* Half-open freed on tcp half-open cleanup notification */
- tls_ctx_half_open_reader_unlock ();
+ __atomic_fetch_or (&ho_ctx->flags, TLS_CONN_F_HO_DONE, __ATOMIC_RELEASE);
ctx->c_thread_index = vlib_get_thread_index ();
ctx->tls_ctx_handle = ctx_handle;
@@ -579,18 +584,17 @@ tls_session_connected_cb (u32 tls_app_index, u32 ho_ctx_index,
ctx->tls_session_handle = session_handle (tls_session);
tls_session->opaque = ctx_handle;
- tls_session->session_state = SESSION_STATE_READY;
- /* Preallocate app session. Avoids allocating a session post handshake
- * on tls_session rx and potentially invalidating the session pool */
- app_session = session_alloc (ctx->c_thread_index);
- app_session->session_state = SESSION_STATE_CREATED;
- ctx->c_s_index = app_session->session_index;
- st = session_type_from_proto_and_ip (TRANSPORT_PROTO_TLS, ctx->tcp_is_ip4);
- app_session->session_type = st;
- app_session->connection_index = ctx->tls_ctx_handle;
+ if (tls_ctx_init_client (ctx))
+ {
+ tls_notify_app_connected (ctx, SESSION_E_TLS_HANDSHAKE);
+ tls_disconnect_transport (ctx);
+ }
- return tls_ctx_init_client (ctx);
+ if (tls_session->session_state < SESSION_STATE_READY)
+ tls_session->session_state = SESSION_STATE_READY;
+
+ return 0;
}
int
@@ -622,13 +626,13 @@ tls_session_connected_callback (u32 tls_app_index, u32 ho_ctx_index,
u32 api_context;
ho_ctx = tls_ctx_half_open_get (ho_ctx_index);
+ ho_ctx->flags |= TLS_CONN_F_HO_DONE;
app_wrk = app_worker_get_if_valid (ho_ctx->parent_app_wrk_index);
if (app_wrk)
{
api_context = ho_ctx->parent_app_api_context;
app_worker_connect_notify (app_wrk, 0, err, api_context);
}
- tls_ctx_half_open_reader_unlock ();
return 0;
}
@@ -655,7 +659,7 @@ tls_app_session_cleanup (session_t * s, session_cleanup_ntf_t ntf)
}
ctx = tls_ctx_get (s->opaque);
- if (!ctx->no_app_session)
+ if (!(ctx->flags & TLS_CONN_F_NO_APP_SESSION))
session_transport_delete_notify (&ctx->connection);
tls_ctx_free (ctx);
}
@@ -681,7 +685,7 @@ dtls_migrate_ctx (void *arg)
/* Probably the app detached while the session was migrating. Cleanup */
if (session_half_open_migrated_notify (&ctx->connection))
{
- ctx->no_app_session = 1;
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
tls_disconnect (ctx->tls_ctx_handle, vlib_get_thread_index ());
return;
}
@@ -700,7 +704,7 @@ dtls_session_migrate_callback (session_t *us, session_handle_t new_sh)
ctx = tls_ctx_get_w_thread (us->opaque, us->thread_index);
ctx->tls_session_handle = new_sh;
cloned_ctx = tls_ctx_detach (ctx);
- ctx->is_migrated = 1;
+ ctx->flags |= TLS_CONN_F_MIGRATED;
session_half_open_migrate_notify (&ctx->connection);
session_send_rpc_evt_to_thread (new_thread, dtls_migrate_ctx,
@@ -709,11 +713,22 @@ dtls_session_migrate_callback (session_t *us, session_handle_t new_sh)
tls_ctx_free (ctx);
}
+static void
+tls_session_transport_closed_callback (session_t *ts)
+{
+ tls_ctx_t *ctx;
+
+ ctx = tls_ctx_get_w_thread (ts->opaque, ts->thread_index);
+ if (!(ctx->flags & TLS_CONN_F_NO_APP_SESSION))
+ session_transport_closed_notify (&ctx->connection);
+}
+
static session_cb_vft_t tls_app_cb_vft = {
.session_accept_callback = tls_session_accept_callback,
.session_disconnect_callback = tls_session_disconnect_callback,
.session_connected_callback = tls_session_connected_callback,
.session_reset_callback = tls_session_reset_callback,
+ .session_transport_closed_callback = tls_session_transport_closed_callback,
.half_open_cleanup_callback = tls_session_cleanup_ho,
.add_segment_callback = tls_add_segment_callback,
.del_segment_callback = tls_del_segment_callback,
@@ -766,7 +781,6 @@ tls_connect (transport_endpoint_cfg_t * tep)
ctx->srv_hostname = format (0, "%s", ccfg->hostname);
vec_terminate_c_string (ctx->srv_hostname);
}
- tls_ctx_half_open_reader_unlock ();
ctx->tls_ctx_engine = engine_type;
@@ -776,7 +790,10 @@ tls_connect (transport_endpoint_cfg_t * tep)
cargs->api_context = ctx_index;
cargs->sep_ext.ns_index = app->ns_index;
if ((rv = vnet_connect (cargs)))
- return rv;
+ {
+ tls_ctx_half_open_free (ctx_index);
+ return rv;
+ }
/* Track half-open tcp session in case we need to clean it up */
ctx->tls_session_handle = cargs->sh;
@@ -793,6 +810,7 @@ tls_disconnect (u32 ctx_handle, u32 thread_index)
TLS_DBG (1, "Disconnecting %x", ctx_handle);
ctx = tls_ctx_get (ctx_handle);
+ ctx->flags |= TLS_CONN_F_APP_CLOSED;
tls_ctx_app_close (ctx);
}
@@ -936,39 +954,53 @@ tls_listener_get (u32 listener_index)
static transport_connection_t *
tls_half_open_get (u32 ho_index)
{
- tls_main_t *tm = &tls_main;
tls_ctx_t *ctx;
ctx = tls_ctx_half_open_get (ho_index);
- clib_rwlock_reader_unlock (&tm->half_open_rwlock);
return &ctx->connection;
}
static void
tls_cleanup_ho (u32 ho_index)
{
- tls_main_t *tm = &tls_main;
- session_handle_t tcp_sh;
tls_ctx_t *ctx;
+ session_t *s;
ctx = tls_ctx_half_open_get (ho_index);
- tcp_sh = ctx->tls_session_handle;
- clib_rwlock_reader_unlock (&tm->half_open_rwlock);
- session_cleanup_half_open (tcp_sh);
- tls_ctx_half_open_free (ho_index);
+ /* Already pending cleanup */
+ if (ctx->tls_session_handle == SESSION_INVALID_HANDLE)
+ {
+ ASSERT (ctx->flags & TLS_CONN_F_HO_DONE);
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
+ return;
+ }
+
+ s = session_get_from_handle (ctx->tls_session_handle);
+ /* If no pending cleanup notification, force cleanup now. Otherwise,
+ * wait for cleanup notification and set no app session on ctx */
+ if (s->session_state != SESSION_STATE_TRANSPORT_DELETED)
+ {
+ session_cleanup_half_open (ctx->tls_session_handle);
+ tls_ctx_half_open_free (ho_index);
+ }
+ else
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
}
int
tls_custom_tx_callback (void *session, transport_send_params_t * sp)
{
- session_t *app_session = (session_t *) session;
+ session_t *as = (session_t *) session;
tls_ctx_t *ctx;
- if (PREDICT_FALSE (app_session->session_state
- >= SESSION_STATE_TRANSPORT_CLOSED))
- return 0;
+ if (PREDICT_FALSE (as->session_state >= SESSION_STATE_TRANSPORT_CLOSED ||
+ as->session_state <= SESSION_STATE_ACCEPTING))
+ {
+ sp->flags |= TRANSPORT_SND_F_DESCHED;
+ return 0;
+ }
- ctx = tls_ctx_get (app_session->connection_index);
- return tls_ctx_write (ctx, app_session, sp);
+ ctx = tls_ctx_get (as->connection_index);
+ return tls_ctx_write (ctx, as, sp);
}
u8 *
@@ -1079,6 +1111,7 @@ format_tls_half_open (u8 * s, va_list * args)
{
u32 ho_index = va_arg (*args, u32);
u32 __clib_unused thread_index = va_arg (*args, u32);
+ u32 __clib_unused verbose = va_arg (*args, u32);
session_t *tcp_ho;
tls_ctx_t *ho_ctx;
@@ -1090,7 +1123,6 @@ format_tls_half_open (u8 * s, va_list * args)
ho_ctx->parent_app_wrk_index, ho_ctx->tls_ctx_engine,
tcp_ho->thread_index, tcp_ho->session_index);
- tls_ctx_half_open_reader_unlock ();
return s;
}
@@ -1099,10 +1131,11 @@ tls_transport_endpoint_get (u32 ctx_handle, u32 thread_index,
transport_endpoint_t * tep, u8 is_lcl)
{
tls_ctx_t *ctx = tls_ctx_get_w_thread (ctx_handle, thread_index);
- session_t *tcp_session;
+ session_t *ts;
- tcp_session = session_get_from_handle (ctx->tls_session_handle);
- session_get_endpoint (tcp_session, tep, is_lcl);
+ ts = session_get_from_handle (ctx->tls_session_handle);
+ if (ts && ts->session_state < SESSION_STATE_TRANSPORT_DELETED)
+ session_get_endpoint (ts, tep, is_lcl);
}
static void
@@ -1125,7 +1158,7 @@ tls_enable (vlib_main_t * vm, u8 is_en)
vnet_app_attach_args_t _a, *a = &_a;
u64 options[APP_OPTIONS_N_OPTIONS];
tls_main_t *tm = &tls_main;
- u32 fifo_size = 128 << 12;
+ u32 fifo_size = 512 << 10;
if (!is_en)
{
@@ -1334,8 +1367,6 @@ tls_init (vlib_main_t * vm)
if (!tm->ca_cert_path)
tm->ca_cert_path = TLS_CA_CERT_PATH;
- clib_rwlock_init (&tm->half_open_rwlock);
-
vec_validate (tm->rx_bufs, num_threads - 1);
vec_validate (tm->tx_bufs, num_threads - 1);
diff --git a/src/vnet/tls/tls.h b/src/vnet/tls/tls.h
index 4a5da15a88f..6bd1371b984 100644
--- a/src/vnet/tls/tls.h
+++ b/src/vnet/tls/tls.h
@@ -36,26 +36,48 @@
#define TLS_DBG(_lvl, _fmt, _args...)
#endif
-/* *INDENT-OFF* */
typedef struct tls_cxt_id_
{
- union {
- session_handle_t app_session_handle;
- u32 parent_app_api_ctx;
- };
+ session_handle_t app_session_handle;
session_handle_t tls_session_handle;
void *migrate_ctx;
u32 parent_app_wrk_index;
u32 ssl_ctx;
- u32 listener_ctx_index;
+ union
+ {
+ u32 listener_ctx_index;
+ u32 parent_app_api_ctx;
+ };
u8 tcp_is_ip4;
u8 tls_engine_id;
} tls_ctx_id_t;
-/* *INDENT-ON* */
STATIC_ASSERT (sizeof (tls_ctx_id_t) <= TRANSPORT_CONN_ID_LEN,
"ctx id must be less than TRANSPORT_CONN_ID_LEN");
+#define foreach_tls_conn_flags \
+ _ (HO_DONE, "ho-done") \
+ _ (PASSIVE_CLOSE, "passive-close") \
+ _ (APP_CLOSED, "app-closed") \
+ _ (MIGRATED, "migrated") \
+ _ (NO_APP_SESSION, "no-app-session") \
+ _ (RESUME, "resume") \
+ _ (HS_DONE, "handshake-done")
+
+typedef enum tls_conn_flags_bit_
+{
+#define _(sym, str) TLS_CONN_F_BIT_##sym,
+ foreach_tls_conn_flags
+#undef _
+} tls_conn_flags_bit_t;
+
+typedef enum tls_conn_flags_
+{
+#define _(sym, str) TLS_CONN_F_##sym = 1 << TLS_CONN_F_BIT_##sym,
+ foreach_tls_conn_flags
+#undef _
+} __clib_packed tls_conn_flags_t;
+
typedef struct tls_ctx_
{
union
@@ -76,11 +98,7 @@ typedef struct tls_ctx_
#define parent_app_api_context c_tls_ctx_id.parent_app_api_ctx
#define migration_ctx c_tls_ctx_id.migrate_ctx
- u8 is_passive_close;
- u8 resume;
- u8 app_closed;
- u8 no_app_session;
- u8 is_migrated;
+ tls_conn_flags_t flags;
u8 *srv_hostname;
u32 evt_index;
u32 ckpair_index;
@@ -92,7 +110,8 @@ typedef struct tls_main_
u32 app_index;
tls_ctx_t *listener_ctx_pool;
tls_ctx_t *half_open_ctx_pool;
- clib_rwlock_t half_open_rwlock;
+ u32 *postponed_ho_free;
+ u32 *ho_free_list;
u8 **rx_bufs;
u8 **tx_bufs;
@@ -124,6 +143,7 @@ typedef struct tls_engine_vft_
int (*ctx_start_listen) (tls_ctx_t * ctx);
int (*ctx_stop_listen) (tls_ctx_t * ctx);
int (*ctx_transport_close) (tls_ctx_t * ctx);
+ int (*ctx_transport_reset) (tls_ctx_t *ctx);
int (*ctx_app_close) (tls_ctx_t * ctx);
int (*ctx_reinit_cachain) (void);
} tls_engine_vft_t;
@@ -141,6 +161,10 @@ void tls_notify_app_enqueue (tls_ctx_t * ctx, session_t * app_session);
void tls_notify_app_io_error (tls_ctx_t *ctx);
void tls_disconnect_transport (tls_ctx_t * ctx);
int tls_reinit_ca_chain (crypto_engine_type_t tls_engine_id);
+
+void tls_add_postponed_ho_cleanups (u32 ho_index);
+void tls_flush_postponed_ho_cleanups ();
+
#endif /* SRC_VNET_TLS_TLS_H_ */
/*
diff --git a/src/vnet/udp/udp.api b/src/vnet/udp/udp.api
index 02176be7c2b..6b468be461a 100644
--- a/src/vnet/udp/udp.api
+++ b/src/vnet/udp/udp.api
@@ -32,7 +32,7 @@ import "vnet/ip/ip_types.api";
* @param dst_ip - Encap destination address
* @param src_ip - Encap source address
* @param dst_port - Encap destination port
- * @param src_port - Encap source port
+ * @param src_port - Encap source port, 0 for entropy per RFC 7510
* @param id - VPP assigned id; ignored in add message, set in dump
*/
typedef udp_encap
diff --git a/src/vnet/udp/udp.c b/src/vnet/udp/udp.c
index 9b2ed886d0f..b3c02510232 100644
--- a/src/vnet/udp/udp.c
+++ b/src/vnet/udp/udp.c
@@ -26,80 +26,60 @@ static void
udp_connection_register_port (u16 lcl_port, u8 is_ip4)
{
udp_main_t *um = &udp_main;
- udp_dst_port_info_t *pi;
u16 *n;
- pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- if (!pi)
- {
- udp_add_dst_port (um, lcl_port, 0, is_ip4);
- pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- pi->n_connections = 1;
- }
- else
- {
- pi->n_connections += 1;
- /* Do not return. The fact that the pi is valid does not mean
- * it's up to date */
- }
-
- pi->node_index = is_ip4 ? udp4_input_node.index : udp6_input_node.index;
- pi->next_index = um->local_to_input_edge[is_ip4];
+ /* Setup udp protocol -> next index sparse vector mapping. Do not setup
+ * udp_dst_port_info_t as that is used to distinguish between external
+ * and transport consumed ports */
- /* Setup udp protocol -> next index sparse vector mapping. */
if (is_ip4)
- n = sparse_vec_validate (um->next_by_dst_port4,
- clib_host_to_net_u16 (lcl_port));
+ n = sparse_vec_validate (um->next_by_dst_port4, lcl_port);
else
- n = sparse_vec_validate (um->next_by_dst_port6,
- clib_host_to_net_u16 (lcl_port));
+ n = sparse_vec_validate (um->next_by_dst_port6, lcl_port);
+
+ n[0] = um->local_to_input_edge[is_ip4];
+
+ __atomic_add_fetch (&um->transport_ports_refcnt[is_ip4][lcl_port], 1,
+ __ATOMIC_RELAXED);
+}
- n[0] = pi->next_index;
+void
+udp_connection_share_port (u16 lcl_port, u8 is_ip4)
+{
+ udp_main_t *um = &udp_main;
+ __atomic_add_fetch (&um->transport_ports_refcnt[is_ip4][lcl_port], 1,
+ __ATOMIC_RELAXED);
}
static void
udp_connection_unregister_port (u16 lcl_port, u8 is_ip4)
{
udp_main_t *um = &udp_main;
- udp_dst_port_info_t *pi;
+ u16 *n;
- pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- if (!pi)
+ /* Needed because listeners are not tracked as local endpoints */
+ if (__atomic_sub_fetch (&um->transport_ports_refcnt[is_ip4][lcl_port], 1,
+ __ATOMIC_RELAXED))
return;
- if (!pi->n_connections)
- {
- clib_warning ("no connections using port %u", lcl_port);
- return;
- }
-
- if (!clib_atomic_sub_fetch (&pi->n_connections, 1))
- udp_unregister_dst_port (0, lcl_port, is_ip4);
-}
-
-void
-udp_connection_share_port (u16 lcl_port, u8 is_ip4)
-{
- udp_main_t *um = &udp_main;
- udp_dst_port_info_t *pi;
+ if (is_ip4)
+ n = sparse_vec_validate (um->next_by_dst_port4, lcl_port);
+ else
+ n = sparse_vec_validate (um->next_by_dst_port6, lcl_port);
- /* Done without a lock but the operation is atomic. Writers to pi hash
- * table and vector should be guarded by a barrier sync */
- pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- clib_atomic_fetch_add_rel (&pi->n_connections, 1);
+ n[0] = UDP_NO_NODE_SET;
}
udp_connection_t *
udp_connection_alloc (u32 thread_index)
{
- udp_main_t *um = &udp_main;
+ udp_worker_t *wrk = udp_worker_get (thread_index);
udp_connection_t *uc;
- pool_get_aligned_safe (um->connections[thread_index], uc,
- CLIB_CACHE_LINE_BYTES);
+ pool_get_aligned_safe (wrk->connections, uc, CLIB_CACHE_LINE_BYTES);
clib_memset (uc, 0, sizeof (*uc));
- uc->c_c_index = uc - um->connections[thread_index];
+ uc->c_c_index = uc - wrk->connections;
uc->c_thread_index = thread_index;
uc->c_proto = TRANSPORT_PROTO_UDP;
return uc;
@@ -108,20 +88,20 @@ udp_connection_alloc (u32 thread_index)
void
udp_connection_free (udp_connection_t * uc)
{
- u32 thread_index = uc->c_thread_index;
+ udp_worker_t *wrk = udp_worker_get (uc->c_thread_index);
+
clib_spinlock_free (&uc->rx_lock);
if (CLIB_DEBUG)
clib_memset (uc, 0xFA, sizeof (*uc));
- pool_put (udp_main.connections[thread_index], uc);
+ pool_put (wrk->connections, uc);
}
static void
udp_connection_cleanup (udp_connection_t * uc)
{
- transport_endpoint_cleanup (TRANSPORT_PROTO_UDP, &uc->c_lcl_ip,
- uc->c_lcl_port);
- udp_connection_unregister_port (clib_net_to_host_u16 (uc->c_lcl_port),
- uc->c_is_ip4);
+ transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &uc->c_lcl_ip,
+ uc->c_lcl_port);
+ udp_connection_unregister_port (uc->c_lcl_port, uc->c_is_ip4);
udp_connection_free (uc);
}
@@ -132,6 +112,38 @@ udp_connection_delete (udp_connection_t * uc)
udp_connection_cleanup (uc);
}
+static void
+udp_handle_cleanups (void *args)
+{
+ u32 thread_index = (u32) pointer_to_uword (args);
+ udp_connection_t *uc;
+ udp_worker_t *wrk;
+ u32 *uc_index;
+
+ wrk = udp_worker_get (thread_index);
+ vec_foreach (uc_index, wrk->pending_cleanups)
+ {
+ uc = udp_connection_get (*uc_index, thread_index);
+ udp_connection_delete (uc);
+ }
+ vec_reset_length (wrk->pending_cleanups);
+}
+
+static void
+udp_connection_program_cleanup (udp_connection_t *uc)
+{
+ uword thread_index = uc->c_thread_index;
+ udp_worker_t *wrk;
+
+ wrk = udp_worker_get (uc->c_thread_index);
+ vec_add1 (wrk->pending_cleanups, uc->c_c_index);
+
+ if (vec_len (wrk->pending_cleanups) == 1)
+ session_send_rpc_evt_to_thread_force (
+ thread_index, udp_handle_cleanups,
+ uword_to_pointer (thread_index, void *));
+}
+
static u8
udp_connection_port_used_extern (u16 lcl_port, u8 is_ip4)
{
@@ -139,8 +151,7 @@ udp_connection_port_used_extern (u16 lcl_port, u8 is_ip4)
udp_dst_port_info_t *pi;
pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- return (pi && !pi->n_connections
- && udp_is_valid_dst_port (lcl_port, is_ip4));
+ return (pi && udp_is_valid_dst_port (lcl_port, is_ip4));
}
static u16
@@ -156,12 +167,10 @@ udp_session_bind (u32 session_index, transport_endpoint_cfg_t *lcl)
udp_main_t *um = vnet_get_udp_main ();
transport_endpoint_cfg_t *lcl_ext;
udp_connection_t *listener;
- u16 lcl_port_ho;
void *iface_ip;
- lcl_port_ho = clib_net_to_host_u16 (lcl->port);
-
- if (udp_connection_port_used_extern (lcl_port_ho, lcl->is_ip4))
+ if (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl->port),
+ lcl->is_ip4))
{
clib_warning ("port already used");
return SESSION_E_PORTINUSE;
@@ -194,8 +203,10 @@ udp_session_bind (u32 session_index, transport_endpoint_cfg_t *lcl)
else
listener->c_flags |= TRANSPORT_CONNECTION_F_CLESS;
clib_spinlock_init (&listener->rx_lock);
+ if (!um->csum_offload)
+ listener->cfg_flags |= UDP_CFG_F_NO_CSUM_OFFLOAD;
- udp_connection_register_port (lcl_port_ho, lcl->is_ip4);
+ udp_connection_register_port (listener->c_lcl_port, lcl->is_ip4);
return listener->c_c_index;
}
@@ -206,8 +217,7 @@ udp_session_unbind (u32 listener_index)
udp_connection_t *listener;
listener = udp_listener_get (listener_index);
- udp_connection_unregister_port (clib_net_to_host_u16 (listener->c_lcl_port),
- listener->c_is_ip4);
+ udp_connection_unregister_port (listener->c_lcl_port, listener->c_is_ip4);
clib_spinlock_free (&listener->rx_lock);
pool_put (um->listener_pool, listener);
return 0;
@@ -223,38 +233,68 @@ udp_session_get_listener (u32 listener_index)
}
always_inline u32
-udp_push_one_header (vlib_main_t *vm, udp_connection_t *uc, vlib_buffer_t *b)
+udp_push_one_header (vlib_main_t *vm, udp_connection_t *uc, vlib_buffer_t *b,
+ u8 is_cless)
{
- vlib_buffer_push_udp (b, uc->c_lcl_port, uc->c_rmt_port, 1);
- if (uc->c_is_ip4)
- vlib_buffer_push_ip4_custom (vm, b, &uc->c_lcl_ip4, &uc->c_rmt_ip4,
- IP_PROTOCOL_UDP, 1 /* csum offload */,
- 0 /* is_df */, uc->c_dscp);
- else
- vlib_buffer_push_ip6 (vm, b, &uc->c_lcl_ip6, &uc->c_rmt_ip6,
- IP_PROTOCOL_UDP);
- vnet_buffer (b)->sw_if_index[VLIB_RX] = uc->sw_if_index;
- vnet_buffer (b)->sw_if_index[VLIB_TX] = uc->c_fib_index;
b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ /* reuse tcp metadata for now */
+ vnet_buffer (b)->tcp.connection_index = uc->c_c_index;
+
+ if (!is_cless)
+ {
+ vlib_buffer_push_udp (b, uc->c_lcl_port, uc->c_rmt_port,
+ udp_csum_offload (uc));
+
+ if (uc->c_is_ip4)
+ vlib_buffer_push_ip4_custom (vm, b, &uc->c_lcl_ip4, &uc->c_rmt_ip4,
+ IP_PROTOCOL_UDP, udp_csum_offload (uc),
+ 0 /* is_df */, uc->c_dscp);
+ else
+ vlib_buffer_push_ip6 (vm, b, &uc->c_lcl_ip6, &uc->c_rmt_ip6,
+ IP_PROTOCOL_UDP);
+
+ vnet_buffer (b)->tcp.flags = 0;
+ }
+ else
+ {
+ u8 *data = vlib_buffer_get_current (b);
+ session_dgram_hdr_t hdr;
+
+ hdr = *(session_dgram_hdr_t *) (data - sizeof (hdr));
+
+ /* Local port assumed to be bound, not overwriting it */
+ vlib_buffer_push_udp (b, uc->c_lcl_port, hdr.rmt_port,
+ udp_csum_offload (uc));
+
+ if (uc->c_is_ip4)
+ vlib_buffer_push_ip4_custom (vm, b, &hdr.lcl_ip.ip4, &hdr.rmt_ip.ip4,
+ IP_PROTOCOL_UDP, udp_csum_offload (uc),
+ 0 /* is_df */, uc->c_dscp);
+ else
+ vlib_buffer_push_ip6 (vm, b, &hdr.lcl_ip.ip6, &hdr.rmt_ip.ip6,
+ IP_PROTOCOL_UDP);
+
+ /* Not connected udp session. Mark buffer for custom handling in
+ * udp_output */
+ vnet_buffer (b)->tcp.flags |= UDP_CONN_F_LISTEN;
+ }
return 0;
}
-static u32
-udp_push_header (transport_connection_t *tc, vlib_buffer_t **bs, u32 n_bufs)
+always_inline void
+udp_push_header_batch (udp_connection_t *uc, vlib_buffer_t **bs, u32 n_bufs,
+ u8 is_cless)
{
vlib_main_t *vm = vlib_get_main ();
- udp_connection_t *uc;
-
- uc = udp_connection_from_transport (tc);
while (n_bufs >= 4)
{
vlib_prefetch_buffer_header (bs[2], STORE);
vlib_prefetch_buffer_header (bs[3], STORE);
- udp_push_one_header (vm, uc, bs[0]);
- udp_push_one_header (vm, uc, bs[1]);
+ udp_push_one_header (vm, uc, bs[0], is_cless);
+ udp_push_one_header (vm, uc, bs[1], is_cless);
n_bufs -= 2;
bs += 2;
@@ -264,16 +304,28 @@ udp_push_header (transport_connection_t *tc, vlib_buffer_t **bs, u32 n_bufs)
if (n_bufs > 1)
vlib_prefetch_buffer_header (bs[1], STORE);
- udp_push_one_header (vm, uc, bs[0]);
+ udp_push_one_header (vm, uc, bs[0], is_cless);
n_bufs -= 1;
bs += 1;
}
+}
+
+static u32
+udp_push_header (transport_connection_t *tc, vlib_buffer_t **bs, u32 n_bufs)
+{
+ udp_connection_t *uc;
+
+ uc = udp_connection_from_transport (tc);
+ if (uc->flags & UDP_CONN_F_CONNECTED)
+ udp_push_header_batch (uc, bs, n_bufs, 0 /* is_cless */);
+ else
+ udp_push_header_batch (uc, bs, n_bufs, 1 /* is_cless */);
if (PREDICT_FALSE (uc->flags & UDP_CONN_F_CLOSING))
{
- if (!transport_max_tx_dequeue (&uc->connection))
- udp_connection_delete (uc);
+ if (!transport_tx_fifo_has_dgram (&uc->connection))
+ udp_connection_program_cleanup (uc);
}
return 0;
@@ -298,8 +350,8 @@ udp_session_close (u32 connection_index, u32 thread_index)
if (!uc || (uc->flags & UDP_CONN_F_MIGRATED))
return;
- if (!transport_max_tx_dequeue (&uc->connection))
- udp_connection_delete (uc);
+ if (!transport_tx_fifo_has_dgram (&uc->connection))
+ udp_connection_program_cleanup (uc);
else
uc->flags |= UDP_CONN_F_CLOSING;
}
@@ -347,46 +399,32 @@ udp_open_connection (transport_endpoint_cfg_t * rmt)
rv = transport_alloc_local_endpoint (TRANSPORT_PROTO_UDP, rmt, &lcl_addr,
&lcl_port);
if (rv)
- {
- if (rv != SESSION_E_PORTINUSE)
- return rv;
-
- if (udp_connection_port_used_extern (lcl_port, rmt->is_ip4))
- return SESSION_E_PORTINUSE;
-
- /* If port in use, check if 5-tuple is also in use */
- if (session_lookup_connection (rmt->fib_index, &lcl_addr, &rmt->ip,
- lcl_port, rmt->port, TRANSPORT_PROTO_UDP,
- rmt->is_ip4))
- return SESSION_E_PORTINUSE;
-
- /* 5-tuple is available so increase lcl endpoint refcount and proceed
- * with connection allocation */
- transport_share_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr,
- lcl_port);
- goto conn_alloc;
- }
+ return rv;
- if (udp_is_valid_dst_port (lcl_port, rmt->is_ip4))
+ if (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl_port),
+ rmt->is_ip4))
{
/* If specific source port was requested abort */
if (rmt->peer.port)
- return SESSION_E_PORTINUSE;
+ {
+ transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr,
+ lcl_port);
+ return SESSION_E_PORTINUSE;
+ }
/* Try to find a port that's not used */
- while (udp_is_valid_dst_port (lcl_port, rmt->is_ip4))
+ while (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl_port),
+ rmt->is_ip4))
{
- lcl_port = transport_alloc_local_port (TRANSPORT_PROTO_UDP,
- &lcl_addr);
- if (lcl_port < 1)
+ transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr,
+ lcl_port);
+ lcl_port =
+ transport_alloc_local_port (TRANSPORT_PROTO_UDP, &lcl_addr, rmt);
+ if ((int) lcl_port < 1)
return SESSION_E_PORTINUSE;
}
}
-conn_alloc:
-
- udp_connection_register_port (lcl_port, rmt->is_ip4);
-
/* We don't poll main thread if we have workers */
thread_index = transport_cl_thread ();
@@ -394,7 +432,7 @@ conn_alloc:
ip_copy (&uc->c_rmt_ip, &rmt->ip, rmt->is_ip4);
ip_copy (&uc->c_lcl_ip, &lcl_addr, rmt->is_ip4);
uc->c_rmt_port = rmt->port;
- uc->c_lcl_port = clib_host_to_net_u16 (lcl_port);
+ uc->c_lcl_port = lcl_port;
uc->c_is_ip4 = rmt->is_ip4;
uc->c_proto = TRANSPORT_PROTO_UDP;
uc->c_fib_index = rmt->fib_index;
@@ -412,6 +450,12 @@ conn_alloc:
clib_spinlock_init (&uc->rx_lock);
uc->c_flags |= TRANSPORT_CONNECTION_F_CLESS;
}
+ if (!um->csum_offload)
+ uc->cfg_flags |= UDP_CFG_F_NO_CSUM_OFFLOAD;
+ uc->next_node_index = rmt->next_node_index;
+ uc->next_node_opaque = rmt->next_node_opaque;
+
+ udp_connection_register_port (uc->c_lcl_port, rmt->is_ip4);
return uc->c_c_index;
}
@@ -461,8 +505,90 @@ format_udp_listener_session (u8 * s, va_list * args)
return format (s, "%U", format_udp_connection, uc, verbose);
}
-/* *INDENT-OFF* */
+static void
+udp_realloc_ports_sv (u16 **ports_nh_svp)
+{
+ u16 port, port_no, *ports_nh_sv, *mc;
+ u32 *ports = 0, *nh = 0, msum, i;
+ sparse_vec_header_t *h;
+ uword sv_index, *mb;
+
+ ports_nh_sv = *ports_nh_svp;
+
+ for (port = 1; port < 65535; port++)
+ {
+ port_no = clib_host_to_net_u16 (port);
+
+ sv_index = sparse_vec_index (ports_nh_sv, port_no);
+ if (sv_index != SPARSE_VEC_INVALID_INDEX)
+ {
+ vec_add1 (ports, port_no);
+ vec_add1 (nh, ports_nh_sv[sv_index]);
+ }
+ }
+
+ sparse_vec_free (ports_nh_sv);
+
+ ports_nh_sv =
+ sparse_vec_new (/* elt bytes */ sizeof (ports_nh_sv[0]),
+ /* bits in index */ BITS (((udp_header_t *) 0)->dst_port));
+
+ vec_resize (ports_nh_sv, 65535);
+
+ for (port = 1; port < 65535; port++)
+ ports_nh_sv[port] = UDP_NO_NODE_SET;
+
+ for (i = 0; i < vec_len (ports); i++)
+ ports_nh_sv[ports[i]] = nh[i];
+
+ h = sparse_vec_header (ports_nh_sv);
+ vec_foreach (mb, h->is_member_bitmap)
+ *mb = (uword) ~0;
+
+ msum = 0;
+ vec_foreach (mc, h->member_counts)
+ {
+ *mc = msum;
+ msum += msum == 0 ? 63 : 64;
+ }
+
+ vec_free (ports);
+ vec_free (nh);
+
+ *ports_nh_svp = ports_nh_sv;
+}
+
+static clib_error_t *
+udp_enable_disable (vlib_main_t *vm, u8 is_en)
+{
+ udp_main_t *um = &udp_main;
+
+ /* Not ideal. The sparse vector used to map ports to next nodes assumes
+ * only a few ports are ever used. When udp transport is enabled this does
+ * not hold and, to make matters worse, ports are consumed in a random
+ * order.
+ *
+ * This can lead to a lot of slow updates to internal data structures
+ * which in turn can slow udp connection allocations until all ports are
+ * eventually consumed.
+ *
+ * Consequently, reallocate sparse vector, preallocate all ports and have
+ * them point to UDP_NO_NODE_SET. We could consider switching the sparse
+ * vector to a preallocated vector but that would increase memory
+ * consumption for vpp deployments that do not rely on host stack.
+ */
+
+ udp_realloc_ports_sv (&um->next_by_dst_port4);
+ udp_realloc_ports_sv (&um->next_by_dst_port6);
+
+ vec_validate (um->transport_ports_refcnt[0], 65535);
+ vec_validate (um->transport_ports_refcnt[1], 65535);
+
+ return 0;
+}
+
static const transport_proto_vft_t udp_proto = {
+ .enable = udp_enable_disable,
.start_listen = udp_session_bind,
.connect = udp_open_connection,
.stop_listen = udp_session_unbind,
@@ -483,7 +609,6 @@ static const transport_proto_vft_t udp_proto = {
.service_type = TRANSPORT_SERVICE_CL,
},
};
-/* *INDENT-ON* */
static clib_error_t *
udp_init (vlib_main_t * vm)
@@ -505,18 +630,18 @@ udp_init (vlib_main_t * vm)
pi->format_header = format_udp_header;
pi->unformat_pg_edit = unformat_pg_udp_header;
- /* Register as transport with URI */
+ /* Register as transport with session layer */
transport_register_protocol (TRANSPORT_PROTO_UDP, &udp_proto,
- FIB_PROTOCOL_IP4, ip4_lookup_node.index);
+ FIB_PROTOCOL_IP4, udp4_output_node.index);
transport_register_protocol (TRANSPORT_PROTO_UDP, &udp_proto,
- FIB_PROTOCOL_IP6, ip6_lookup_node.index);
+ FIB_PROTOCOL_IP6, udp6_output_node.index);
/*
* Initialize data structures
*/
num_threads = 1 /* main thread */ + tm->n_threads;
- vec_validate (um->connections, num_threads - 1);
+ vec_validate (um->wrk, num_threads - 1);
um->local_to_input_edge[UDP_IP4] =
vlib_node_add_next (vm, udp4_local_node.index, udp4_input_node.index);
@@ -524,16 +649,15 @@ udp_init (vlib_main_t * vm)
vlib_node_add_next (vm, udp6_local_node.index, udp6_input_node.index);
um->default_mtu = 1500;
+ um->csum_offload = 1;
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (udp_init) =
{
.runs_after = VLIB_INITS("ip_main_init", "ip4_lookup_init",
"ip6_lookup_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/udp/udp.h b/src/vnet/udp/udp.h
index d51805099ce..8e4e87f85a8 100644
--- a/src/vnet/udp/udp.h
+++ b/src/vnet/udp/udp.h
@@ -25,6 +25,8 @@
#include <vnet/ip/ip.h>
#include <vnet/session/transport.h>
+#define UDP_NO_NODE_SET ((u16) ~0)
+
typedef enum
{
#define udp_error(f, n, s, d) UDP_ERROR_##f,
@@ -55,6 +57,24 @@ typedef enum udp_conn_flags_
#undef _
} udp_conn_flags_t;
+#define foreach_udp_cfg_flag _ (NO_CSUM_OFFLOAD, "no-csum-offload")
+
+typedef enum udp_cfg_flag_bits_
+{
+#define _(sym, str) UDP_CFG_F_##sym##_BIT,
+ foreach_udp_cfg_flag
+#undef _
+ UDP_CFG_N_FLAG_BITS
+} udp_cfg_flag_bits_e;
+
+typedef enum udp_cfg_flag_
+{
+#define _(sym, str) UDP_CFG_F_##sym = 1 << UDP_CFG_F_##sym##_BIT,
+ foreach_udp_cfg_flag
+#undef _
+ UDP_CFG_N_FLAGS
+} __clib_packed udp_cfg_flags_t;
+
typedef struct
{
/** Required for pool_get_aligned */
@@ -62,10 +82,15 @@ typedef struct
transport_connection_t connection; /**< must be first */
clib_spinlock_t rx_lock; /**< rx fifo lock */
u8 flags; /**< connection flags */
+ udp_cfg_flags_t cfg_flags; /**< configuration flags */
u16 mss; /**< connection mss */
u32 sw_if_index; /**< connection sw_if_index */
+ u32 next_node_index; /**< Can be used to control next node in output */
+ u32 next_node_opaque; /**< Opaque to pass to next node */
} udp_connection_t;
+#define udp_csum_offload(uc) (!((uc)->cfg_flags & UDP_CFG_F_NO_CSUM_OFFLOAD))
+
typedef struct
{
/* Name (a c string). */
@@ -80,9 +105,6 @@ typedef struct
/* Next index for this type. */
u32 next_index;
- /* UDP sessions refcount (not tunnels) */
- u32 n_connections;
-
/* Parser for packet generator edits for this protocol */
unformat_function_t *unformat_pg_edit;
} udp_dst_port_info_t;
@@ -94,6 +116,12 @@ typedef enum
N_UDP_AF,
} udp_af_t;
+typedef struct udp_worker_
+{
+ udp_connection_t *connections;
+ u32 *pending_cleanups;
+} udp_worker_t;
+
typedef struct
{
udp_dst_port_info_t *dst_port_infos[N_UDP_AF];
@@ -113,13 +141,19 @@ typedef struct
u32 local_to_input_edge[N_UDP_AF];
/*
- * Per-worker thread udp connection pools used with session layer
+ * UDP transport layer per-thread context
*/
- udp_connection_t **connections;
+
+ udp_worker_t *wrk;
udp_connection_t *listener_pool;
+ /* Refcounts for ports consumed by udp transports to handle
+ * both passive and active opens using the same port */
+ u16 *transport_ports_refcnt[N_UDP_AF];
+
u16 default_mtu;
u16 msg_id_base;
+ u8 csum_offload;
u8 icmp_send_unreachable_disabled;
} udp_main_t;
@@ -129,16 +163,26 @@ extern vlib_node_registration_t udp4_input_node;
extern vlib_node_registration_t udp6_input_node;
extern vlib_node_registration_t udp4_local_node;
extern vlib_node_registration_t udp6_local_node;
+extern vlib_node_registration_t udp4_output_node;
+extern vlib_node_registration_t udp6_output_node;
void udp_add_dst_port (udp_main_t * um, udp_dst_port_t dst_port,
char *dst_port_name, u8 is_ip4);
+always_inline udp_worker_t *
+udp_worker_get (u32 thread_index)
+{
+ return vec_elt_at_index (udp_main.wrk, thread_index);
+}
+
always_inline udp_connection_t *
udp_connection_get (u32 conn_index, u32 thread_index)
{
- if (pool_is_free_index (udp_main.connections[thread_index], conn_index))
+ udp_worker_t *wrk = udp_worker_get (thread_index);
+
+ if (pool_is_free_index (wrk->connections, conn_index))
return 0;
- return pool_elt_at_index (udp_main.connections[thread_index], conn_index);
+ return pool_elt_at_index (wrk->connections, conn_index);
}
always_inline udp_connection_t *
@@ -161,6 +205,7 @@ udp_connection_from_transport (transport_connection_t * tc)
void udp_connection_free (udp_connection_t * uc);
udp_connection_t *udp_connection_alloc (u32 thread_index);
+void udp_connection_share_port (u16 lcl_port, u8 is_ip4);
always_inline udp_connection_t *
udp_connection_clone_safe (u32 connection_index, u32 thread_index)
@@ -171,7 +216,7 @@ udp_connection_clone_safe (u32 connection_index, u32 thread_index)
new_c = udp_connection_alloc (current_thread_index);
new_index = new_c->c_c_index;
/* Connection pool always realloced with barrier */
- old_c = udp_main.connections[thread_index] + connection_index;
+ old_c = udp_main.wrk[thread_index].connections + connection_index;
clib_memcpy_fast (new_c, old_c, sizeof (*new_c));
old_c->flags |= UDP_CONN_F_MIGRATED;
new_c->c_thread_index = current_thread_index;
@@ -195,8 +240,6 @@ format_function_t format_udp_connection;
unformat_function_t unformat_udp_header;
unformat_function_t unformat_udp_port;
-void udp_connection_share_port (u16 lcl_port, u8 is_ip4);
-
void udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
/*
diff --git a/src/vnet/udp/udp_api.c b/src/vnet/udp/udp_api.c
index 0f2d014946f..1f952aa36ea 100644
--- a/src/vnet/udp/udp_api.c
+++ b/src/vnet/udp/udp_api.c
@@ -86,12 +86,10 @@ vl_api_udp_encap_dump_t_handler (vl_api_udp_encap_dump_t *mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (ue, udp_encap_pool)
{
send_udp_encap_details(ue, reg, mp->context);
}
- /* *INDENT-ON* */
}
static void
@@ -99,6 +97,7 @@ vl_api_udp_encap_add_t_handler (vl_api_udp_encap_add_t *mp)
{
vl_api_udp_encap_add_reply_t *rmp;
ip46_address_t src_ip, dst_ip;
+ udp_encap_fixup_flags_t flags;
u32 fib_index, table_id;
fib_protocol_t fproto;
ip46_type_t itype;
@@ -119,19 +118,19 @@ vl_api_udp_encap_add_t_handler (vl_api_udp_encap_add_t *mp)
goto done;
}
- uei = udp_encap_add_and_lock (fproto, fib_index,
- &src_ip, &dst_ip,
+ flags = UDP_ENCAP_FIXUP_NONE;
+ if (mp->udp_encap.src_port == 0)
+ flags |= UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY;
+
+ uei = udp_encap_add_and_lock (fproto, fib_index, &src_ip, &dst_ip,
ntohs (mp->udp_encap.src_port),
- ntohs (mp->udp_encap.dst_port),
- UDP_ENCAP_FIXUP_NONE);
+ ntohs (mp->udp_encap.dst_port), flags);
done:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_UDP_ENCAP_ADD_REPLY,
({
rmp->id = ntohl (uei);
}));
- /* *INDENT-ON* */
}
@@ -189,11 +188,19 @@ vl_api_udp_decap_add_del_t_handler (vl_api_udp_decap_add_del_t *mp)
static clib_error_t *
udp_api_hookup (vlib_main_t * vm)
{
+ api_main_t *am = vlibapi_get_main ();
+
/*
* Set up the (msg_name, crc, message-id) table
*/
REPLY_MSG_ID_BASE = setup_message_id_table ();
+ /* Mark these APIs as mp safe */
+ vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_UDP_ENCAP_ADD, 1);
+ vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_UDP_ENCAP_DEL, 1);
+ vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_UDP_ENCAP_DUMP,
+ 1);
+
return 0;
}
diff --git a/src/vnet/udp/udp_cli.c b/src/vnet/udp/udp_cli.c
index 9787eedf933..6c8992cd0de 100644
--- a/src/vnet/udp/udp_cli.c
+++ b/src/vnet/udp/udp_cli.c
@@ -38,6 +38,33 @@ format_udp_connection_id (u8 * s, va_list * args)
return s;
}
+static const char *udp_cfg_flags_str[] = {
+#define _(sym, str) str,
+ foreach_udp_cfg_flag
+#undef _
+};
+
+static u8 *
+format_udp_cfg_flags (u8 *s, va_list *args)
+{
+ udp_connection_t *tc = va_arg (*args, udp_connection_t *);
+ int i, last = -1;
+
+ for (i = 0; i < UDP_CFG_N_FLAG_BITS; i++)
+ if (tc->cfg_flags & (1 << i))
+ last = i;
+ if (last >= 0)
+ s = format (s, " cfg: ");
+ for (i = 0; i < last; i++)
+ {
+ if (tc->cfg_flags & (1 << i))
+ s = format (s, "%s, ", udp_cfg_flags_str[i]);
+ }
+ if (last >= 0)
+ s = format (s, "%s", udp_cfg_flags_str[last]);
+ return s;
+}
+
static const char *udp_connection_flags_str[] = {
#define _(sym, str) str,
foreach_udp_connection_flag
@@ -68,10 +95,13 @@ format_udp_vars (u8 * s, va_list * args)
{
udp_connection_t *uc = va_arg (*args, udp_connection_t *);
- s = format (s, " index %u flags: %U", uc->c_c_index,
- format_udp_connection_flags, uc);
+ s = format (s, " index %u%U flags: %U\n", uc->c_c_index,
+ format_udp_cfg_flags, uc, format_udp_connection_flags, uc);
+ s = format (s, " fib_index: %u next_node: %u opaque: %u ", uc->c_fib_index);
if (!(uc->flags & UDP_CONN_F_LISTEN))
- s = format (s, " \n sw_if_index: %d, mss: %u\n", uc->sw_if_index, uc->mss);
+ s = format (s, " sw_if_index: %d mss: %u\n", uc->sw_if_index, uc->mss);
+ else
+ s = format (s, "\n");
return s;
}
@@ -106,6 +136,8 @@ udp_config_fn (vlib_main_t * vm, unformat_input_t * input)
um->default_mtu = tmp;
else if (unformat (input, "icmp-unreachable-disabled"))
um->icmp_send_unreachable_disabled = 1;
+ else if (unformat (input, "no-csum-offload"))
+ um->csum_offload = 0;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
@@ -155,7 +187,7 @@ show_udp_punt_fn (vlib_main_t * vm, unformat_input_t * input,
u8 *s = NULL;
vec_foreach (port_info, um->dst_port_infos[UDP_IP6])
{
- if (udp_is_valid_dst_port (port_info->dst_port, 01))
+ if (udp_is_valid_dst_port (port_info->dst_port, 0))
{
s = format (s, (!s) ? "%d" : ", %d", port_info->dst_port);
}
@@ -166,29 +198,32 @@ show_udp_punt_fn (vlib_main_t * vm, unformat_input_t * input,
return (error);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_tcp_punt_command, static) =
{
.path = "show udp punt",
.short_help = "show udp punt [ipv4|ipv6]",
.function = show_udp_punt_fn,
};
-/* *INDENT-ON* */
static void
table_format_udp_port_ (vlib_main_t *vm, udp_main_t *um, table_t *t, int *c,
int port, int bind, int is_ip4)
{
- const udp_dst_port_info_t *pi = udp_get_dst_port_info (um, port, is_ip4);
- if (!pi)
+ const udp_dst_port_info_t *pi;
+
+ if (bind && !udp_is_valid_dst_port (port, is_ip4))
return;
- if (bind && ~0 == pi->node_index)
+
+ pi = udp_get_dst_port_info (um, port, is_ip4);
+ if (!pi)
return;
+
table_format_cell (t, *c, 0, "%d", pi->dst_port);
table_format_cell (t, *c, 1, is_ip4 ? "ip4" : "ip6");
table_format_cell (t, *c, 2, ~0 == pi->node_index ? "none" : "%U",
format_vlib_node_name, vm, pi->node_index);
table_format_cell (t, *c, 3, "%s", pi->name);
+
(*c)++;
}
@@ -265,6 +300,98 @@ VLIB_CLI_COMMAND (show_udp_ports_cmd, static) = {
.is_mp_safe = 1,
};
+static void
+table_format_udp_transport_port_ (vlib_main_t *vm, table_t *t, int *c,
+ int port, int is_ip4)
+{
+ udp_main_t *um = &udp_main;
+ u32 refcnt;
+ u16 port_ne;
+
+ port_ne = clib_host_to_net_u16 (port);
+ refcnt = um->transport_ports_refcnt[is_ip4][port_ne];
+ if (!refcnt)
+ return;
+
+ if (!udp_is_valid_dst_port (port, is_ip4))
+ {
+ clib_warning ("Port %u is not registered refcnt %u!", port, refcnt);
+ return;
+ }
+
+ table_format_cell (t, *c, 0, "%d", port);
+ table_format_cell (t, *c, 1, is_ip4 ? "ip4" : "ip6");
+ table_format_cell (t, *c, 2, "%d", refcnt);
+
+ (*c)++;
+}
+
+static void
+table_format_udp_transport_port (vlib_main_t *vm, table_t *t, int *c, int port,
+ int ipv)
+{
+ if (ipv == -1 || ipv == 0)
+ table_format_udp_transport_port_ (vm, t, c, port, 1 /* is_ip4 */);
+ if (ipv == -1 || ipv == 1)
+ table_format_udp_transport_port_ (vm, t, c, port, 0 /* is_ip4 */);
+}
+
+static clib_error_t *
+show_udp_transport_ports (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ table_t table = {}, *t = &table;
+ int ipv = -1, port = -1, c = 0;
+ clib_error_t *err = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "ip4"))
+ ipv = 0;
+ else if (unformat (input, "ip6"))
+ ipv = 1;
+ else if (unformat (input, "%d", &port))
+ ;
+ else
+ {
+ err = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto out;
+ }
+ }
+
+ table_add_header_col (t, 3, "port", "proto", "ref-cnt");
+
+ if (port > 65535)
+ {
+ err = clib_error_return (0, "wrong port %d", port);
+ goto out;
+ }
+
+ if (port < 0)
+ {
+ for (port = 0; port < 65536; port++)
+ table_format_udp_transport_port (vm, t, &c, port, ipv);
+ }
+ else
+ {
+ table_format_udp_transport_port (vm, t, &c, port, ipv);
+ }
+
+ vlib_cli_output (vm, "%U\n", format_table, t);
+
+out:
+ table_free (t);
+ return err;
+}
+
+VLIB_CLI_COMMAND (show_udp_transport_ports_cmd, static) = {
+ .path = "show udp transport ports",
+ .function = show_udp_transport_ports,
+ .short_help = "show udp transport ports [ip4|ip6] [<port>]",
+ .is_mp_safe = 1,
+};
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/udp/udp_encap.c b/src/vnet/udp/udp_encap.c
index a0f5a50c223..e4e5271da63 100644
--- a/src/vnet/udp/udp_encap.c
+++ b/src/vnet/udp/udp_encap.c
@@ -195,6 +195,20 @@ udp_encap_dpo_unlock (dpo_id_t * dpo)
fib_node_unlock (&ue->ue_fib_node);
}
+u8 *
+format_udp_encap_fixup_flags (u8 *s, va_list *args)
+{
+ udp_encap_fixup_flags_t flags = va_arg (*args, udp_encap_fixup_flags_t);
+
+ if (flags == UDP_ENCAP_FIXUP_NONE)
+ return format (s, "none");
+
+ if (flags & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY)
+ s = format (s, "%s", "src-port-is-entropy");
+
+ return (s);
+}
+
static u8 *
format_udp_encap_i (u8 * s, va_list * args)
{
@@ -210,23 +224,21 @@ format_udp_encap_i (u8 * s, va_list * args)
s = format (s, "udp-encap:[%d]: ip-fib-index:%d ", uei, ue->ue_fib_index);
if (FIB_PROTOCOL_IP4 == ue->ue_ip_proto)
{
- s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d, dst:%d]",
- format_ip4_address,
- &ue->ue_hdrs.ip4.ue_ip4.src_address,
- format_ip4_address,
- &ue->ue_hdrs.ip4.ue_ip4.dst_address,
+ s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d, dst:%d] flags:%U",
+ format_ip4_address, &ue->ue_hdrs.ip4.ue_ip4.src_address,
+ format_ip4_address, &ue->ue_hdrs.ip4.ue_ip4.dst_address,
clib_net_to_host_u16 (ue->ue_hdrs.ip4.ue_udp.src_port),
- clib_net_to_host_u16 (ue->ue_hdrs.ip4.ue_udp.dst_port));
+ clib_net_to_host_u16 (ue->ue_hdrs.ip4.ue_udp.dst_port),
+ format_udp_encap_fixup_flags, ue->ue_flags);
}
else
{
- s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d dst:%d]",
- format_ip6_address,
- &ue->ue_hdrs.ip6.ue_ip6.src_address,
- format_ip6_address,
- &ue->ue_hdrs.ip6.ue_ip6.dst_address,
+ s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d dst:%d] flags:%U",
+ format_ip6_address, &ue->ue_hdrs.ip6.ue_ip6.src_address,
+ format_ip6_address, &ue->ue_hdrs.ip6.ue_ip6.dst_address,
clib_net_to_host_u16 (ue->ue_hdrs.ip6.ue_udp.src_port),
- clib_net_to_host_u16 (ue->ue_hdrs.ip6.ue_udp.dst_port));
+ clib_net_to_host_u16 (ue->ue_hdrs.ip6.ue_udp.dst_port),
+ format_udp_encap_fixup_flags, ue->ue_flags);
}
vlib_get_combined_counter (&(udp_encap_counters), uei, &to);
s = format (s, " to:[%Ld:%Ld]]", to.packets, to.bytes);
@@ -506,13 +518,11 @@ udp_encap_walk (udp_encap_walk_cb_t cb, void *ctx)
{
index_t uei;
- /* *INDENT-OFF* */
pool_foreach_index (uei, udp_encap_pool)
{
if (WALK_STOP == cb(uei, ctx))
break;
}
- /* *INDENT-ON* */
}
clib_error_t *
@@ -535,12 +545,10 @@ udp_encap_show (vlib_main_t * vm,
if (INDEX_INVALID == uei)
{
- /* *INDENT-OFF* */
pool_foreach_index (uei, udp_encap_pool)
{
vlib_cli_output(vm, "%U", format_udp_encap, uei, 0);
}
- /* *INDENT-ON* */
}
else
{
@@ -550,20 +558,20 @@ udp_encap_show (vlib_main_t * vm,
return NULL;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (udp_encap_add_command, static) = {
.path = "udp encap",
- .short_help = "udp encap [add|del] <id ID> <src-ip> <dst-ip> [<src-port>] <dst-port> [src-port-is-entropy] [table-id <table>]",
+ .short_help = "udp encap [add|del] <id ID> <src-ip> <dst-ip> [<src-port>] "
+ "<dst-port> [src-port-is-entropy] [table-id <table>]",
.function = udp_encap_cli,
.is_mp_safe = 1,
};
+
VLIB_CLI_COMMAND (udp_encap_show_command, static) = {
.path = "show udp encap",
.short_help = "show udp encap [ID]",
.function = udp_encap_show,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/udp/udp_encap.h b/src/vnet/udp/udp_encap.h
index 648e3b59e6d..c8b42ffa92c 100644
--- a/src/vnet/udp/udp_encap.h
+++ b/src/vnet/udp/udp_encap.h
@@ -115,6 +115,7 @@ extern index_t udp_encap_add_and_lock (fib_protocol_t proto,
extern void udp_encap_lock (index_t uei);
extern void udp_encap_unlock (index_t uei);
extern u8 *format_udp_encap (u8 * s, va_list * args);
+extern u8 *format_udp_encap_fixup_flags (u8 *s, va_list *args);
extern void udp_encap_contribute_forwarding (index_t uei,
dpo_proto_t proto,
dpo_id_t * dpo);
diff --git a/src/vnet/udp/udp_encap_node.c b/src/vnet/udp/udp_encap_node.c
index 1ebe79532f4..a86614f5475 100644
--- a/src/vnet/udp/udp_encap_node.c
+++ b/src/vnet/udp/udp_encap_node.c
@@ -20,12 +20,16 @@ typedef struct udp4_encap_trace_t_
{
udp_header_t udp;
ip4_header_t ip;
+ u32 flow_hash;
+ udp_encap_fixup_flags_t flags;
} udp4_encap_trace_t;
typedef struct udp6_encap_trace_t_
{
udp_header_t udp;
ip6_header_t ip;
+ u32 flow_hash;
+ udp_encap_fixup_flags_t flags;
} udp6_encap_trace_t;
extern vlib_combined_counter_main_t udp_encap_counters;
@@ -35,13 +39,16 @@ format_udp4_encap_trace (u8 * s, va_list * args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ u32 indent = format_get_indent (s);
udp4_encap_trace_t *t;
t = va_arg (*args, udp4_encap_trace_t *);
- s = format (s, "%U\n %U",
- format_ip4_header, &t->ip, sizeof (t->ip),
- format_udp_header, &t->udp, sizeof (t->udp));
+ s = format (s, "flags: %U, flow hash: 0x%08x\n%U%U\n%U%U",
+ format_udp_encap_fixup_flags, t->flags, t->flow_hash,
+ format_white_space, indent, format_ip4_header, &t->ip,
+ sizeof (t->ip), format_white_space, indent, format_udp_header,
+ &t->udp, sizeof (t->udp));
return (s);
}
@@ -50,13 +57,16 @@ format_udp6_encap_trace (u8 * s, va_list * args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ u32 indent = format_get_indent (s);
udp6_encap_trace_t *t;
t = va_arg (*args, udp6_encap_trace_t *);
- s = format (s, "%U\n %U",
- format_ip6_header, &t->ip, sizeof (t->ip),
- format_udp_header, &t->udp, sizeof (t->udp));
+ s = format (s, "flags: %U, flow hash: 0x%08x\n%U%U\n%U%U",
+ format_udp_encap_fixup_flags, t->flags, t->flow_hash,
+ format_white_space, indent, format_ip6_header, &t->ip,
+ sizeof (t->ip), format_white_space, indent, format_udp_header,
+ &t->udp, sizeof (t->udp));
return (s);
}
@@ -127,13 +137,16 @@ udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
sizeof (udp_header_t) + sizeof (ip6_header_t);
ip_udp_encap_two (vm, b0, b1, (u8 *) &ue0->ue_hdrs,
(u8 *) &ue1->ue_hdrs, n_bytes, encap_family,
- payload_family);
+ payload_family, ue0->ue_flags, ue1->ue_flags);
+
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
udp6_encap_trace_t *tr =
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->udp = ue0->ue_hdrs.ip6.ue_udp;
tr->ip = ue0->ue_hdrs.ip6.ue_ip6;
+ tr->flags = ue0->ue_flags;
+ tr->flow_hash = vnet_buffer (b0)->ip.flow_hash;
}
if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -141,6 +154,8 @@ udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_add_trace (vm, node, b1, sizeof (*tr));
tr->udp = ue1->ue_hdrs.ip6.ue_udp;
tr->ip = ue1->ue_hdrs.ip6.ue_ip6;
+ tr->flags = ue1->ue_flags;
+ tr->flow_hash = vnet_buffer (b1)->ip.flow_hash;
}
}
else
@@ -150,7 +165,7 @@ udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
ip_udp_encap_two (vm, b0, b1, (u8 *) &ue0->ue_hdrs,
(u8 *) &ue1->ue_hdrs, n_bytes, encap_family,
- payload_family);
+ payload_family, ue0->ue_flags, ue1->ue_flags);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -158,6 +173,8 @@ udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->udp = ue0->ue_hdrs.ip4.ue_udp;
tr->ip = ue0->ue_hdrs.ip4.ue_ip4;
+ tr->flags = ue0->ue_flags;
+ tr->flow_hash = vnet_buffer (b0)->ip.flow_hash;
}
if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -165,6 +182,8 @@ udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_add_trace (vm, node, b1, sizeof (*tr));
tr->udp = ue1->ue_hdrs.ip4.ue_udp;
tr->ip = ue1->ue_hdrs.ip4.ue_ip4;
+ tr->flags = ue1->ue_flags;
+ tr->flow_hash = vnet_buffer (b1)->ip.flow_hash;
}
}
@@ -208,7 +227,7 @@ udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
const u8 n_bytes =
sizeof (udp_header_t) + sizeof (ip6_header_t);
ip_udp_encap_one (vm, b0, (u8 *) &ue0->ue_hdrs.ip6, n_bytes,
- encap_family, payload_family);
+ encap_family, payload_family, ue0->ue_flags);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -216,6 +235,8 @@ udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->udp = ue0->ue_hdrs.ip6.ue_udp;
tr->ip = ue0->ue_hdrs.ip6.ue_ip6;
+ tr->flags = ue0->ue_flags;
+ tr->flow_hash = vnet_buffer (b0)->ip.flow_hash;
}
}
else
@@ -224,7 +245,7 @@ udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
sizeof (udp_header_t) + sizeof (ip4_header_t);
ip_udp_encap_one (vm, b0, (u8 *) &ue0->ue_hdrs.ip4, n_bytes,
- encap_family, payload_family);
+ encap_family, payload_family, ue0->ue_flags);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -232,6 +253,8 @@ udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->udp = ue0->ue_hdrs.ip4.ue_udp;
tr->ip = ue0->ue_hdrs.ip4.ue_ip4;
+ tr->flags = ue0->ue_flags;
+ tr->flow_hash = vnet_buffer (b0)->ip.flow_hash;
}
}
@@ -285,7 +308,6 @@ VLIB_NODE_FN (udp6_encap_node)
return udp_encap_inline (vm, node, frame, AF_IP6, N_AF);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp4o4_encap_node) = {
.name = "udp4o4-encap",
.vector_size = sizeof (u32),
@@ -296,7 +318,7 @@ VLIB_REGISTER_NODE (udp4o4_encap_node) = {
VLIB_REGISTER_NODE (udp6o4_encap_node) = {
.name = "udp6o4-encap",
.vector_size = sizeof (u32),
- .format_trace = format_udp6_encap_trace,
+ .format_trace = format_udp4_encap_trace,
.n_next_nodes = 0,
.sibling_of = "udp4o4-encap",
};
@@ -319,7 +341,7 @@ VLIB_REGISTER_NODE (udp6o6_encap_node) = {
VLIB_REGISTER_NODE (udp4o6_encap_node) = {
.name = "udp4o6-encap",
.vector_size = sizeof (u32),
- .format_trace = format_udp4_encap_trace,
+ .format_trace = format_udp6_encap_trace,
.n_next_nodes = 0,
.sibling_of = "udp6o6-encap",
};
@@ -331,7 +353,6 @@ VLIB_REGISTER_NODE (udp6_encap_node) = {
.n_next_nodes = 0,
.sibling_of = "udp6o6-encap",
};
-/* *INDENT-ON* */
/*
diff --git a/src/vnet/udp/udp_error.def b/src/vnet/udp/udp_error.def
index 178d5c96b2c..ef19970ce72 100644
--- a/src/vnet/udp/udp_error.def
+++ b/src/vnet/udp/udp_error.def
@@ -21,7 +21,10 @@ udp_error (LENGTH_ERROR, length_error, ERROR, "Packets with length errors")
udp_error (PUNT, punt, ERROR, "No listener punt")
udp_error (ENQUEUED, enqueued, INFO, "Packets enqueued")
udp_error (FIFO_FULL, fifo_full, ERROR, "Fifo full")
+udp_error (FIFO_NOMEM, fifo_nomem, ERROR, "Fifo no mem")
udp_error (NOT_READY, not_ready, ERROR, "Connection not ready")
udp_error (ACCEPT, accept, INFO, "Accepted session")
udp_error (CREATE_SESSION, create_session, ERROR, "Failed to create session")
udp_error (MQ_FULL, mq_full, ERROR, "Application msg queue full")
+udp_error (INVALID_CONNECTION, invalid_connection, ERROR, "Invalid connection")
+udp_error (PKTS_SENT, pkts_sent, INFO, "Packets sent")
diff --git a/src/vnet/udp/udp_inlines.h b/src/vnet/udp/udp_inlines.h
index 025809e1873..f0dd44f48b5 100644
--- a/src/vnet/udp/udp_inlines.h
+++ b/src/vnet/udp/udp_inlines.h
@@ -21,6 +21,9 @@
#include <vnet/ip/ip6.h>
#include <vnet/udp/udp_packet.h>
#include <vnet/interface_output.h>
+#include <vnet/ip/ip4_inlines.h>
+#include <vnet/ip/ip6_inlines.h>
+#include <vnet/udp/udp_encap.h>
always_inline void *
vlib_buffer_push_udp (vlib_buffer_t * b, u16 sp, u16 dp, u8 offload_csum)
@@ -42,8 +45,39 @@ vlib_buffer_push_udp (vlib_buffer_t * b, u16 sp, u16 dp, u8 offload_csum)
return uh;
}
+/*
+ * Encode udp source port entropy value per
+ * https://datatracker.ietf.org/doc/html/rfc7510#section-3
+ */
+always_inline u16
+ip_udp_sport_entropy (vlib_buffer_t *b0)
+{
+ u16 port = clib_host_to_net_u16 (0x03 << 14);
+ port |= vnet_buffer (b0)->ip.flow_hash & 0xffff;
+ return port;
+}
+
+always_inline u32
+ip_udp_compute_flow_hash (vlib_buffer_t *b0, u8 is_ip4)
+{
+ ip4_header_t *ip4;
+ ip6_header_t *ip6;
+
+ if (is_ip4)
+ {
+ ip4 = (ip4_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
+ return ip4_compute_flow_hash (ip4, IP_FLOW_HASH_DEFAULT);
+ }
+ else
+ {
+ ip6 = (ip6_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
+ return ip6_compute_flow_hash (ip6, IP_FLOW_HASH_DEFAULT);
+ }
+}
+
always_inline void
-ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4)
+ip_udp_fixup_one (vlib_main_t *vm, vlib_buffer_t *b0, u8 is_ip4,
+ u8 sport_entropy)
{
u16 new_l0;
udp_header_t *udp0;
@@ -71,6 +105,9 @@ ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4)
new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
- sizeof (*ip0));
udp0->length = new_l0;
+
+ if (sport_entropy)
+ udp0->src_port = ip_udp_sport_entropy (b0);
}
else
{
@@ -87,6 +124,9 @@ ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4)
udp0 = (udp_header_t *) (ip0 + 1);
udp0->length = new_l0;
+ if (sport_entropy)
+ udp0->src_port = ip_udp_sport_entropy (b0);
+
udp0->checksum =
ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0);
ASSERT (bogus0 == 0);
@@ -99,13 +139,20 @@ ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4)
always_inline void
ip_udp_encap_one (vlib_main_t *vm, vlib_buffer_t *b0, u8 *ec0, word ec_len,
ip_address_family_t encap_family,
- ip_address_family_t payload_family)
+ ip_address_family_t payload_family,
+ udp_encap_fixup_flags_t flags)
{
+ u8 sport_entropy = (flags & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) != 0;
if (payload_family < N_AF)
{
vnet_calc_checksums_inline (vm, b0, payload_family == AF_IP4,
payload_family == AF_IP6);
+
+ /* Calculate flow hash to be used for entropy */
+ if (sport_entropy && 0 == vnet_buffer (b0)->ip.flow_hash)
+ vnet_buffer (b0)->ip.flow_hash =
+ ip_udp_compute_flow_hash (b0, payload_family == AF_IP4);
}
vlib_buffer_advance (b0, -ec_len);
@@ -118,7 +165,7 @@ ip_udp_encap_one (vlib_main_t *vm, vlib_buffer_t *b0, u8 *ec0, word ec_len,
/* Apply the encap string. */
clib_memcpy_fast (ip0, ec0, ec_len);
- ip_udp_fixup_one (vm, b0, 1);
+ ip_udp_fixup_one (vm, b0, 1, sport_entropy);
}
else
{
@@ -128,7 +175,7 @@ ip_udp_encap_one (vlib_main_t *vm, vlib_buffer_t *b0, u8 *ec0, word ec_len,
/* Apply the encap string. */
clib_memcpy_fast (ip0, ec0, ec_len);
- ip_udp_fixup_one (vm, b0, 0);
+ ip_udp_fixup_one (vm, b0, 0, sport_entropy);
}
}
@@ -136,16 +183,28 @@ always_inline void
ip_udp_encap_two (vlib_main_t *vm, vlib_buffer_t *b0, vlib_buffer_t *b1,
u8 *ec0, u8 *ec1, word ec_len,
ip_address_family_t encap_family,
- ip_address_family_t payload_family)
+ ip_address_family_t payload_family,
+ udp_encap_fixup_flags_t flags0,
+ udp_encap_fixup_flags_t flags1)
{
u16 new_l0, new_l1;
udp_header_t *udp0, *udp1;
int payload_ip4 = (payload_family == AF_IP4);
+ int sport_entropy0 = (flags0 & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) != 0;
+ int sport_entropy1 = (flags1 & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) != 0;
if (payload_family < N_AF)
{
vnet_calc_checksums_inline (vm, b0, payload_ip4, !payload_ip4);
vnet_calc_checksums_inline (vm, b1, payload_ip4, !payload_ip4);
+
+ /* Calculate flow hash to be used for entropy */
+ if (sport_entropy0 && 0 == vnet_buffer (b0)->ip.flow_hash)
+ vnet_buffer (b0)->ip.flow_hash =
+ ip_udp_compute_flow_hash (b0, payload_ip4);
+ if (sport_entropy1 && 0 == vnet_buffer (b1)->ip.flow_hash)
+ vnet_buffer (b1)->ip.flow_hash =
+ ip_udp_compute_flow_hash (b1, payload_ip4);
}
vlib_buffer_advance (b0, -ec_len);
@@ -195,6 +254,11 @@ ip_udp_encap_two (vlib_main_t *vm, vlib_buffer_t *b0, vlib_buffer_t *b1,
sizeof (*ip1));
udp0->length = new_l0;
udp1->length = new_l1;
+
+ if (sport_entropy0)
+ udp0->src_port = ip_udp_sport_entropy (b0);
+ if (sport_entropy1)
+ udp1->src_port = ip_udp_sport_entropy (b1);
}
else
{
@@ -222,6 +286,11 @@ ip_udp_encap_two (vlib_main_t *vm, vlib_buffer_t *b0, vlib_buffer_t *b1,
udp0->length = new_l0;
udp1->length = new_l1;
+ if (sport_entropy0)
+ udp0->src_port = ip_udp_sport_entropy (b0);
+ if (sport_entropy1)
+ udp1->src_port = ip_udp_sport_entropy (b1);
+
udp0->checksum =
ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0);
udp1->checksum =
diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c
index c11c0d51214..a90461186c1 100644
--- a/src/vnet/udp/udp_input.c
+++ b/src/vnet/udp/udp_input.c
@@ -115,6 +115,7 @@ udp_connection_accept (udp_connection_t * listener, session_dgram_hdr_t * hdr,
uc->c_fib_index = listener->c_fib_index;
uc->mss = listener->mss;
uc->flags |= UDP_CONN_F_CONNECTED;
+ uc->cfg_flags = listener->cfg_flags;
if (session_dgram_accept (&uc->connection, listener->c_s_index,
listener->c_thread_index))
@@ -122,8 +123,8 @@ udp_connection_accept (udp_connection_t * listener, session_dgram_hdr_t * hdr,
udp_connection_free (uc);
return 0;
}
- udp_connection_share_port (clib_net_to_host_u16
- (uc->c_lcl_port), uc->c_is_ip4);
+
+ udp_connection_share_port (uc->c_lcl_port, uc->c_is_ip4);
return uc;
}
@@ -135,37 +136,46 @@ udp_connection_enqueue (udp_connection_t * uc0, session_t * s0,
int wrote0;
if (!(uc0->flags & UDP_CONN_F_CONNECTED))
- clib_spinlock_lock (&uc0->rx_lock);
+ {
+ clib_spinlock_lock (&uc0->rx_lock);
+
+ wrote0 = session_enqueue_dgram_connection_cl (
+ s0, hdr0, b, TRANSPORT_PROTO_UDP, queue_event);
+
+ clib_spinlock_unlock (&uc0->rx_lock);
+
+ /* Expect cl udp enqueue to fail when the fifo is full */
+ if (PREDICT_FALSE (wrote0 == 0))
+ *error0 = UDP_ERROR_FIFO_FULL;
+
+ return;
+ }
if (svm_fifo_max_enqueue_prod (s0->rx_fifo)
< hdr0->data_length + sizeof (session_dgram_hdr_t))
{
*error0 = UDP_ERROR_FIFO_FULL;
- goto unlock_rx_lock;
+ return;
}
/* If session is owned by another thread and rx event needed,
* enqueue event now while we still have the peeker lock */
if (s0->thread_index != thread_index)
{
- wrote0 = session_enqueue_dgram_connection (s0, hdr0, b,
- TRANSPORT_PROTO_UDP,
- /* queue event */ 0);
- if (queue_event && !svm_fifo_has_event (s0->rx_fifo))
- session_enqueue_notify (s0);
+ wrote0 = session_enqueue_dgram_connection2 (
+ s0, hdr0, b, TRANSPORT_PROTO_UDP,
+ queue_event && !svm_fifo_has_event (s0->rx_fifo));
}
else
{
- wrote0 = session_enqueue_dgram_connection (s0, hdr0, b,
- TRANSPORT_PROTO_UDP,
- queue_event);
+ wrote0 = session_enqueue_dgram_connection (
+ s0, hdr0, b, TRANSPORT_PROTO_UDP, queue_event);
}
- ASSERT (wrote0 > 0);
-
-unlock_rx_lock:
- if (!(uc0->flags & UDP_CONN_F_CONNECTED))
- clib_spinlock_unlock (&uc0->rx_lock);
+ /* In some rare cases, session_enqueue_dgram_connection can fail because a
+ * chunk cannot be allocated in the RX FIFO */
+ if (PREDICT_FALSE (wrote0 == 0))
+ *error0 = UDP_ERROR_FIFO_NOMEM;
}
always_inline session_t *
@@ -184,6 +194,7 @@ udp_parse_and_lookup_buffer (vlib_buffer_t * b, session_dgram_hdr_t * hdr,
hdr->lcl_port = udp->dst_port;
hdr->rmt_port = udp->src_port;
hdr->is_ip4 = is_ip4;
+ hdr->gso_size = 0;
if (is_ip4)
{
@@ -213,6 +224,10 @@ udp_parse_and_lookup_buffer (vlib_buffer_t * b, session_dgram_hdr_t * hdr,
udp->src_port, TRANSPORT_PROTO_UDP);
}
+ /* Set the sw_if_index[VLIB_RX] to the interface we received
+ * the connection on (the local interface) */
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->ip.rx_sw_if_index;
+
if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)))
b->current_length = hdr->data_length;
else
@@ -226,10 +241,9 @@ always_inline uword
udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame, u8 is_ip4)
{
- u32 n_left_from, *from, errors, *first_buffer;
+ u32 thread_index = vm->thread_index, n_left_from, *from, *first_buffer;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 err_counters[UDP_N_ERROR] = { 0 };
- u32 thread_index = vm->thread_index;
from = first_buffer = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -281,7 +295,8 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0],
queue_event, &error0);
}
- else if (s0->session_state == SESSION_STATE_READY)
+ else if (s0->session_state == SESSION_STATE_READY ||
+ s0->session_state == SESSION_STATE_ACCEPTING)
{
uc0 = udp_connection_from_transport (session_get_transport (s0));
udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0], 1,
@@ -321,9 +336,7 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
vlib_buffer_free (vm, first_buffer, frame->n_vectors);
- errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_UDP,
- thread_index);
- err_counters[UDP_ERROR_MQ_FULL] = errors;
+ session_main_flush_enqueue_events (TRANSPORT_PROTO_UDP, thread_index);
udp_store_err_counters (vm, is_ip4, err_counters);
return frame->n_vectors;
}
@@ -335,7 +348,6 @@ udp4_input (vlib_main_t * vm, vlib_node_runtime_t * node,
return udp46_input_inline (vm, node, frame, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp4_input_node) =
{
.function = udp4_input,
@@ -352,7 +364,6 @@ VLIB_REGISTER_NODE (udp4_input_node) =
#undef _
},
};
-/* *INDENT-ON* */
static uword
udp6_input (vlib_main_t * vm, vlib_node_runtime_t * node,
@@ -361,7 +372,6 @@ udp6_input (vlib_main_t * vm, vlib_node_runtime_t * node,
return udp46_input_inline (vm, node, frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp6_input_node) =
{
.function = udp6_input,
@@ -378,7 +388,6 @@ VLIB_REGISTER_NODE (udp6_input_node) =
#undef _
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/udp/udp_local.c b/src/vnet/udp/udp_local.c
index 06bafbb4be8..6531b73cd11 100644
--- a/src/vnet/udp/udp_local.c
+++ b/src/vnet/udp/udp_local.c
@@ -42,8 +42,6 @@ static vlib_error_desc_t udp_error_counters[] = {
#undef udp_error
};
-#define UDP_NO_NODE_SET ((u16) ~0)
-
#ifndef CLIB_MARCH_VARIANT
u8 *
format_udp_rx_trace (u8 * s, va_list * args)
@@ -127,9 +125,8 @@ udp46_local_inline (vlib_main_t * vm,
u32 bi0, bi1;
vlib_buffer_t *b0, *b1;
udp_header_t *h0 = 0, *h1 = 0;
- u32 i0, i1, dst_port0, dst_port1;
+ u32 i0, i1, next0, next1;
u32 advance0, advance1;
- u32 error0, next0, error1, next1;
/* Prefetch next iteration. */
{
@@ -171,72 +168,106 @@ udp46_local_inline (vlib_main_t * vm,
if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0)))
{
- error0 = UDP_ERROR_LENGTH_ERROR;
+ b0->error = node->errors[UDP_ERROR_LENGTH_ERROR];
next0 = UDP_LOCAL_NEXT_DROP;
}
else
{
vlib_buffer_advance (b0, advance0);
h0 = vlib_buffer_get_current (b0);
- error0 = UDP_ERROR_NONE;
next0 = UDP_LOCAL_NEXT_PUNT;
if (PREDICT_FALSE (clib_net_to_host_u16 (h0->length) >
vlib_buffer_length_in_chain (vm, b0)))
{
- error0 = UDP_ERROR_LENGTH_ERROR;
+ b0->error = node->errors[UDP_ERROR_LENGTH_ERROR];
next0 = UDP_LOCAL_NEXT_DROP;
}
}
if (PREDICT_FALSE (b1->current_length < advance1 + sizeof (*h1)))
{
- error1 = UDP_ERROR_LENGTH_ERROR;
+ b1->error = node->errors[UDP_ERROR_LENGTH_ERROR];
next1 = UDP_LOCAL_NEXT_DROP;
}
else
{
vlib_buffer_advance (b1, advance1);
h1 = vlib_buffer_get_current (b1);
- error1 = UDP_ERROR_NONE;
next1 = UDP_LOCAL_NEXT_PUNT;
if (PREDICT_FALSE (clib_net_to_host_u16 (h1->length) >
vlib_buffer_length_in_chain (vm, b1)))
{
- error1 = UDP_ERROR_LENGTH_ERROR;
+ b1->error = node->errors[UDP_ERROR_LENGTH_ERROR];
next1 = UDP_LOCAL_NEXT_DROP;
}
}
/* Index sparse array with network byte order. */
- dst_port0 = (error0 == 0) ? h0->dst_port : 0;
- dst_port1 = (error1 == 0) ? h1->dst_port : 0;
- sparse_vec_index2 (next_by_dst_port, dst_port0, dst_port1, &i0,
- &i1);
- next0 = (error0 == 0) ? vec_elt (next_by_dst_port, i0) : next0;
- next1 = (error1 == 0) ? vec_elt (next_by_dst_port, i1) : next1;
-
- if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX ||
- next0 == UDP_NO_NODE_SET))
+ if (PREDICT_TRUE (next0 == UDP_LOCAL_NEXT_PUNT &&
+ next1 == UDP_LOCAL_NEXT_PUNT))
{
- udp_dispatch_error (node, b0, advance0, is_ip4, &next0);
+ sparse_vec_index2 (next_by_dst_port, h0->dst_port, h1->dst_port,
+ &i0, &i1);
+ next0 = vec_elt (next_by_dst_port, i0);
+ next1 = vec_elt (next_by_dst_port, i1);
+
+ if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX ||
+ next0 == UDP_NO_NODE_SET))
+ {
+ udp_dispatch_error (node, b0, advance0, is_ip4, &next0);
+ }
+ else
+ {
+ b0->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b0, sizeof (*h0));
+ }
+
+ if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX ||
+ next1 == UDP_NO_NODE_SET))
+ {
+ udp_dispatch_error (node, b1, advance1, is_ip4, &next1);
+ }
+ else
+ {
+ b1->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b1, sizeof (*h1));
+ }
}
- else
+ else if (next0 == UDP_LOCAL_NEXT_PUNT)
{
- b0->error = node->errors[UDP_ERROR_NONE];
- // advance to the payload
- vlib_buffer_advance (b0, sizeof (*h0));
- }
+ i0 = sparse_vec_index (next_by_dst_port, h0->dst_port);
+ next0 = vec_elt (next_by_dst_port, i0);
- if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX ||
- next1 == UDP_NO_NODE_SET))
- {
- udp_dispatch_error (node, b1, advance1, is_ip4, &next1);
+ if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX ||
+ next0 == UDP_NO_NODE_SET))
+ {
+ udp_dispatch_error (node, b0, advance0, is_ip4, &next0);
+ }
+ else
+ {
+ b0->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b0, sizeof (*h0));
+ }
}
- else
+ else if (next1 == UDP_LOCAL_NEXT_PUNT)
{
- b1->error = node->errors[UDP_ERROR_NONE];
- // advance to the payload
- vlib_buffer_advance (b1, sizeof (*h1));
+ i1 = sparse_vec_index (next_by_dst_port, h1->dst_port);
+ next1 = vec_elt (next_by_dst_port, i1);
+
+ if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX ||
+ next1 == UDP_NO_NODE_SET))
+ {
+ udp_dispatch_error (node, b1, advance1, is_ip4, &next1);
+ }
+ else
+ {
+ b1->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b1, sizeof (*h1));
+ }
}
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -362,7 +393,6 @@ VLIB_NODE_FN (udp6_local_node) (vlib_main_t * vm,
return udp46_local_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp4_local_node) = {
.name = "ip4-udp-lookup",
/* Takes a vector of packets. */
@@ -382,9 +412,7 @@ VLIB_REGISTER_NODE (udp4_local_node) = {
.format_trace = format_udp_rx_trace,
.unformat_buffer = unformat_udp_header,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp6_local_node) = {
.name = "ip6-udp-lookup",
/* Takes a vector of packets. */
@@ -404,7 +432,6 @@ VLIB_REGISTER_NODE (udp6_local_node) = {
.format_trace = format_udp_rx_trace,
.unformat_buffer = unformat_udp_header,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
void
@@ -492,16 +519,12 @@ u8
udp_is_valid_dst_port (udp_dst_port_t dst_port, u8 is_ip4)
{
udp_main_t *um = &udp_main;
- u16 *n;
-
- if (is_ip4)
- n = sparse_vec_validate (um->next_by_dst_port4,
- clib_host_to_net_u16 (dst_port));
- else
- n = sparse_vec_validate (um->next_by_dst_port6,
- clib_host_to_net_u16 (dst_port));
-
- return (n[0] != SPARSE_VEC_INVALID_INDEX && n[0] != UDP_NO_NODE_SET);
+ u16 *next_by_dst_port =
+ is_ip4 ? um->next_by_dst_port4 : um->next_by_dst_port6;
+ uword index =
+ sparse_vec_index (next_by_dst_port, clib_host_to_net_u16 (dst_port));
+ return (index != SPARSE_VEC_INVALID_INDEX &&
+ vec_elt (next_by_dst_port, index) != UDP_NO_NODE_SET);
}
void
diff --git a/src/vnet/udp/udp_output.c b/src/vnet/udp/udp_output.c
new file mode 100644
index 00000000000..22b94141365
--- /dev/null
+++ b/src/vnet/udp/udp_output.c
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vnet/udp/udp.h>
+#include <vnet/ip/ip4_inlines.h>
+#include <vnet/ip/ip6_inlines.h>
+
+#define udp_node_index(node_id, is_ip4) \
+ ((is_ip4) ? udp4_##node_id##_node.index : udp6_##node_id##_node.index)
+
+typedef enum udp_output_next_
+{
+ UDP_OUTPUT_NEXT_DROP,
+ UDP_OUTPUT_NEXT_IP_LOOKUP,
+ UDP_OUTPUT_N_NEXT
+} udp_output_next_t;
+
+#define foreach_udp4_output_next \
+ _ (DROP, "error-drop") \
+ _ (IP_LOOKUP, "ip4-lookup")
+
+#define foreach_udp6_output_next \
+ _ (DROP, "error-drop") \
+ _ (IP_LOOKUP, "ip6-lookup")
+
+static vlib_error_desc_t udp_output_error_counters[] = {
+#define udp_error(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
+#include <vnet/udp/udp_error.def>
+#undef udp_error
+};
+
+typedef struct udp_tx_trace_
+{
+ udp_header_t udp_header;
+ udp_connection_t udp_connection;
+} udp_tx_trace_t;
+
+static u8 *
+format_udp_tx_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ udp_tx_trace_t *t = va_arg (*args, udp_tx_trace_t *);
+ udp_connection_t *uc = &t->udp_connection;
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "%U\n%U%U", format_udp_connection, uc, 1, format_white_space,
+ indent, format_udp_header, &t->udp_header, 128);
+
+ return s;
+}
+
+always_inline udp_connection_t *
+udp_output_get_connection (vlib_buffer_t *b, u32 thread_index)
+{
+ if (PREDICT_FALSE (vnet_buffer (b)->tcp.flags & UDP_CONN_F_LISTEN))
+ return udp_listener_get (vnet_buffer (b)->tcp.connection_index);
+
+ return udp_connection_get (vnet_buffer (b)->tcp.connection_index,
+ thread_index);
+}
+
+static void
+udp46_output_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
+ u32 *to_next, u32 n_bufs)
+{
+ udp_connection_t *uc;
+ udp_tx_trace_t *t;
+ vlib_buffer_t *b;
+ udp_header_t *uh;
+ int i;
+
+ for (i = 0; i < n_bufs; i++)
+ {
+ b = vlib_get_buffer (vm, to_next[i]);
+ if (!(b->flags & VLIB_BUFFER_IS_TRACED))
+ continue;
+ uh = vlib_buffer_get_current (b);
+ uc = udp_output_get_connection (b, vm->thread_index);
+ t = vlib_add_trace (vm, node, b, sizeof (*t));
+ clib_memcpy_fast (&t->udp_header, uh, sizeof (t->udp_header));
+ clib_memcpy_fast (&t->udp_connection, uc, sizeof (t->udp_connection));
+ }
+}
+
+always_inline void
+udp_output_handle_packet (udp_connection_t *uc0, vlib_buffer_t *b0,
+ vlib_node_runtime_t *error_node, u16 *next0,
+ u8 is_ip4)
+{
+ /* If next_index is not drop use it */
+ if (uc0->next_node_index)
+ {
+ *next0 = uc0->next_node_index;
+ vnet_buffer (b0)->tcp.next_node_opaque = uc0->next_node_opaque;
+ }
+ else
+ {
+ *next0 = UDP_OUTPUT_NEXT_IP_LOOKUP;
+ }
+
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = uc0->c_fib_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = uc0->sw_if_index;
+}
+
+always_inline uword
+udp46_output_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, int is_ip4)
+{
+ u32 n_left_from, *from, thread_index = vm->thread_index;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+ u16 nexts[VLIB_FRAME_SIZE], *next;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ udp46_output_trace_frame (vm, node, from, n_left_from);
+
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+ b = bufs;
+ next = nexts;
+
+ while (n_left_from >= 4)
+ {
+ udp_connection_t *uc0, *uc1;
+
+ vlib_prefetch_buffer_header (b[2], STORE);
+ CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+
+ vlib_prefetch_buffer_header (b[3], STORE);
+ CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+
+ uc0 = udp_output_get_connection (b[0], thread_index);
+ uc1 = udp_output_get_connection (b[1], thread_index);
+
+ if (PREDICT_TRUE (!uc0 + !uc1 == 0))
+ {
+ udp_output_handle_packet (uc0, b[0], node, &next[0], is_ip4);
+ udp_output_handle_packet (uc1, b[1], node, &next[1], is_ip4);
+ }
+ else
+ {
+ if (uc0 != 0)
+ {
+ udp_output_handle_packet (uc0, b[0], node, &next[0], is_ip4);
+ }
+ else
+ {
+ b[0]->error = node->errors[UDP_ERROR_INVALID_CONNECTION];
+ next[0] = UDP_OUTPUT_NEXT_DROP;
+ }
+ if (uc1 != 0)
+ {
+ udp_output_handle_packet (uc1, b[1], node, &next[1], is_ip4);
+ }
+ else
+ {
+ b[1]->error = node->errors[UDP_ERROR_INVALID_CONNECTION];
+ next[1] = UDP_OUTPUT_NEXT_DROP;
+ }
+ }
+
+ b += 2;
+ next += 2;
+ n_left_from -= 2;
+ }
+ while (n_left_from > 0)
+ {
+ udp_connection_t *uc0;
+
+ if (n_left_from > 1)
+ {
+ vlib_prefetch_buffer_header (b[1], STORE);
+ CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ uc0 = udp_output_get_connection (b[0], thread_index);
+
+ if (PREDICT_TRUE (uc0 != 0))
+ {
+ udp_output_handle_packet (uc0, b[0], node, &next[0], is_ip4);
+ }
+ else
+ {
+ b[0]->error = node->errors[UDP_ERROR_INVALID_CONNECTION];
+ next[0] = UDP_OUTPUT_NEXT_DROP;
+ }
+
+ b += 1;
+ next += 1;
+ n_left_from -= 1;
+ }
+
+ vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+ vlib_node_increment_counter (vm, udp_node_index (output, is_ip4),
+ UDP_ERROR_PKTS_SENT, frame->n_vectors);
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (udp4_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return udp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */);
+}
+
+VLIB_NODE_FN (udp6_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return udp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */);
+}
+
+VLIB_REGISTER_NODE (udp4_output_node) =
+{
+ .name = "udp4-output",
+ .vector_size = sizeof (u32),
+ .n_errors = UDP_N_ERROR,
+ .protocol_hint = VLIB_NODE_PROTO_HINT_UDP,
+ .error_counters = udp_output_error_counters,
+ .n_next_nodes = UDP_OUTPUT_N_NEXT,
+ .next_nodes = {
+#define _(s, n) [UDP_OUTPUT_NEXT_##s] = n,
+ foreach_udp4_output_next
+#undef _
+ },
+ .format_buffer = format_udp_header,
+ .format_trace = format_udp_tx_trace,
+};
+
+VLIB_REGISTER_NODE (udp6_output_node) =
+{
+ .name = "udp6-output",
+ .vector_size = sizeof (u32),
+ .n_errors = UDP_N_ERROR,
+ .protocol_hint = VLIB_NODE_PROTO_HINT_UDP,
+ .error_counters = udp_output_error_counters,
+ .n_next_nodes = UDP_OUTPUT_N_NEXT,
+ .next_nodes = {
+#define _(s, n) [UDP_OUTPUT_NEXT_##s] = n,
+ foreach_udp6_output_next
+#undef _
+ },
+ .format_buffer = format_udp_header,
+ .format_trace = format_udp_tx_trace,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/unix/gdb_funcs.c b/src/vnet/unix/gdb_funcs.c
index 91dabe394ba..d6fdc985bd9 100644
--- a/src/vnet/unix/gdb_funcs.c
+++ b/src/vnet/unix/gdb_funcs.c
@@ -318,13 +318,11 @@ show_gdb_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_gdb_funcs_command, static) = {
.path = "show gdb",
.short_help = "Describe functions which can be called from gdb",
.function = show_gdb_command_fn,
};
-/* *INDENT-ON* */
vlib_buffer_t *
vgb (u32 bi)
diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c
index b75b1f670b9..f1102dc321e 100644
--- a/src/vnet/unix/tuntap.c
+++ b/src/vnet/unix/tuntap.c
@@ -217,14 +217,12 @@ tuntap_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
return n_packets;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tuntap_tx_node,static) = {
.function = tuntap_tx,
.name = "tuntap-tx",
.type = VLIB_NODE_TYPE_INTERNAL,
.vector_size = 4,
};
-/* *INDENT-ON* */
/**
* @brief TUNTAP receive node
@@ -366,7 +364,7 @@ tuntap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
next_index = VNET_DEVICE_INPUT_NEXT_DROP;
}
- vnet_feature_start_device_input_x1 (tm->sw_if_index, &next_index, b);
+ vnet_feature_start_device_input (tm->sw_if_index, &next_index, b);
vlib_set_next_frame_buffer (vm, node, next_index, bi);
@@ -385,7 +383,6 @@ static char *tuntap_rx_error_strings[] = {
"unknown packet type",
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tuntap_rx_node,static) = {
.function = tuntap_rx,
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
@@ -397,7 +394,6 @@ VLIB_REGISTER_NODE (tuntap_rx_node,static) = {
.n_errors = 1,
.error_strings = tuntap_rx_error_strings,
};
-/* *INDENT-ON* */
/**
* @brief Gets called when file descriptor is ready from epoll.
@@ -933,12 +929,10 @@ tuntap_nopunt_frame (vlib_main_t * vm,
vlib_frame_free (vm, frame);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (tuntap_interface_class,static) = {
.name = "tuntap",
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
/**
* @brief Format tun/tap interface name
@@ -984,13 +978,11 @@ tuntap_intfc_tx (vlib_main_t * vm,
return n_buffers;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (tuntap_dev_class,static) = {
.name = "tuntap",
.tx_function = tuntap_intfc_tx,
.format_device_name = format_tuntap_interface_name,
};
-/* *INDENT-ON* */
/**
* @brief tun/tap node init
@@ -1025,12 +1017,10 @@ tuntap_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (tuntap_init) =
{
.runs_after = VLIB_INITS("ip4_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/util/throttle.c b/src/vnet/util/throttle.c
index 0985b4a81a3..8b8e030bf53 100644
--- a/src/vnet/util/throttle.c
+++ b/src/vnet/util/throttle.c
@@ -16,17 +16,18 @@
#include <vnet/util/throttle.h>
void
-throttle_init (throttle_t * t, u32 n_threads, f64 time)
+throttle_init (throttle_t *t, u32 n_threads, u32 buckets, f64 time)
{
u32 i;
t->time = time;
+ t->buckets = 1 << max_log2 (buckets);
vec_validate (t->bitmaps, n_threads);
vec_validate (t->seeds, n_threads);
vec_validate (t->last_seed_change_time, n_threads);
for (i = 0; i < n_threads; i++)
- vec_validate (t->bitmaps[i], (THROTTLE_BITS / BITS (uword)) - 1);
+ clib_bitmap_alloc (t->bitmaps[i], t->buckets);
}
/*
diff --git a/src/vnet/util/throttle.h b/src/vnet/util/throttle.h
index 38ace280131..53435c4a359 100644
--- a/src/vnet/util/throttle.h
+++ b/src/vnet/util/throttle.h
@@ -31,11 +31,13 @@ typedef struct throttle_t_
uword **bitmaps;
u64 *seeds;
f64 *last_seed_change_time;
+ u32 buckets;
} throttle_t;
#define THROTTLE_BITS (512)
-extern void throttle_init (throttle_t * t, u32 n_threads, f64 time);
+extern void throttle_init (throttle_t *t, u32 n_threads, u32 buckets,
+ f64 time);
always_inline u64
throttle_seed (throttle_t * t, u32 thread_index, f64 time_now)
@@ -43,7 +45,7 @@ throttle_seed (throttle_t * t, u32 thread_index, f64 time_now)
if (time_now - t->last_seed_change_time[thread_index] > t->time)
{
(void) random_u64 (&t->seeds[thread_index]);
- clib_memset (t->bitmaps[thread_index], 0, THROTTLE_BITS / BITS (u8));
+ clib_bitmap_zero (t->bitmaps[thread_index]);
t->last_seed_change_time[thread_index] = time_now;
}
@@ -53,21 +55,14 @@ throttle_seed (throttle_t * t, u32 thread_index, f64 time_now)
always_inline int
throttle_check (throttle_t * t, u32 thread_index, u64 hash, u64 seed)
{
- int drop;
- uword m;
- u32 w;
+ ASSERT (is_pow2 (t->buckets));
hash = clib_xxhash (hash ^ seed);
/* Select bit number */
- hash &= THROTTLE_BITS - 1;
- w = hash / BITS (uword);
- m = (uword) 1 << (hash % BITS (uword));
+ hash &= t->buckets - 1;
- drop = (t->bitmaps[thread_index][w] & m) != 0;
- t->bitmaps[thread_index][w] |= m;
-
- return (drop);
+ return clib_bitmap_set_no_check (t->bitmaps[thread_index], hash, 1);
}
#endif
diff --git a/src/vnet/vnet.h b/src/vnet/vnet.h
index 227fa5be30c..54988aec667 100644
--- a/src/vnet/vnet.h
+++ b/src/vnet/vnet.h
@@ -71,6 +71,7 @@ typedef struct
u32 pcap_sw_if_index;
pcap_main_t pcap_main;
u32 filter_classify_table_index;
+ vlib_is_packet_traced_fn_t *current_filter_function;
vlib_error_t pcap_error_index;
} vnet_pcap_t;
diff --git a/src/vnet/vxlan-gpe/decap.c b/src/vnet/vxlan-gpe/decap.c
index 62513614389..d4c7424630d 100644
--- a/src/vnet/vxlan-gpe/decap.c
+++ b/src/vnet/vxlan-gpe/decap.c
@@ -622,7 +622,6 @@ static char *vxlan_gpe_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan4_gpe_input_node) = {
.name = "vxlan4-gpe-input",
/* Takes a vector of packets. */
@@ -642,9 +641,7 @@ VLIB_REGISTER_NODE (vxlan4_gpe_input_node) = {
.format_trace = format_vxlan_gpe_rx_trace,
// $$$$ .unformat_buffer = unformat_vxlan_gpe_header,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan6_gpe_input_node) = {
.name = "vxlan6-gpe-input",
/* Takes a vector of packets. */
@@ -664,7 +661,6 @@ VLIB_REGISTER_NODE (vxlan6_gpe_input_node) = {
.format_trace = format_vxlan_gpe_rx_trace,
// $$$$ .unformat_buffer = unformat_vxlan_gpe_header,
};
-/* *INDENT-ON* */
typedef enum
{
@@ -1105,7 +1101,6 @@ VLIB_NODE_FN (ip4_vxlan_gpe_bypass_node) (vlib_main_t * vm,
return ip_vxlan_gpe_bypass_inline (vm, node, frame, /* is_ip4 */ 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_vxlan_gpe_bypass_node) = {
.name = "ip4-vxlan-gpe-bypass",
.vector_size = sizeof (u32),
@@ -1119,7 +1114,6 @@ VLIB_REGISTER_NODE (ip4_vxlan_gpe_bypass_node) = {
.format_buffer = format_ip4_header,
.format_trace = format_ip4_forward_next_trace,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
/* Dummy init function to get us linked in. */
@@ -1139,7 +1133,6 @@ VLIB_NODE_FN (ip6_vxlan_gpe_bypass_node) (vlib_main_t * vm,
return ip_vxlan_gpe_bypass_inline (vm, node, frame, /* is_ip4 */ 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_vxlan_gpe_bypass_node) = {
.name = "ip6-vxlan-gpe-bypass",
.vector_size = sizeof (u32),
@@ -1153,7 +1146,6 @@ VLIB_REGISTER_NODE (ip6_vxlan_gpe_bypass_node) = {
.format_buffer = format_ip6_header,
.format_trace = format_ip6_forward_next_trace,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
/* Dummy init function to get us linked in. */
diff --git a/src/vnet/vxlan-gpe/encap.c b/src/vnet/vxlan-gpe/encap.c
index 35a5529e80b..a769861577d 100644
--- a/src/vnet/vxlan-gpe/encap.c
+++ b/src/vnet/vxlan-gpe/encap.c
@@ -96,7 +96,7 @@ vxlan_gpe_encap_one_inline (vxlan_gpe_main_t *ngm, vlib_buffer_t *b0,
ASSERT (sizeof (ip6_vxlan_gpe_header_t) == 56);
ip_udp_encap_one (ngm->vlib_main, b0, t0->rewrite, t0->rewrite_size, af,
- N_AF);
+ N_AF, UDP_ENCAP_FIXUP_NONE);
next0[0] = t0->encap_next_node;
}
@@ -123,9 +123,9 @@ vxlan_gpe_encap_two_inline (vxlan_gpe_main_t *ngm, vlib_buffer_t *b0,
ASSERT (sizeof (ip6_vxlan_gpe_header_t) == 56);
ip_udp_encap_one (ngm->vlib_main, b0, t0->rewrite, t0->rewrite_size, af,
- N_AF);
+ N_AF, UDP_ENCAP_FIXUP_NONE);
ip_udp_encap_one (ngm->vlib_main, b1, t1->rewrite, t1->rewrite_size, af,
- N_AF);
+ N_AF, UDP_ENCAP_FIXUP_NONE);
next0[0] = next1[0] = t0->encap_next_node;
}
@@ -404,7 +404,6 @@ vxlan_gpe_encap (vlib_main_t * vm,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan_gpe_encap_node) = {
.function = vxlan_gpe_encap,
.name = "vxlan-gpe-encap",
@@ -423,7 +422,6 @@ VLIB_REGISTER_NODE (vxlan_gpe_encap_node) = {
[VXLAN_GPE_ENCAP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.c b/src/vnet/vxlan-gpe/vxlan_gpe.c
index a926847051f..5a5262ea9db 100644
--- a/src/vnet/vxlan-gpe/vxlan_gpe.c
+++ b/src/vnet/vxlan-gpe/vxlan_gpe.c
@@ -144,14 +144,12 @@ vxlan_gpe_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
return 0;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (vxlan_gpe_device_class,static) = {
.name = "VXLAN_GPE",
.format_device_name = format_vxlan_gpe_name,
.format_tx_trace = format_vxlan_gpe_encap_trace,
.admin_up_down_function = vxlan_gpe_interface_admin_up_down,
};
-/* *INDENT-ON* */
/**
@@ -171,13 +169,11 @@ format_vxlan_gpe_header_with_length (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (vxlan_gpe_hw_class) = {
.name = "VXLAN_GPE",
.format_header = format_vxlan_gpe_header_with_length,
.build_rewrite = default_build_rewrite,
};
-/* *INDENT-ON* */
static void
vxlan_gpe_tunnel_restack_dpo (vxlan_gpe_tunnel_t * t)
@@ -388,7 +384,6 @@ vxlan6_gpe_rewrite (vxlan_gpe_tunnel_t * t, u32 extension_size,
return (0);
}
-/* *INDENT-OFF* */
typedef CLIB_PACKED(union {
struct {
fib_node_index_t mfib_entry_index;
@@ -396,7 +391,6 @@ typedef CLIB_PACKED(union {
};
u64 as_u64;
}) mcast_shared_t;
-/* *INDENT-ON* */
static inline mcast_shared_t
mcast_shared_get (ip46_address_t * ip)
@@ -496,7 +490,6 @@ int vnet_vxlan_gpe_add_del_tunnel
clib_memset (t, 0, sizeof (*t));
/* copy from arg structure */
-/* *INDENT-OFF* */
#define _(x) t->x = a->x;
foreach_gpe_copy_field;
if (!a->is_ip6)
@@ -504,7 +497,6 @@ int vnet_vxlan_gpe_add_del_tunnel
else
foreach_copy_ipv6
#undef _
-/* *INDENT-ON* */
if (!a->is_ip6)
t->flags |= VXLAN_GPE_TUNNEL_IS_IPV4;
@@ -594,7 +586,8 @@ int vnet_vxlan_gpe_add_del_tunnel
fib_prefix_t tun_remote_pfx;
vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
- fib_prefix_from_ip46_addr (&t->remote, &tun_remote_pfx);
+ fib_protocol_t fp = fib_ip_proto (is_ip6);
+ fib_prefix_from_ip46_addr (fp, &t->remote, &tun_remote_pfx);
if (!ip46_address_is_multicast (&t->remote))
{
/* Unicast tunnel -
@@ -618,8 +611,6 @@ int vnet_vxlan_gpe_add_del_tunnel
* with different VNIs, create the output fib adjacency only if
* it does not already exist
*/
- fib_protocol_t fp = fib_ip_proto (is_ip6);
-
if (vtep_addr_ref (&ngm->vtep_table,
t->encap_fib_index, &t->remote) == 1)
{
@@ -919,7 +910,6 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm,
a->is_add = is_add;
a->is_ip6 = ipv6_set;
-/* *INDENT-OFF* */
#define _(x) a->x = x;
foreach_gpe_copy_field;
if (ipv4_set)
@@ -927,7 +917,6 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm,
else
foreach_copy_ipv6
#undef _
-/* *INDENT-ON* */
rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index);
@@ -980,7 +969,6 @@ done:
* Example of how to delete a VXLAN-GPE Tunnel:
* @cliexcmd{create vxlan-gpe tunnel local 10.0.3.1 remote 10.0.3.3 vni 13 del}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_vxlan_gpe_tunnel_command, static) = {
.path = "create vxlan-gpe tunnel",
.short_help =
@@ -990,7 +978,6 @@ VLIB_CLI_COMMAND (create_vxlan_gpe_tunnel_command, static) = {
" [encap-vrf-id <nn>] [decap-vrf-id <nn>] [del]\n",
.function = vxlan_gpe_add_del_tunnel_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief CLI function for showing VXLAN GPE tunnels
@@ -1013,12 +1000,10 @@ show_vxlan_gpe_tunnel_command_fn (vlib_main_t * vm,
if (pool_elts (ngm->tunnels) == 0)
vlib_cli_output (vm, "No vxlan-gpe tunnels configured.");
- /* *INDENT-OFF* */
pool_foreach (t, ngm->tunnels)
{
vlib_cli_output (vm, "%U", format_vxlan_gpe_tunnel, t);
}
- /* *INDENT-ON* */
return 0;
}
@@ -1032,12 +1017,10 @@ show_vxlan_gpe_tunnel_command_fn (vlib_main_t * vm,
* [0] local 10.0.3.1 remote 10.0.3.3 vni 13 encap_fib_index 0 sw_if_index 5 decap_next l2
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_vxlan_gpe_tunnel_command, static) = {
.path = "show vxlan-gpe",
.function = show_vxlan_gpe_tunnel_command_fn,
};
-/* *INDENT-ON* */
void
vnet_int_vxlan_gpe_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable)
@@ -1145,13 +1128,11 @@ set_ip4_vxlan_gpe_bypass (vlib_main_t * vm,
* @cliexcmd{set interface ip vxlan-gpe-bypass GigabitEthernet2/0/0 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_vxlan_gpe_bypass_command, static) = {
.path = "set interface ip vxlan-gpe-bypass",
.function = set_ip4_vxlan_gpe_bypass,
.short_help = "set interface ip vxlan-gpe-bypass <interface> [del]",
};
-/* *INDENT-ON* */
static clib_error_t *
set_ip6_vxlan_gpe_bypass (vlib_main_t * vm,
@@ -1203,15 +1184,12 @@ set_ip6_vxlan_gpe_bypass (vlib_main_t * vm,
* @cliexcmd{set interface ip6 vxlan-gpe-bypass GigabitEthernet2/0/0 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip6_vxlan_gpe_bypass_command, static) = {
.path = "set interface ip6 vxlan-gpe-bypass",
.function = set_ip6_vxlan_gpe_bypass,
.short_help = "set interface ip6 vxlan-gpe-bypass <interface> [del]",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_vxlan_gpe_bypass, static) =
{
.arc_name = "ip4-unicast",
@@ -1225,7 +1203,6 @@ VNET_FEATURE_INIT (ip6_vxlan_gpe_bypass, static) =
.node_name = "ip6-vxlan-gpe-bypass",
.runs_before = VNET_FEATURES ("ip6-lookup"),
};
-/* *INDENT-ON* */
/**
* @brief Feature init function for VXLAN GPE
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.h b/src/vnet/vxlan-gpe/vxlan_gpe.h
index 2cbbb6c5f36..aabaafeee6f 100644
--- a/src/vnet/vxlan-gpe/vxlan_gpe.h
+++ b/src/vnet/vxlan-gpe/vxlan_gpe.h
@@ -40,7 +40,6 @@
* @brief VXLAN GPE header struct
*
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
/** 20 bytes */
ip4_header_t ip4;
@@ -49,9 +48,7 @@ typedef CLIB_PACKED (struct {
/** 8 bytes */
vxlan_gpe_header_t vxlan;
}) ip4_vxlan_gpe_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
/** 40 bytes */
ip6_header_t ip6;
@@ -60,7 +57,6 @@ typedef CLIB_PACKED (struct {
/** 8 bytes */
vxlan_gpe_header_t vxlan;
}) ip6_vxlan_gpe_header_t;
-/* *INDENT-ON* */
/**
* @brief Key struct for IPv4 VXLAN GPE tunnel.
@@ -68,7 +64,6 @@ typedef CLIB_PACKED (struct {
* all fields in NET byte order
* VNI shifted 8 bits
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct {
union {
struct {
@@ -81,7 +76,6 @@ typedef CLIB_PACKED(struct {
u64 as_u64[2];
};
}) vxlan4_gpe_tunnel_key_t;
-/* *INDENT-ON* */
/**
* @brief Key struct for IPv6 VXLAN GPE tunnel.
@@ -89,14 +83,12 @@ typedef CLIB_PACKED(struct {
* all fields in NET byte order
* VNI shifted 8 bits
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct {
ip6_address_t local;
ip6_address_t remote;
u32 vni;
u32 port;
}) vxlan6_gpe_tunnel_key_t;
-/* *INDENT-ON* */
typedef union
{
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_api.c b/src/vnet/vxlan-gpe/vxlan_gpe_api.c
index e9cf17f270b..cc74e1f58d4 100644
--- a/src/vnet/vxlan-gpe/vxlan_gpe_api.c
+++ b/src/vnet/vxlan-gpe/vxlan_gpe_api.c
@@ -114,12 +114,10 @@ static void
rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_VXLAN_GPE_ADD_DEL_TUNNEL_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -242,12 +240,10 @@ static void vl_api_vxlan_gpe_tunnel_dump_t_handler
if (~0 == sw_if_index)
{
- /* *INDENT-OFF* */
pool_foreach (t, vgm->tunnels)
{
send_vxlan_gpe_tunnel_details (t, reg, mp->context);
}
- /* *INDENT-ON* */
}
else
{
diff --git a/src/vnet/vxlan/FEATURE.yaml b/src/vnet/vxlan/FEATURE.yaml
deleted file mode 100644
index dc7d21b010e..00000000000
--- a/src/vnet/vxlan/FEATURE.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
----
-name: Virtual eXtensible LAN
-maintainer: John Lo <loj@cisco.com>
-features:
- - VXLAN tunnel for support of L2 overlay/virtual networks (RFC-7348)
- - Support either IPv4 or IPv6 underlay network VTEPs
- - Flooding via headend replication if all VXLAN tunnels in BD are unicast ones
- - Multicast VXLAN tunnel can be added to BD to flood via IP multicast
- - VXLAN encap with flow-hashed source port for better underlay IP load balance
- - VXLAN decap optimization via vxlan-bypass IP feature on underlay interfaces
- - VXLAN decap HW offload using flow director with DPDK on Intel Fortville NICs
-description: "Virtual eXtensible LAN (VXLAN) tunnels support L2 overlay networks that span L3 networks"
-state: production
-properties: [API, CLI, MULTITHREAD]
diff --git a/src/vnet/vxlan/decap.c b/src/vnet/vxlan/decap.c
deleted file mode 100644
index 729293fb3e5..00000000000
--- a/src/vnet/vxlan/decap.c
+++ /dev/null
@@ -1,1330 +0,0 @@
-/*
- * decap.c: vxlan tunnel decap packet processing
- *
- * Copyright (c) 2013 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vlib/vlib.h>
-#include <vnet/vxlan/vxlan.h>
-#include <vnet/udp/udp_local.h>
-
-#ifndef CLIB_MARCH_VARIANT
-vlib_node_registration_t vxlan4_input_node;
-vlib_node_registration_t vxlan6_input_node;
-#endif
-
-typedef struct
-{
- u32 next_index;
- u32 tunnel_index;
- u32 error;
- u32 vni;
-} vxlan_rx_trace_t;
-
-static u8 *
-format_vxlan_rx_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- vxlan_rx_trace_t *t = va_arg (*args, vxlan_rx_trace_t *);
-
- if (t->tunnel_index == ~0)
- return format (s, "VXLAN decap error - tunnel for vni %d does not exist",
- t->vni);
- return format (s, "VXLAN decap from vxlan_tunnel%d vni %d next %d error %d",
- t->tunnel_index, t->vni, t->next_index, t->error);
-}
-
-typedef vxlan4_tunnel_key_t last_tunnel_cache4;
-
-static const vxlan_decap_info_t decap_not_found = {
- .sw_if_index = ~0,
- .next_index = VXLAN_INPUT_NEXT_DROP,
- .error = VXLAN_ERROR_NO_SUCH_TUNNEL
-};
-
-static const vxlan_decap_info_t decap_bad_flags = {
- .sw_if_index = ~0,
- .next_index = VXLAN_INPUT_NEXT_DROP,
- .error = VXLAN_ERROR_BAD_FLAGS
-};
-
-always_inline vxlan_decap_info_t
-vxlan4_find_tunnel (vxlan_main_t * vxm, last_tunnel_cache4 * cache,
- u32 fib_index, ip4_header_t * ip4_0,
- vxlan_header_t * vxlan0, u32 * stats_sw_if_index)
-{
- if (PREDICT_FALSE (vxlan0->flags != VXLAN_FLAGS_I))
- return decap_bad_flags;
-
- /* Make sure VXLAN tunnel exist according to packet S/D IP, UDP port, VRF,
- * and VNI */
- u32 dst = ip4_0->dst_address.as_u32;
- u32 src = ip4_0->src_address.as_u32;
- udp_header_t *udp = ip4_next_header (ip4_0);
- vxlan4_tunnel_key_t key4 = {
- .key[0] = ((u64) dst << 32) | src,
- .key[1] = ((u64) udp->dst_port << 48) | ((u64) fib_index << 32) |
- vxlan0->vni_reserved,
- };
-
- if (PREDICT_TRUE
- (key4.key[0] == cache->key[0] && key4.key[1] == cache->key[1]))
- {
- /* cache hit */
- vxlan_decap_info_t di = {.as_u64 = cache->value };
- *stats_sw_if_index = di.sw_if_index;
- return di;
- }
-
- int rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4);
- if (PREDICT_TRUE (rv == 0))
- {
- *cache = key4;
- vxlan_decap_info_t di = {.as_u64 = key4.value };
- *stats_sw_if_index = di.sw_if_index;
- return di;
- }
-
- /* try multicast */
- if (PREDICT_TRUE (!ip4_address_is_multicast (&ip4_0->dst_address)))
- return decap_not_found;
-
- /* search for mcast decap info by mcast address */
- key4.key[0] = dst;
- rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4);
- if (rv != 0)
- return decap_not_found;
-
- /* search for unicast tunnel using the mcast tunnel local(src) ip */
- vxlan_decap_info_t mdi = {.as_u64 = key4.value };
- key4.key[0] = ((u64) mdi.local_ip.as_u32 << 32) | src;
- rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4);
- if (PREDICT_FALSE (rv != 0))
- return decap_not_found;
-
- /* mcast traffic does not update the cache */
- *stats_sw_if_index = mdi.sw_if_index;
- vxlan_decap_info_t di = {.as_u64 = key4.value };
- return di;
-}
-
-typedef vxlan6_tunnel_key_t last_tunnel_cache6;
-
-always_inline vxlan_decap_info_t
-vxlan6_find_tunnel (vxlan_main_t * vxm, last_tunnel_cache6 * cache,
- u32 fib_index, ip6_header_t * ip6_0,
- vxlan_header_t * vxlan0, u32 * stats_sw_if_index)
-{
- if (PREDICT_FALSE (vxlan0->flags != VXLAN_FLAGS_I))
- return decap_bad_flags;
-
- /* Make sure VXLAN tunnel exist according to packet SIP, UDP port, VRF, and
- * VNI */
- udp_header_t *udp = ip6_next_header (ip6_0);
- vxlan6_tunnel_key_t key6 = {
- .key[0] = ip6_0->src_address.as_u64[0],
- .key[1] = ip6_0->src_address.as_u64[1],
- .key[2] = ((u64) udp->dst_port << 48) | ((u64) fib_index << 32) |
- vxlan0->vni_reserved,
- };
-
- if (PREDICT_FALSE
- (clib_bihash_key_compare_24_8 (key6.key, cache->key) == 0))
- {
- int rv =
- clib_bihash_search_inline_24_8 (&vxm->vxlan6_tunnel_by_key, &key6);
- if (PREDICT_FALSE (rv != 0))
- return decap_not_found;
-
- *cache = key6;
- }
- vxlan_tunnel_t *t0 = pool_elt_at_index (vxm->tunnels, cache->value);
-
- /* Validate VXLAN tunnel SIP against packet DIP */
- if (PREDICT_TRUE (ip6_address_is_equal (&ip6_0->dst_address, &t0->src.ip6)))
- *stats_sw_if_index = t0->sw_if_index;
- else
- {
- /* try multicast */
- if (PREDICT_TRUE (!ip6_address_is_multicast (&ip6_0->dst_address)))
- return decap_not_found;
-
- /* Make sure mcast VXLAN tunnel exist by packet DIP and VNI */
- key6.key[0] = ip6_0->dst_address.as_u64[0];
- key6.key[1] = ip6_0->dst_address.as_u64[1];
- int rv =
- clib_bihash_search_inline_24_8 (&vxm->vxlan6_tunnel_by_key, &key6);
- if (PREDICT_FALSE (rv != 0))
- return decap_not_found;
-
- vxlan_tunnel_t *mcast_t0 = pool_elt_at_index (vxm->tunnels, key6.value);
- *stats_sw_if_index = mcast_t0->sw_if_index;
- }
-
- vxlan_decap_info_t di = {
- .sw_if_index = t0->sw_if_index,
- .next_index = t0->decap_next_index,
- };
- return di;
-}
-
-always_inline uword
-vxlan_input (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame, u32 is_ip4)
-{
- vxlan_main_t *vxm = &vxlan_main;
- vnet_main_t *vnm = vxm->vnet_main;
- vnet_interface_main_t *im = &vnm->interface_main;
- vlib_combined_counter_main_t *rx_counter =
- im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX;
- last_tunnel_cache4 last4;
- last_tunnel_cache6 last6;
- u32 pkts_dropped = 0;
- u32 thread_index = vlib_get_thread_index ();
-
- if (is_ip4)
- clib_memset (&last4, 0xff, sizeof last4);
- else
- clib_memset (&last6, 0xff, sizeof last6);
-
- u32 *from = vlib_frame_vector_args (from_frame);
- u32 n_left_from = from_frame->n_vectors;
-
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
- vlib_get_buffers (vm, from, bufs, n_left_from);
-
- u32 stats_if0 = ~0, stats_if1 = ~0;
- u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
- while (n_left_from >= 4)
- {
- /* Prefetch next iteration. */
- vlib_prefetch_buffer_header (b[2], LOAD);
- vlib_prefetch_buffer_header (b[3], LOAD);
-
- /* udp leaves current_data pointing at the vxlan header */
- void *cur0 = vlib_buffer_get_current (b[0]);
- void *cur1 = vlib_buffer_get_current (b[1]);
- vxlan_header_t *vxlan0 = cur0;
- vxlan_header_t *vxlan1 = cur1;
-
-
- ip4_header_t *ip4_0, *ip4_1;
- ip6_header_t *ip6_0, *ip6_1;
- if (is_ip4)
- {
- ip4_0 = cur0 - sizeof (udp_header_t) - sizeof (ip4_header_t);
- ip4_1 = cur1 - sizeof (udp_header_t) - sizeof (ip4_header_t);
- }
- else
- {
- ip6_0 = cur0 - sizeof (udp_header_t) - sizeof (ip6_header_t);
- ip6_1 = cur1 - sizeof (udp_header_t) - sizeof (ip6_header_t);
- }
-
- /* pop vxlan */
- vlib_buffer_advance (b[0], sizeof *vxlan0);
- vlib_buffer_advance (b[1], sizeof *vxlan1);
-
- u32 fi0 = vlib_buffer_get_ip_fib_index (b[0], is_ip4);
- u32 fi1 = vlib_buffer_get_ip_fib_index (b[1], is_ip4);
-
- vxlan_decap_info_t di0 = is_ip4 ?
- vxlan4_find_tunnel (vxm, &last4, fi0, ip4_0, vxlan0, &stats_if0) :
- vxlan6_find_tunnel (vxm, &last6, fi0, ip6_0, vxlan0, &stats_if0);
- vxlan_decap_info_t di1 = is_ip4 ?
- vxlan4_find_tunnel (vxm, &last4, fi1, ip4_1, vxlan1, &stats_if1) :
- vxlan6_find_tunnel (vxm, &last6, fi1, ip6_1, vxlan1, &stats_if1);
-
- /* Prefetch next iteration. */
- clib_prefetch_load (b[2]->data);
- clib_prefetch_load (b[3]->data);
-
- u32 len0 = vlib_buffer_length_in_chain (vm, b[0]);
- u32 len1 = vlib_buffer_length_in_chain (vm, b[1]);
-
- next[0] = di0.next_index;
- next[1] = di1.next_index;
-
- u8 any_error = di0.error | di1.error;
- if (PREDICT_TRUE (any_error == 0))
- {
- /* Required to make the l2 tag push / pop code work on l2 subifs */
- vnet_update_l2_len (b[0]);
- vnet_update_l2_len (b[1]);
- /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
- vnet_buffer (b[0])->sw_if_index[VLIB_RX] = di0.sw_if_index;
- vnet_buffer (b[1])->sw_if_index[VLIB_RX] = di1.sw_if_index;
- vlib_increment_combined_counter (rx_counter, thread_index,
- stats_if0, 1, len0);
- vlib_increment_combined_counter (rx_counter, thread_index,
- stats_if1, 1, len1);
- }
- else
- {
- if (di0.error == 0)
- {
- vnet_update_l2_len (b[0]);
- vnet_buffer (b[0])->sw_if_index[VLIB_RX] = di0.sw_if_index;
- vlib_increment_combined_counter (rx_counter, thread_index,
- stats_if0, 1, len0);
- }
- else
- {
- b[0]->error = node->errors[di0.error];
- pkts_dropped++;
- }
-
- if (di1.error == 0)
- {
- vnet_update_l2_len (b[1]);
- vnet_buffer (b[1])->sw_if_index[VLIB_RX] = di1.sw_if_index;
- vlib_increment_combined_counter (rx_counter, thread_index,
- stats_if1, 1, len1);
- }
- else
- {
- b[1]->error = node->errors[di1.error];
- pkts_dropped++;
- }
- }
-
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_rx_trace_t *tr =
- vlib_add_trace (vm, node, b[0], sizeof (*tr));
- tr->next_index = next[0];
- tr->error = di0.error;
- tr->tunnel_index = di0.sw_if_index == ~0 ?
- ~0 : vxm->tunnel_index_by_sw_if_index[di0.sw_if_index];
- tr->vni = vnet_get_vni (vxlan0);
- }
- if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_rx_trace_t *tr =
- vlib_add_trace (vm, node, b[1], sizeof (*tr));
- tr->next_index = next[1];
- tr->error = di1.error;
- tr->tunnel_index = di1.sw_if_index == ~0 ?
- ~0 : vxm->tunnel_index_by_sw_if_index[di1.sw_if_index];
- tr->vni = vnet_get_vni (vxlan1);
- }
- b += 2;
- next += 2;
- n_left_from -= 2;
- }
-
- while (n_left_from > 0)
- {
- /* udp leaves current_data pointing at the vxlan header */
- void *cur0 = vlib_buffer_get_current (b[0]);
- vxlan_header_t *vxlan0 = cur0;
- ip4_header_t *ip4_0;
- ip6_header_t *ip6_0;
- if (is_ip4)
- ip4_0 = cur0 - sizeof (udp_header_t) - sizeof (ip4_header_t);
- else
- ip6_0 = cur0 - sizeof (udp_header_t) - sizeof (ip6_header_t);
-
- /* pop (ip, udp, vxlan) */
- vlib_buffer_advance (b[0], sizeof (*vxlan0));
-
- u32 fi0 = vlib_buffer_get_ip_fib_index (b[0], is_ip4);
-
- vxlan_decap_info_t di0 = is_ip4 ?
- vxlan4_find_tunnel (vxm, &last4, fi0, ip4_0, vxlan0, &stats_if0) :
- vxlan6_find_tunnel (vxm, &last6, fi0, ip6_0, vxlan0, &stats_if0);
-
- uword len0 = vlib_buffer_length_in_chain (vm, b[0]);
-
- next[0] = di0.next_index;
-
- /* Validate VXLAN tunnel encap-fib index against packet */
- if (di0.error == 0)
- {
- /* Required to make the l2 tag push / pop code work on l2 subifs */
- vnet_update_l2_len (b[0]);
-
- /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
- vnet_buffer (b[0])->sw_if_index[VLIB_RX] = di0.sw_if_index;
-
- vlib_increment_combined_counter (rx_counter, thread_index,
- stats_if0, 1, len0);
- }
- else
- {
- b[0]->error = node->errors[di0.error];
- pkts_dropped++;
- }
-
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_rx_trace_t *tr
- = vlib_add_trace (vm, node, b[0], sizeof (*tr));
- tr->next_index = next[0];
- tr->error = di0.error;
- tr->tunnel_index = di0.sw_if_index == ~0 ?
- ~0 : vxm->tunnel_index_by_sw_if_index[di0.sw_if_index];
- tr->vni = vnet_get_vni (vxlan0);
- }
- b += 1;
- next += 1;
- n_left_from -= 1;
- }
- vlib_buffer_enqueue_to_next (vm, node, from, nexts, from_frame->n_vectors);
- /* Do we still need this now that tunnel tx stats is kept? */
- u32 node_idx = is_ip4 ? vxlan4_input_node.index : vxlan6_input_node.index;
- vlib_node_increment_counter (vm, node_idx, VXLAN_ERROR_DECAPSULATED,
- from_frame->n_vectors - pkts_dropped);
-
- return from_frame->n_vectors;
-}
-
-VLIB_NODE_FN (vxlan4_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return vxlan_input (vm, node, from_frame, /* is_ip4 */ 1);
-}
-
-VLIB_NODE_FN (vxlan6_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return vxlan_input (vm, node, from_frame, /* is_ip4 */ 0);
-}
-
-static char *vxlan_error_strings[] = {
-#define vxlan_error(n,s) s,
-#include <vnet/vxlan/vxlan_error.def>
-#undef vxlan_error
-};
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (vxlan4_input_node) =
-{
- .name = "vxlan4-input",
- .vector_size = sizeof (u32),
- .n_errors = VXLAN_N_ERROR,
- .error_strings = vxlan_error_strings,
- .n_next_nodes = VXLAN_INPUT_N_NEXT,
- .format_trace = format_vxlan_rx_trace,
- .next_nodes = {
-#define _(s,n) [VXLAN_INPUT_NEXT_##s] = n,
- foreach_vxlan_input_next
-#undef _
- },
-};
-
-VLIB_REGISTER_NODE (vxlan6_input_node) =
-{
- .name = "vxlan6-input",
- .vector_size = sizeof (u32),
- .n_errors = VXLAN_N_ERROR,
- .error_strings = vxlan_error_strings,
- .n_next_nodes = VXLAN_INPUT_N_NEXT,
- .next_nodes = {
-#define _(s,n) [VXLAN_INPUT_NEXT_##s] = n,
- foreach_vxlan_input_next
-#undef _
- },
- .format_trace = format_vxlan_rx_trace,
-};
-/* *INDENT-ON* */
-
-typedef enum
-{
- IP_VXLAN_BYPASS_NEXT_DROP,
- IP_VXLAN_BYPASS_NEXT_VXLAN,
- IP_VXLAN_BYPASS_N_NEXT,
-} ip_vxlan_bypass_next_t;
-
-always_inline uword
-ip_vxlan_bypass_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, u32 is_ip4)
-{
- vxlan_main_t *vxm = &vxlan_main;
- u32 *from, *to_next, n_left_from, n_left_to_next, next_index;
- vlib_node_runtime_t *error_node =
- vlib_node_get_runtime (vm, ip4_input_node.index);
- vtep4_key_t last_vtep4; /* last IPv4 address / fib index
- matching a local VTEP address */
- vtep6_key_t last_vtep6; /* last IPv6 address / fib index
- matching a local VTEP address */
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
-
- last_tunnel_cache4 last4;
- last_tunnel_cache6 last6;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
-
- vlib_get_buffers (vm, from, bufs, n_left_from);
-
- if (node->flags & VLIB_NODE_FLAG_TRACE)
- ip4_forward_next_trace (vm, node, frame, VLIB_TX);
-
- if (is_ip4)
- {
- vtep4_key_init (&last_vtep4);
- clib_memset (&last4, 0xff, sizeof last4);
- }
- else
- {
- vtep6_key_init (&last_vtep6);
- clib_memset (&last6, 0xff, sizeof last6);
- }
-
- while (n_left_from > 0)
- {
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- vlib_buffer_t *b0, *b1;
- ip4_header_t *ip40, *ip41;
- ip6_header_t *ip60, *ip61;
- udp_header_t *udp0, *udp1;
- vxlan_header_t *vxlan0, *vxlan1;
- u32 bi0, ip_len0, udp_len0, flags0, next0;
- u32 bi1, ip_len1, udp_len1, flags1, next1;
- i32 len_diff0, len_diff1;
- u8 error0, good_udp0, proto0;
- u8 error1, good_udp1, proto1;
- u32 stats_if0 = ~0, stats_if1 = ~0;
-
- /* Prefetch next iteration. */
- {
- vlib_prefetch_buffer_header (b[2], LOAD);
- vlib_prefetch_buffer_header (b[3], LOAD);
-
- CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- }
-
- bi0 = to_next[0] = from[0];
- bi1 = to_next[1] = from[1];
- from += 2;
- n_left_from -= 2;
- to_next += 2;
- n_left_to_next -= 2;
-
- b0 = b[0];
- b1 = b[1];
- b += 2;
- if (is_ip4)
- {
- ip40 = vlib_buffer_get_current (b0);
- ip41 = vlib_buffer_get_current (b1);
- }
- else
- {
- ip60 = vlib_buffer_get_current (b0);
- ip61 = vlib_buffer_get_current (b1);
- }
-
- /* Setup packet for next IP feature */
- vnet_feature_next (&next0, b0);
- vnet_feature_next (&next1, b1);
-
- if (is_ip4)
- {
- /* Treat IP frag packets as "experimental" protocol for now
- until support of IP frag reassembly is implemented */
- proto0 = ip4_is_fragment (ip40) ? 0xfe : ip40->protocol;
- proto1 = ip4_is_fragment (ip41) ? 0xfe : ip41->protocol;
- }
- else
- {
- proto0 = ip60->protocol;
- proto1 = ip61->protocol;
- }
-
- /* Process packet 0 */
- if (proto0 != IP_PROTOCOL_UDP)
- goto exit0; /* not UDP packet */
-
- if (is_ip4)
- udp0 = ip4_next_header (ip40);
- else
- udp0 = ip6_next_header (ip60);
-
- u32 fi0 = vlib_buffer_get_ip_fib_index (b0, is_ip4);
- vxlan0 = vlib_buffer_get_current (b0) + sizeof (udp_header_t) +
- sizeof (ip4_header_t);
-
- vxlan_decap_info_t di0 =
- is_ip4 ?
- vxlan4_find_tunnel (vxm, &last4, fi0, ip40, vxlan0, &stats_if0) :
- vxlan6_find_tunnel (vxm, &last6, fi0, ip60, vxlan0, &stats_if0);
-
- if (PREDICT_FALSE (di0.sw_if_index == ~0))
- goto exit0; /* unknown interface */
-
- /* Validate DIP against VTEPs */
- if (is_ip4)
- {
-#ifdef CLIB_HAVE_VEC512
- if (!vtep4_check_vector (&vxm->vtep_table, b0, ip40, &last_vtep4,
- &vxm->vtep4_u512))
-#else
- if (!vtep4_check (&vxm->vtep_table, b0, ip40, &last_vtep4))
-#endif
- goto exit0; /* no local VTEP for VXLAN packet */
- }
- else
- {
- if (!vtep6_check (&vxm->vtep_table, b0, ip60, &last_vtep6))
- goto exit0; /* no local VTEP for VXLAN packet */
- }
-
- flags0 = b0->flags;
- good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
-
- /* Don't verify UDP checksum for packets with explicit zero checksum. */
- good_udp0 |= udp0->checksum == 0;
-
- /* Verify UDP length */
- if (is_ip4)
- ip_len0 = clib_net_to_host_u16 (ip40->length);
- else
- ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
- udp_len0 = clib_net_to_host_u16 (udp0->length);
- len_diff0 = ip_len0 - udp_len0;
-
- /* Verify UDP checksum */
- if (PREDICT_FALSE (!good_udp0))
- {
- if (is_ip4)
- flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
- else
- flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
- good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
- }
-
- if (is_ip4)
- {
- error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
- }
- else
- {
- error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
- }
-
- next0 = error0 ?
- IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
- b0->error = error0 ? error_node->errors[error0] : 0;
-
- /* vxlan-input node expect current at VXLAN header */
- if (is_ip4)
- vlib_buffer_advance (b0,
- sizeof (ip4_header_t) +
- sizeof (udp_header_t));
- else
- vlib_buffer_advance (b0,
- sizeof (ip6_header_t) +
- sizeof (udp_header_t));
-
- exit0:
- /* Process packet 1 */
- if (proto1 != IP_PROTOCOL_UDP)
- goto exit1; /* not UDP packet */
-
- if (is_ip4)
- udp1 = ip4_next_header (ip41);
- else
- udp1 = ip6_next_header (ip61);
-
- u32 fi1 = vlib_buffer_get_ip_fib_index (b1, is_ip4);
- vxlan1 = vlib_buffer_get_current (b1) + sizeof (udp_header_t) +
- sizeof (ip4_header_t);
-
- vxlan_decap_info_t di1 =
- is_ip4 ?
- vxlan4_find_tunnel (vxm, &last4, fi1, ip41, vxlan1, &stats_if1) :
- vxlan6_find_tunnel (vxm, &last6, fi1, ip61, vxlan1, &stats_if1);
-
- if (PREDICT_FALSE (di1.sw_if_index == ~0))
- goto exit1; /* unknown interface */
-
- /* Validate DIP against VTEPs */
- if (is_ip4)
- {
-#ifdef CLIB_HAVE_VEC512
- if (!vtep4_check_vector (&vxm->vtep_table, b1, ip41, &last_vtep4,
- &vxm->vtep4_u512))
-#else
- if (!vtep4_check (&vxm->vtep_table, b1, ip41, &last_vtep4))
-#endif
- goto exit1; /* no local VTEP for VXLAN packet */
- }
- else
- {
- if (!vtep6_check (&vxm->vtep_table, b1, ip61, &last_vtep6))
- goto exit1; /* no local VTEP for VXLAN packet */
- }
-
- flags1 = b1->flags;
- good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
-
- /* Don't verify UDP checksum for packets with explicit zero checksum. */
- good_udp1 |= udp1->checksum == 0;
-
- /* Verify UDP length */
- if (is_ip4)
- ip_len1 = clib_net_to_host_u16 (ip41->length);
- else
- ip_len1 = clib_net_to_host_u16 (ip61->payload_length);
- udp_len1 = clib_net_to_host_u16 (udp1->length);
- len_diff1 = ip_len1 - udp_len1;
-
- /* Verify UDP checksum */
- if (PREDICT_FALSE (!good_udp1))
- {
- if (is_ip4)
- flags1 = ip4_tcp_udp_validate_checksum (vm, b1);
- else
- flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, b1);
- good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
- }
-
- if (is_ip4)
- {
- error1 = good_udp1 ? 0 : IP4_ERROR_UDP_CHECKSUM;
- error1 = (len_diff1 >= 0) ? error1 : IP4_ERROR_UDP_LENGTH;
- }
- else
- {
- error1 = good_udp1 ? 0 : IP6_ERROR_UDP_CHECKSUM;
- error1 = (len_diff1 >= 0) ? error1 : IP6_ERROR_UDP_LENGTH;
- }
-
- next1 = error1 ?
- IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
- b1->error = error1 ? error_node->errors[error1] : 0;
-
- /* vxlan-input node expect current at VXLAN header */
- if (is_ip4)
- vlib_buffer_advance (b1,
- sizeof (ip4_header_t) +
- sizeof (udp_header_t));
- else
- vlib_buffer_advance (b1,
- sizeof (ip6_header_t) +
- sizeof (udp_header_t));
-
- exit1:
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- vlib_buffer_t *b0;
- ip4_header_t *ip40;
- ip6_header_t *ip60;
- udp_header_t *udp0;
- vxlan_header_t *vxlan0;
- u32 bi0, ip_len0, udp_len0, flags0, next0;
- i32 len_diff0;
- u8 error0, good_udp0, proto0;
- u32 stats_if0 = ~0;
-
- bi0 = to_next[0] = from[0];
- from += 1;
- n_left_from -= 1;
- to_next += 1;
- n_left_to_next -= 1;
-
- b0 = b[0];
- b++;
- if (is_ip4)
- ip40 = vlib_buffer_get_current (b0);
- else
- ip60 = vlib_buffer_get_current (b0);
-
- /* Setup packet for next IP feature */
- vnet_feature_next (&next0, b0);
-
- if (is_ip4)
- /* Treat IP4 frag packets as "experimental" protocol for now
- until support of IP frag reassembly is implemented */
- proto0 = ip4_is_fragment (ip40) ? 0xfe : ip40->protocol;
- else
- proto0 = ip60->protocol;
-
- if (proto0 != IP_PROTOCOL_UDP)
- goto exit; /* not UDP packet */
-
- if (is_ip4)
- udp0 = ip4_next_header (ip40);
- else
- udp0 = ip6_next_header (ip60);
-
- u32 fi0 = vlib_buffer_get_ip_fib_index (b0, is_ip4);
- vxlan0 = vlib_buffer_get_current (b0) + sizeof (udp_header_t) +
- sizeof (ip4_header_t);
-
- vxlan_decap_info_t di0 =
- is_ip4 ?
- vxlan4_find_tunnel (vxm, &last4, fi0, ip40, vxlan0, &stats_if0) :
- vxlan6_find_tunnel (vxm, &last6, fi0, ip60, vxlan0, &stats_if0);
-
- if (PREDICT_FALSE (di0.sw_if_index == ~0))
- goto exit; /* unknown interface */
-
- /* Validate DIP against VTEPs */
- if (is_ip4)
- {
-#ifdef CLIB_HAVE_VEC512
- if (!vtep4_check_vector (&vxm->vtep_table, b0, ip40, &last_vtep4,
- &vxm->vtep4_u512))
-#else
- if (!vtep4_check (&vxm->vtep_table, b0, ip40, &last_vtep4))
-#endif
- goto exit; /* no local VTEP for VXLAN packet */
- }
- else
- {
- if (!vtep6_check (&vxm->vtep_table, b0, ip60, &last_vtep6))
- goto exit; /* no local VTEP for VXLAN packet */
- }
-
- flags0 = b0->flags;
- good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
-
- /* Don't verify UDP checksum for packets with explicit zero checksum. */
- good_udp0 |= udp0->checksum == 0;
-
- /* Verify UDP length */
- if (is_ip4)
- ip_len0 = clib_net_to_host_u16 (ip40->length);
- else
- ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
- udp_len0 = clib_net_to_host_u16 (udp0->length);
- len_diff0 = ip_len0 - udp_len0;
-
- /* Verify UDP checksum */
- if (PREDICT_FALSE (!good_udp0))
- {
- if (is_ip4)
- flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
- else
- flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
- good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
- }
-
- if (is_ip4)
- {
- error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
- }
- else
- {
- error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
- }
-
- next0 = error0 ?
- IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
- b0->error = error0 ? error_node->errors[error0] : 0;
-
- /* vxlan-input node expect current at VXLAN header */
- if (is_ip4)
- vlib_buffer_advance (b0,
- sizeof (ip4_header_t) +
- sizeof (udp_header_t));
- else
- vlib_buffer_advance (b0,
- sizeof (ip6_header_t) +
- sizeof (udp_header_t));
-
- exit:
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (ip4_vxlan_bypass_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return ip_vxlan_bypass_inline (vm, node, frame, /* is_ip4 */ 1);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip4_vxlan_bypass_node) =
-{
- .name = "ip4-vxlan-bypass",
- .vector_size = sizeof (u32),
- .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT,
- .next_nodes = {
- [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop",
- [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan4-input",
- },
- .format_buffer = format_ip4_header,
- .format_trace = format_ip4_forward_next_trace,
-};
-
-/* *INDENT-ON* */
-
-/* Dummy init function to get us linked in. */
-static clib_error_t *
-ip4_vxlan_bypass_init (vlib_main_t * vm)
-{
- return 0;
-}
-
-VLIB_INIT_FUNCTION (ip4_vxlan_bypass_init);
-
-VLIB_NODE_FN (ip6_vxlan_bypass_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return ip_vxlan_bypass_inline (vm, node, frame, /* is_ip4 */ 0);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip6_vxlan_bypass_node) =
-{
- .name = "ip6-vxlan-bypass",
- .vector_size = sizeof (u32),
- .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT,
- .next_nodes = {
- [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop",
- [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan6-input",
- },
- .format_buffer = format_ip6_header,
- .format_trace = format_ip6_forward_next_trace,
-};
-
-/* *INDENT-ON* */
-
-/* Dummy init function to get us linked in. */
-static clib_error_t *
-ip6_vxlan_bypass_init (vlib_main_t * vm)
-{
- return 0;
-}
-
-VLIB_INIT_FUNCTION (ip6_vxlan_bypass_init);
-
-#define foreach_vxlan_flow_input_next \
-_(DROP, "error-drop") \
-_(L2_INPUT, "l2-input")
-
-typedef enum
-{
-#define _(s,n) VXLAN_FLOW_NEXT_##s,
- foreach_vxlan_flow_input_next
-#undef _
- VXLAN_FLOW_N_NEXT,
-} vxlan_flow_input_next_t;
-
-#define foreach_vxlan_flow_error \
- _(NONE, "no error") \
- _(IP_CHECKSUM_ERROR, "Rx ip checksum errors") \
- _(IP_HEADER_ERROR, "Rx ip header errors") \
- _(UDP_CHECKSUM_ERROR, "Rx udp checksum errors") \
- _(UDP_LENGTH_ERROR, "Rx udp length errors")
-
-typedef enum
-{
-#define _(f,s) VXLAN_FLOW_ERROR_##f,
- foreach_vxlan_flow_error
-#undef _
- VXLAN_FLOW_N_ERROR,
-} vxlan_flow_error_t;
-
-static char *vxlan_flow_error_strings[] = {
-#define _(n,s) s,
- foreach_vxlan_flow_error
-#undef _
-};
-
-
-static_always_inline u8
-vxlan_validate_udp_csum (vlib_main_t * vm, vlib_buffer_t * b)
-{
- u32 flags = b->flags;
- enum
- { offset =
- sizeof (ip4_header_t) + sizeof (udp_header_t) + sizeof (vxlan_header_t),
- };
-
- /* Verify UDP checksum */
- if ((flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
- {
- vlib_buffer_advance (b, -offset);
- flags = ip4_tcp_udp_validate_checksum (vm, b);
- vlib_buffer_advance (b, offset);
- }
-
- return (flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
-}
-
-static_always_inline u8
-vxlan_check_udp_csum (vlib_main_t * vm, vlib_buffer_t * b)
-{
- ip4_vxlan_header_t *hdr = vlib_buffer_get_current (b) - sizeof *hdr;
- udp_header_t *udp = &hdr->udp;
- /* Don't verify UDP checksum for packets with explicit zero checksum. */
- u8 good_csum = (b->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0 ||
- udp->checksum == 0;
-
- return !good_csum;
-}
-
-static_always_inline u8
-vxlan_check_ip (vlib_buffer_t * b, u16 payload_len)
-{
- ip4_vxlan_header_t *hdr = vlib_buffer_get_current (b) - sizeof *hdr;
- u16 ip_len = clib_net_to_host_u16 (hdr->ip4.length);
- u16 expected = payload_len + sizeof *hdr;
- return ip_len > expected || hdr->ip4.ttl == 0
- || hdr->ip4.ip_version_and_header_length != 0x45;
-}
-
-static_always_inline u8
-vxlan_check_ip_udp_len (vlib_buffer_t * b)
-{
- ip4_vxlan_header_t *hdr = vlib_buffer_get_current (b) - sizeof *hdr;
- u16 ip_len = clib_net_to_host_u16 (hdr->ip4.length);
- u16 udp_len = clib_net_to_host_u16 (hdr->udp.length);
- return udp_len > ip_len;
-}
-
-static_always_inline u8
-vxlan_err_code (u8 ip_err0, u8 udp_err0, u8 csum_err0)
-{
- u8 error0 = VXLAN_FLOW_ERROR_NONE;
- if (ip_err0)
- error0 = VXLAN_FLOW_ERROR_IP_HEADER_ERROR;
- if (udp_err0)
- error0 = VXLAN_FLOW_ERROR_UDP_LENGTH_ERROR;
- if (csum_err0)
- error0 = VXLAN_FLOW_ERROR_UDP_CHECKSUM_ERROR;
- return error0;
-}
-
-VLIB_NODE_FN (vxlan4_flow_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * f)
-{
- enum
- { payload_offset = sizeof (ip4_vxlan_header_t) };
-
- vxlan_main_t *vxm = &vxlan_main;
- vnet_interface_main_t *im = &vnet_main.interface_main;
- vlib_combined_counter_main_t *rx_counter[VXLAN_FLOW_N_NEXT] = {
- [VXLAN_FLOW_NEXT_DROP] =
- im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_DROP,
- [VXLAN_FLOW_NEXT_L2_INPUT] =
- im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
- };
- u32 thread_index = vlib_get_thread_index ();
-
- u32 *from = vlib_frame_vector_args (f);
- u32 n_left_from = f->n_vectors;
- u32 next_index = VXLAN_FLOW_NEXT_L2_INPUT;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next, *to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 3 && n_left_to_next > 3)
- {
- u32 bi0 = to_next[0] = from[0];
- u32 bi1 = to_next[1] = from[1];
- u32 bi2 = to_next[2] = from[2];
- u32 bi3 = to_next[3] = from[3];
- from += 4;
- n_left_from -= 4;
- to_next += 4;
- n_left_to_next -= 4;
-
- vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
- vlib_buffer_t *b1 = vlib_get_buffer (vm, bi1);
- vlib_buffer_t *b2 = vlib_get_buffer (vm, bi2);
- vlib_buffer_t *b3 = vlib_get_buffer (vm, bi3);
-
- vlib_buffer_advance (b0, payload_offset);
- vlib_buffer_advance (b1, payload_offset);
- vlib_buffer_advance (b2, payload_offset);
- vlib_buffer_advance (b3, payload_offset);
-
- u16 len0 = vlib_buffer_length_in_chain (vm, b0);
- u16 len1 = vlib_buffer_length_in_chain (vm, b1);
- u16 len2 = vlib_buffer_length_in_chain (vm, b2);
- u16 len3 = vlib_buffer_length_in_chain (vm, b3);
-
- u32 next0 = VXLAN_FLOW_NEXT_L2_INPUT, next1 =
- VXLAN_FLOW_NEXT_L2_INPUT, next2 =
- VXLAN_FLOW_NEXT_L2_INPUT, next3 = VXLAN_FLOW_NEXT_L2_INPUT;
-
- u8 ip_err0 = vxlan_check_ip (b0, len0);
- u8 ip_err1 = vxlan_check_ip (b1, len1);
- u8 ip_err2 = vxlan_check_ip (b2, len2);
- u8 ip_err3 = vxlan_check_ip (b3, len3);
- u8 ip_err = ip_err0 | ip_err1 | ip_err2 | ip_err3;
-
- u8 udp_err0 = vxlan_check_ip_udp_len (b0);
- u8 udp_err1 = vxlan_check_ip_udp_len (b1);
- u8 udp_err2 = vxlan_check_ip_udp_len (b2);
- u8 udp_err3 = vxlan_check_ip_udp_len (b3);
- u8 udp_err = udp_err0 | udp_err1 | udp_err2 | udp_err3;
-
- u8 csum_err0 = vxlan_check_udp_csum (vm, b0);
- u8 csum_err1 = vxlan_check_udp_csum (vm, b1);
- u8 csum_err2 = vxlan_check_udp_csum (vm, b2);
- u8 csum_err3 = vxlan_check_udp_csum (vm, b3);
- u8 csum_err = csum_err0 | csum_err1 | csum_err2 | csum_err3;
-
- if (PREDICT_FALSE (csum_err))
- {
- if (csum_err0)
- csum_err0 = !vxlan_validate_udp_csum (vm, b0);
- if (csum_err1)
- csum_err1 = !vxlan_validate_udp_csum (vm, b1);
- if (csum_err2)
- csum_err2 = !vxlan_validate_udp_csum (vm, b2);
- if (csum_err3)
- csum_err3 = !vxlan_validate_udp_csum (vm, b3);
- csum_err = csum_err0 | csum_err1 | csum_err2 | csum_err3;
- }
-
- if (PREDICT_FALSE (ip_err || udp_err || csum_err))
- {
- if (ip_err0 || udp_err0 || csum_err0)
- {
- next0 = VXLAN_FLOW_NEXT_DROP;
- u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0);
- b0->error = node->errors[error0];
- }
- if (ip_err1 || udp_err1 || csum_err1)
- {
- next1 = VXLAN_FLOW_NEXT_DROP;
- u8 error1 = vxlan_err_code (ip_err1, udp_err1, csum_err1);
- b1->error = node->errors[error1];
- }
- if (ip_err2 || udp_err2 || csum_err2)
- {
- next2 = VXLAN_FLOW_NEXT_DROP;
- u8 error2 = vxlan_err_code (ip_err2, udp_err2, csum_err2);
- b2->error = node->errors[error2];
- }
- if (ip_err3 || udp_err3 || csum_err3)
- {
- next3 = VXLAN_FLOW_NEXT_DROP;
- u8 error3 = vxlan_err_code (ip_err3, udp_err3, csum_err3);
- b3->error = node->errors[error3];
- }
- }
-
- vnet_update_l2_len (b0);
- vnet_update_l2_len (b1);
- vnet_update_l2_len (b2);
- vnet_update_l2_len (b3);
-
- ASSERT (b0->flow_id != 0);
- ASSERT (b1->flow_id != 0);
- ASSERT (b2->flow_id != 0);
- ASSERT (b3->flow_id != 0);
-
- u32 t_index0 = b0->flow_id - vxm->flow_id_start;
- u32 t_index1 = b1->flow_id - vxm->flow_id_start;
- u32 t_index2 = b2->flow_id - vxm->flow_id_start;
- u32 t_index3 = b3->flow_id - vxm->flow_id_start;
-
- vxlan_tunnel_t *t0 = &vxm->tunnels[t_index0];
- vxlan_tunnel_t *t1 = &vxm->tunnels[t_index1];
- vxlan_tunnel_t *t2 = &vxm->tunnels[t_index2];
- vxlan_tunnel_t *t3 = &vxm->tunnels[t_index3];
-
- /* flow id consumed */
- b0->flow_id = 0;
- b1->flow_id = 0;
- b2->flow_id = 0;
- b3->flow_id = 0;
-
- u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX] =
- t0->sw_if_index;
- u32 sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX] =
- t1->sw_if_index;
- u32 sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX] =
- t2->sw_if_index;
- u32 sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX] =
- t3->sw_if_index;
-
- vlib_increment_combined_counter (rx_counter[next0], thread_index,
- sw_if_index0, 1, len0);
- vlib_increment_combined_counter (rx_counter[next1], thread_index,
- sw_if_index1, 1, len1);
- vlib_increment_combined_counter (rx_counter[next2], thread_index,
- sw_if_index2, 1, len2);
- vlib_increment_combined_counter (rx_counter[next3], thread_index,
- sw_if_index3, 1, len3);
-
- u32 flags = b0->flags | b1->flags | b2->flags | b3->flags;
-
- if (PREDICT_FALSE (flags & VLIB_BUFFER_IS_TRACED))
- {
- if (b0->flags & VLIB_BUFFER_IS_TRACED)
- {
- vxlan_rx_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof *tr);
- u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0);
- tr->next_index = next0;
- tr->error = error0;
- tr->tunnel_index = t_index0;
- tr->vni = t0->vni;
- }
- if (b1->flags & VLIB_BUFFER_IS_TRACED)
- {
- vxlan_rx_trace_t *tr =
- vlib_add_trace (vm, node, b1, sizeof *tr);
- u8 error1 = vxlan_err_code (ip_err1, udp_err1, csum_err1);
- tr->next_index = next1;
- tr->error = error1;
- tr->tunnel_index = t_index1;
- tr->vni = t1->vni;
- }
- if (b2->flags & VLIB_BUFFER_IS_TRACED)
- {
- vxlan_rx_trace_t *tr =
- vlib_add_trace (vm, node, b2, sizeof *tr);
- u8 error2 = vxlan_err_code (ip_err2, udp_err2, csum_err2);
- tr->next_index = next2;
- tr->error = error2;
- tr->tunnel_index = t_index2;
- tr->vni = t2->vni;
- }
- if (b3->flags & VLIB_BUFFER_IS_TRACED)
- {
- vxlan_rx_trace_t *tr =
- vlib_add_trace (vm, node, b3, sizeof *tr);
- u8 error3 = vxlan_err_code (ip_err3, udp_err3, csum_err3);
- tr->next_index = next3;
- tr->error = error3;
- tr->tunnel_index = t_index3;
- tr->vni = t3->vni;
- }
- }
- vlib_validate_buffer_enqueue_x4
- (vm, node, next_index, to_next, n_left_to_next,
- bi0, bi1, bi2, bi3, next0, next1, next2, next3);
- }
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0 = to_next[0] = from[0];
- from++;
- n_left_from--;
- to_next++;
- n_left_to_next--;
-
- vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
- vlib_buffer_advance (b0, payload_offset);
-
- u16 len0 = vlib_buffer_length_in_chain (vm, b0);
- u32 next0 = VXLAN_FLOW_NEXT_L2_INPUT;
-
- u8 ip_err0 = vxlan_check_ip (b0, len0);
- u8 udp_err0 = vxlan_check_ip_udp_len (b0);
- u8 csum_err0 = vxlan_check_udp_csum (vm, b0);
-
- if (csum_err0)
- csum_err0 = !vxlan_validate_udp_csum (vm, b0);
- if (ip_err0 || udp_err0 || csum_err0)
- {
- next0 = VXLAN_FLOW_NEXT_DROP;
- u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0);
- b0->error = node->errors[error0];
- }
-
- vnet_update_l2_len (b0);
-
- ASSERT (b0->flow_id != 0);
- u32 t_index0 = b0->flow_id - vxm->flow_id_start;
- vxlan_tunnel_t *t0 = &vxm->tunnels[t_index0];
- b0->flow_id = 0;
-
- u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX] =
- t0->sw_if_index;
- vlib_increment_combined_counter (rx_counter[next0], thread_index,
- sw_if_index0, 1, len0);
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_rx_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof *tr);
- u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0);
- tr->next_index = next0;
- tr->error = error0;
- tr->tunnel_index = t_index0;
- tr->vni = t0->vni;
- }
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return f->n_vectors;
-}
-
-/* *INDENT-OFF* */
-#ifndef CLIB_MULTIARCH_VARIANT
-VLIB_REGISTER_NODE (vxlan4_flow_input_node) = {
- .name = "vxlan-flow-input",
- .type = VLIB_NODE_TYPE_INTERNAL,
- .vector_size = sizeof (u32),
-
- .format_trace = format_vxlan_rx_trace,
-
- .n_errors = VXLAN_FLOW_N_ERROR,
- .error_strings = vxlan_flow_error_strings,
-
- .n_next_nodes = VXLAN_FLOW_N_NEXT,
- .next_nodes = {
-#define _(s,n) [VXLAN_FLOW_NEXT_##s] = n,
- foreach_vxlan_flow_input_next
-#undef _
- },
-};
-#endif
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan/dir.dox b/src/vnet/vxlan/dir.dox
deleted file mode 100644
index 31a9e2b6112..00000000000
--- a/src/vnet/vxlan/dir.dox
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
-@dir
-@brief VXLAN Code.
-
-This directory contains source code to support VXLAN.
-
-*/
-/*? %%clicmd:group_label VXLAN CLI %% ?*/
diff --git a/src/vnet/vxlan/encap.c b/src/vnet/vxlan/encap.c
deleted file mode 100644
index 0961a27942d..00000000000
--- a/src/vnet/vxlan/encap.c
+++ /dev/null
@@ -1,540 +0,0 @@
-
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vppinfra/error.h>
-#include <vppinfra/hash.h>
-#include <vnet/vnet.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/interface_output.h>
-#include <vnet/vxlan/vxlan.h>
-#include <vnet/qos/qos_types.h>
-#include <vnet/adj/rewrite.h>
-
-/* Statistics (not all errors) */
-#define foreach_vxlan_encap_error \
-_(ENCAPSULATED, "good packets encapsulated")
-
-static char *vxlan_encap_error_strings[] = {
-#define _(sym,string) string,
- foreach_vxlan_encap_error
-#undef _
-};
-
-typedef enum
-{
-#define _(sym,str) VXLAN_ENCAP_ERROR_##sym,
- foreach_vxlan_encap_error
-#undef _
- VXLAN_ENCAP_N_ERROR,
-} vxlan_encap_error_t;
-
-typedef enum
-{
- VXLAN_ENCAP_NEXT_DROP,
- VXLAN_ENCAP_N_NEXT,
-} vxlan_encap_next_t;
-
-typedef struct
-{
- u32 tunnel_index;
- u32 vni;
-} vxlan_encap_trace_t;
-
-#ifndef CLIB_MARCH_VARIANT
-u8 *
-format_vxlan_encap_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- vxlan_encap_trace_t *t = va_arg (*args, vxlan_encap_trace_t *);
-
- s = format (s, "VXLAN encap to vxlan_tunnel%d vni %d",
- t->tunnel_index, t->vni);
- return s;
-}
-#endif
-
-always_inline uword
-vxlan_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
- vlib_frame_t *from_frame, u8 is_ip4)
-{
- u32 n_left_from, next_index, *from, *to_next;
- vxlan_main_t *vxm = &vxlan_main;
- vnet_main_t *vnm = vxm->vnet_main;
- vnet_interface_main_t *im = &vnm->interface_main;
- vlib_combined_counter_main_t *tx_counter =
- im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX;
- u32 pkts_encapsulated = 0;
- u32 thread_index = vlib_get_thread_index ();
- u32 sw_if_index0 = 0, sw_if_index1 = 0;
- u32 next0 = 0, next1 = 0;
- vxlan_tunnel_t *t0 = NULL, *t1 = NULL;
- index_t dpoi_idx0 = INDEX_INVALID, dpoi_idx1 = INDEX_INVALID;
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
- vlib_buffer_t **b = bufs;
-
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
-
- next_index = node->cached_next_index;
-
- STATIC_ASSERT_SIZEOF (ip6_vxlan_header_t, 56);
- STATIC_ASSERT_SIZEOF (ip4_vxlan_header_t, 36);
-
- u8 const underlay_hdr_len = is_ip4 ?
- sizeof (ip4_vxlan_header_t) : sizeof (ip6_vxlan_header_t);
- u16 const l3_len = is_ip4 ? sizeof (ip4_header_t) : sizeof (ip6_header_t);
- u32 const outer_packet_csum_offload_flags =
- is_ip4 ? (VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
- VNET_BUFFER_OFFLOAD_F_TNL_VXLAN) :
- (VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM |
- VNET_BUFFER_OFFLOAD_F_TNL_VXLAN);
-
- vlib_get_buffers (vm, from, bufs, n_left_from);
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- /* Prefetch next iteration. */
- {
- vlib_prefetch_buffer_header (b[2], LOAD);
- vlib_prefetch_buffer_header (b[3], LOAD);
-
- CLIB_PREFETCH (b[2]->data - CLIB_CACHE_LINE_BYTES,
- 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (b[3]->data - CLIB_CACHE_LINE_BYTES,
- 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- }
-
- u32 bi0 = to_next[0] = from[0];
- u32 bi1 = to_next[1] = from[1];
- from += 2;
- to_next += 2;
- n_left_to_next -= 2;
- n_left_from -= 2;
-
- vlib_buffer_t *b0 = b[0];
- vlib_buffer_t *b1 = b[1];
- b += 2;
-
- u32 flow_hash0 = vnet_l2_compute_flow_hash (b0);
- u32 flow_hash1 = vnet_l2_compute_flow_hash (b1);
-
- /* Get next node index and adj index from tunnel next_dpo */
- if (sw_if_index0 != vnet_buffer (b0)->sw_if_index[VLIB_TX])
- {
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
- vnet_hw_interface_t *hi0 =
- vnet_get_sup_hw_interface (vnm, sw_if_index0);
- t0 = &vxm->tunnels[hi0->dev_instance];
- /* Note: change to always set next0 if it may set to drop */
- next0 = t0->next_dpo.dpoi_next_node;
- dpoi_idx0 = t0->next_dpo.dpoi_index;
- }
-
- /* Get next node index and adj index from tunnel next_dpo */
- if (sw_if_index1 != vnet_buffer (b1)->sw_if_index[VLIB_TX])
- {
- if (sw_if_index0 == vnet_buffer (b1)->sw_if_index[VLIB_TX])
- {
- sw_if_index1 = sw_if_index0;
- t1 = t0;
- next1 = next0;
- dpoi_idx1 = dpoi_idx0;
- }
- else
- {
- sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
- vnet_hw_interface_t *hi1 =
- vnet_get_sup_hw_interface (vnm, sw_if_index1);
- t1 = &vxm->tunnels[hi1->dev_instance];
- /* Note: change to always set next1 if it may set to drop */
- next1 = t1->next_dpo.dpoi_next_node;
- dpoi_idx1 = t1->next_dpo.dpoi_index;
- }
- }
-
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpoi_idx0;
- vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpoi_idx1;
-
- ASSERT (t0->rewrite_header.data_bytes == underlay_hdr_len);
- ASSERT (t1->rewrite_header.data_bytes == underlay_hdr_len);
- vnet_rewrite_two_headers (*t0, *t1, vlib_buffer_get_current (b0),
- vlib_buffer_get_current (b1),
- underlay_hdr_len);
-
- vlib_buffer_advance (b0, -underlay_hdr_len);
- vlib_buffer_advance (b1, -underlay_hdr_len);
-
- u32 len0 = vlib_buffer_length_in_chain (vm, b0);
- u32 len1 = vlib_buffer_length_in_chain (vm, b1);
- u16 payload_l0 = clib_host_to_net_u16 (len0 - l3_len);
- u16 payload_l1 = clib_host_to_net_u16 (len1 - l3_len);
-
- void *underlay0 = vlib_buffer_get_current (b0);
- void *underlay1 = vlib_buffer_get_current (b1);
-
- ip4_header_t *ip4_0, *ip4_1;
- qos_bits_t ip4_0_tos = 0, ip4_1_tos = 0;
- ip6_header_t *ip6_0, *ip6_1;
- udp_header_t *udp0, *udp1;
- u8 *l3_0, *l3_1;
- if (is_ip4)
- {
- ip4_vxlan_header_t *hdr0 = underlay0;
- ip4_vxlan_header_t *hdr1 = underlay1;
-
- /* Fix the IP4 checksum and length */
- ip4_0 = &hdr0->ip4;
- ip4_1 = &hdr1->ip4;
- ip4_0->length = clib_host_to_net_u16 (len0);
- ip4_1->length = clib_host_to_net_u16 (len1);
-
- if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_QOS_DATA_VALID))
- {
- ip4_0_tos = vnet_buffer2 (b0)->qos.bits;
- ip4_0->tos = ip4_0_tos;
- }
- if (PREDICT_FALSE (b1->flags & VNET_BUFFER_F_QOS_DATA_VALID))
- {
- ip4_1_tos = vnet_buffer2 (b1)->qos.bits;
- ip4_1->tos = ip4_1_tos;
- }
-
- l3_0 = (u8 *) ip4_0;
- l3_1 = (u8 *) ip4_1;
- udp0 = &hdr0->udp;
- udp1 = &hdr1->udp;
- }
- else /* ipv6 */
- {
- ip6_vxlan_header_t *hdr0 = underlay0;
- ip6_vxlan_header_t *hdr1 = underlay1;
-
- /* Fix IP6 payload length */
- ip6_0 = &hdr0->ip6;
- ip6_1 = &hdr1->ip6;
- ip6_0->payload_length = payload_l0;
- ip6_1->payload_length = payload_l1;
-
- l3_0 = (u8 *) ip6_0;
- l3_1 = (u8 *) ip6_1;
- udp0 = &hdr0->udp;
- udp1 = &hdr1->udp;
- }
-
- /* Fix UDP length and set source port */
- udp0->length = payload_l0;
- udp0->src_port = flow_hash0;
- udp1->length = payload_l1;
- udp1->src_port = flow_hash1;
-
- if (b0->flags & VNET_BUFFER_F_OFFLOAD)
- {
- vnet_buffer2 (b0)->outer_l3_hdr_offset = l3_0 - b0->data;
- vnet_buffer2 (b0)->outer_l4_hdr_offset = (u8 *) udp0 - b0->data;
- vnet_buffer_offload_flags_set (b0,
- outer_packet_csum_offload_flags);
- }
- /* IPv4 checksum only */
- else if (is_ip4)
- {
- ip_csum_t sum0 = ip4_0->checksum;
- sum0 = ip_csum_update (sum0, 0, ip4_0->length, ip4_header_t,
- length /* changed member */);
- if (PREDICT_FALSE (ip4_0_tos))
- {
- sum0 = ip_csum_update (sum0, 0, ip4_0_tos, ip4_header_t,
- tos /* changed member */);
- }
- ip4_0->checksum = ip_csum_fold (sum0);
- }
- /* IPv6 UDP checksum is mandatory */
- else
- {
- int bogus = 0;
-
- udp0->checksum =
- ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip6_0, &bogus);
- ASSERT (bogus == 0);
- if (udp0->checksum == 0)
- udp0->checksum = 0xffff;
- }
-
- if (b1->flags & VNET_BUFFER_F_OFFLOAD)
- {
- vnet_buffer2 (b1)->outer_l3_hdr_offset = l3_1 - b1->data;
- vnet_buffer2 (b1)->outer_l4_hdr_offset = (u8 *) udp1 - b1->data;
- vnet_buffer_offload_flags_set (b1,
- outer_packet_csum_offload_flags);
- }
- /* IPv4 checksum only */
- else if (is_ip4)
- {
- ip_csum_t sum1 = ip4_1->checksum;
- sum1 = ip_csum_update (sum1, 0, ip4_1->length, ip4_header_t,
- length /* changed member */);
- if (PREDICT_FALSE (ip4_1_tos))
- {
- sum1 = ip_csum_update (sum1, 0, ip4_1_tos, ip4_header_t,
- tos /* changed member */);
- }
- ip4_1->checksum = ip_csum_fold (sum1);
- }
- /* IPv6 UDP checksum is mandatory */
- else
- {
- int bogus = 0;
-
- udp1->checksum = ip6_tcp_udp_icmp_compute_checksum
- (vm, b1, ip6_1, &bogus);
- ASSERT (bogus == 0);
- if (udp1->checksum == 0)
- udp1->checksum = 0xffff;
- }
-
- /* save inner packet flow_hash for load-balance node */
- vnet_buffer (b0)->ip.flow_hash = flow_hash0;
- vnet_buffer (b1)->ip.flow_hash = flow_hash1;
-
- if (sw_if_index0 == sw_if_index1)
- {
- vlib_increment_combined_counter (tx_counter, thread_index,
- sw_if_index0, 2, len0 + len1);
- }
- else
- {
- vlib_increment_combined_counter (tx_counter, thread_index,
- sw_if_index0, 1, len0);
- vlib_increment_combined_counter (tx_counter, thread_index,
- sw_if_index1, 1, len1);
- }
- pkts_encapsulated += 2;
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_encap_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->tunnel_index = t0 - vxm->tunnels;
- tr->vni = t0->vni;
- }
-
- if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_encap_trace_t *tr =
- vlib_add_trace (vm, node, b1, sizeof (*tr));
- tr->tunnel_index = t1 - vxm->tunnels;
- tr->vni = t1->vni;
- }
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0 = to_next[0] = from[0];
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- vlib_buffer_t *b0 = b[0];
- b += 1;
-
- u32 flow_hash0 = vnet_l2_compute_flow_hash (b0);
-
- /* Get next node index and adj index from tunnel next_dpo */
- if (sw_if_index0 != vnet_buffer (b0)->sw_if_index[VLIB_TX])
- {
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
- vnet_hw_interface_t *hi0 =
- vnet_get_sup_hw_interface (vnm, sw_if_index0);
- t0 = &vxm->tunnels[hi0->dev_instance];
- /* Note: change to always set next0 if it may be set to drop */
- next0 = t0->next_dpo.dpoi_next_node;
- dpoi_idx0 = t0->next_dpo.dpoi_index;
- }
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpoi_idx0;
-
- ASSERT (t0->rewrite_header.data_bytes == underlay_hdr_len);
- vnet_rewrite_one_header (*t0, vlib_buffer_get_current (b0),
- underlay_hdr_len);
-
- vlib_buffer_advance (b0, -underlay_hdr_len);
- void *underlay0 = vlib_buffer_get_current (b0);
-
- u32 len0 = vlib_buffer_length_in_chain (vm, b0);
- u16 payload_l0 = clib_host_to_net_u16 (len0 - l3_len);
-
- udp_header_t *udp0;
- ip4_header_t *ip4_0;
- qos_bits_t ip4_0_tos = 0;
- ip6_header_t *ip6_0;
- u8 *l3_0;
- if (is_ip4)
- {
- ip4_vxlan_header_t *hdr = underlay0;
-
- /* Fix the IP4 checksum and length */
- ip4_0 = &hdr->ip4;
- ip4_0->length = clib_host_to_net_u16 (len0);
-
- if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_QOS_DATA_VALID))
- {
- ip4_0_tos = vnet_buffer2 (b0)->qos.bits;
- ip4_0->tos = ip4_0_tos;
- }
-
- l3_0 = (u8 *) ip4_0;
- udp0 = &hdr->udp;
- }
- else /* ip6 path */
- {
- ip6_vxlan_header_t *hdr = underlay0;
-
- /* Fix IP6 payload length */
- ip6_0 = &hdr->ip6;
- ip6_0->payload_length = payload_l0;
-
- l3_0 = (u8 *) ip6_0;
- udp0 = &hdr->udp;
- }
-
- /* Fix UDP length and set source port */
- udp0->length = payload_l0;
- udp0->src_port = flow_hash0;
-
- if (b0->flags & VNET_BUFFER_F_OFFLOAD)
- {
- vnet_buffer2 (b0)->outer_l3_hdr_offset = l3_0 - b0->data;
- vnet_buffer2 (b0)->outer_l4_hdr_offset = (u8 *) udp0 - b0->data;
- vnet_buffer_offload_flags_set (b0,
- outer_packet_csum_offload_flags);
- }
- /* IPv4 checksum only */
- else if (is_ip4)
- {
- ip_csum_t sum0 = ip4_0->checksum;
- sum0 = ip_csum_update (sum0, 0, ip4_0->length, ip4_header_t,
- length /* changed member */);
- if (PREDICT_FALSE (ip4_0_tos))
- {
- sum0 = ip_csum_update (sum0, 0, ip4_0_tos, ip4_header_t,
- tos /* changed member */);
- }
- ip4_0->checksum = ip_csum_fold (sum0);
- }
- /* IPv6 UDP checksum is mandatory */
- else
- {
- int bogus = 0;
-
- udp0->checksum = ip6_tcp_udp_icmp_compute_checksum
- (vm, b0, ip6_0, &bogus);
- ASSERT (bogus == 0);
- if (udp0->checksum == 0)
- udp0->checksum = 0xffff;
- }
-
- /* reuse inner packet flow_hash for load-balance node */
- vnet_buffer (b0)->ip.flow_hash = flow_hash0;
-
- vlib_increment_combined_counter (tx_counter, thread_index,
- sw_if_index0, 1, len0);
- pkts_encapsulated++;
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_encap_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->tunnel_index = t0 - vxm->tunnels;
- tr->vni = t0->vni;
- }
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- /* Do we still need this now that tunnel tx stats is kept? */
- vlib_node_increment_counter (vm, node->node_index,
- VXLAN_ENCAP_ERROR_ENCAPSULATED,
- pkts_encapsulated);
-
- return from_frame->n_vectors;
-}
-
-VLIB_NODE_FN (vxlan4_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- /* Disable chksum offload as setup overhead in tx node is not worthwhile
- for ip4 header checksum only, unless udp checksum is also required */
- return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 1);
-}
-
-VLIB_NODE_FN (vxlan6_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- /* Enable checksum offload for ip6 as udp checksum is mandatory, */
- return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 0);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (vxlan4_encap_node) = {
- .name = "vxlan4-encap",
- .vector_size = sizeof (u32),
- .format_trace = format_vxlan_encap_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(vxlan_encap_error_strings),
- .error_strings = vxlan_encap_error_strings,
- .n_next_nodes = VXLAN_ENCAP_N_NEXT,
- .next_nodes = {
- [VXLAN_ENCAP_NEXT_DROP] = "error-drop",
- },
-};
-
-VLIB_REGISTER_NODE (vxlan6_encap_node) = {
- .name = "vxlan6-encap",
- .vector_size = sizeof (u32),
- .format_trace = format_vxlan_encap_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(vxlan_encap_error_strings),
- .error_strings = vxlan_encap_error_strings,
- .n_next_nodes = VXLAN_ENCAP_N_NEXT,
- .next_nodes = {
- [VXLAN_ENCAP_NEXT_DROP] = "error-drop",
- },
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan/vxlan.api b/src/vnet/vxlan/vxlan.api
deleted file mode 100644
index b7e678595d8..00000000000
--- a/src/vnet/vxlan/vxlan.api
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Copyright (c) 2015-2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-option version = "2.1.0";
-
-import "vnet/interface_types.api";
-import "vnet/ip/ip_types.api";
-
-/** \brief Create or delete a VXLAN tunnel
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param is_add - Use 1 to create the tunnel, 0 to remove it
- @param instance - optional unique custom device instance, else ~0.
- @param src_address - Source IP address
- @param dst_address - Destination IP address, can be multicast
- @param mcast_sw_if_index - Interface for multicast destination
- @param encap_vrf_id - Encap route table FIB index
- @param decap_next_index - index of decap next graph node
- @param vni - The VXLAN Network Identifier, uint24
-*/
-define vxlan_add_del_tunnel
-{
- u32 client_index;
- u32 context;
- bool is_add [default=true];
- u32 instance; /* If non-~0, specifies a custom dev instance */
- vl_api_address_t src_address;
- vl_api_address_t dst_address;
- vl_api_interface_index_t mcast_sw_if_index;
- u32 encap_vrf_id;
- u32 decap_next_index;
- u32 vni;
-};
-
-/** \brief Create or delete a VXLAN tunnel
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param is_add - Use 1 to create the tunnel, 0 to remove it
- @param instance - optional unique custom device instance, else ~0.
- @param src_address - Source IP address
- @param dst_address - Destination IP address, can be multicast
- @param src_port - Source UDP port. It is not included in sent packets. Used only for port registration
- @param dst_port - Destination UDP port
- @param mcast_sw_if_index - Interface for multicast destination
- @param encap_vrf_id - Encap route table FIB index
- @param decap_next_index - index of decap next graph node
- @param vni - The VXLAN Network Identifier, uint24
-*/
-define vxlan_add_del_tunnel_v2
-{
- u32 client_index;
- u32 context;
- bool is_add [default=true];
- u32 instance [default=0xffffffff]; /* If non-~0, specifies a custom dev instance */
- vl_api_address_t src_address;
- vl_api_address_t dst_address;
- u16 src_port;
- u16 dst_port;
- vl_api_interface_index_t mcast_sw_if_index;
- u32 encap_vrf_id;
- u32 decap_next_index;
- u32 vni;
-};
-
-/** \brief Create or delete a VXLAN tunnel
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param is_add - Use 1 to create the tunnel, 0 to remove it
- @param instance - optional unique custom device instance, else ~0.
- @param src_address - Source IP address
- @param dst_address - Destination IP address, can be multicast
- @param src_port - Source UDP port. It is not included in sent packets. Used only for port registration
- @param dst_port - Destination UDP port
- @param mcast_sw_if_index - Interface for multicast destination
- @param encap_vrf_id - Encap route table FIB index
- @param decap_next_index - index of decap next graph node
- @param vni - The VXLAN Network Identifier, uint24
- @param is_l3 - if true, create the interface in L3 mode, w/o MAC
-*/
-define vxlan_add_del_tunnel_v3
-{
- u32 client_index;
- u32 context;
- bool is_add [default=true];
- u32 instance [default=0xffffffff]; /* If non-~0, specifies a custom dev instance */
- vl_api_address_t src_address;
- vl_api_address_t dst_address;
- u16 src_port;
- u16 dst_port;
- vl_api_interface_index_t mcast_sw_if_index;
- u32 encap_vrf_id;
- u32 decap_next_index;
- u32 vni;
- bool is_l3 [default=false];
-};
-
-define vxlan_add_del_tunnel_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-define vxlan_add_del_tunnel_v2_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-define vxlan_add_del_tunnel_v3_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-
-define vxlan_tunnel_dump
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index;
-};
-define vxlan_tunnel_v2_dump
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index;
-};
-
-define vxlan_tunnel_details
-{
- u32 context;
- vl_api_interface_index_t sw_if_index;
- u32 instance;
- vl_api_address_t src_address;
- vl_api_address_t dst_address;
- vl_api_interface_index_t mcast_sw_if_index;
- u32 encap_vrf_id;
- u32 decap_next_index;
- u32 vni;
-};
-define vxlan_tunnel_v2_details
-{
- u32 context;
- vl_api_interface_index_t sw_if_index;
- u32 instance;
- vl_api_address_t src_address;
- vl_api_address_t dst_address;
- u16 src_port;
- u16 dst_port;
- vl_api_interface_index_t mcast_sw_if_index;
- u32 encap_vrf_id;
- u32 decap_next_index;
- u32 vni;
-};
-
-/** \brief Interface set vxlan-bypass request
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param sw_if_index - interface used to reach neighbor
- @param is_ipv6 - if non-zero, enable ipv6-vxlan-bypass, else ipv4-vxlan-bypass
- @param enable - if non-zero enable, else disable
-*/
-autoreply define sw_interface_set_vxlan_bypass
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index;
- bool is_ipv6;
- bool enable [default=true];
-};
-
-/** \brief Offload vxlan rx request
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param hw_if_index - rx hw interface
- @param sw_if_index - vxlan interface to offload
- @param enable - if non-zero enable, else disable
-*/
-autoreply define vxlan_offload_rx
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t hw_if_index;
- vl_api_interface_index_t sw_if_index;
- bool enable [default=true];
-};
diff --git a/src/vnet/vxlan/vxlan.c b/src/vnet/vxlan/vxlan.c
deleted file mode 100644
index f670ee9c764..00000000000
--- a/src/vnet/vxlan/vxlan.c
+++ /dev/null
@@ -1,1350 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vnet/vxlan/vxlan.h>
-#include <vnet/ip/format.h>
-#include <vnet/fib/fib_entry.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/fib/fib_entry_track.h>
-#include <vnet/mfib/mfib_table.h>
-#include <vnet/adj/adj_mcast.h>
-#include <vnet/adj/rewrite.h>
-#include <vnet/dpo/drop_dpo.h>
-#include <vnet/interface.h>
-#include <vnet/flow/flow.h>
-#include <vnet/udp/udp_local.h>
-#include <vlib/vlib.h>
-
-/**
- * @file
- * @brief VXLAN.
- *
- * VXLAN provides the features needed to allow L2 bridge domains (BDs)
- * to span multiple servers. This is done by building an L2 overlay on
- * top of an L3 network underlay using VXLAN tunnels.
- *
- * This makes it possible for servers to be co-located in the same data
- * center or be separated geographically as long as they are reachable
- * through the underlay L3 network.
- *
- * You can refer to this kind of L2 overlay bridge domain as a VXLAN
- * (Virtual eXtensible VLAN) segment.
- */
-
-
-vxlan_main_t vxlan_main;
-
-static u32
-vxlan_eth_flag_change (vnet_main_t *vnm, vnet_hw_interface_t *hi, u32 flags)
-{
- /* nothing for now */
- return 0;
-}
-
-static clib_error_t *
-vxlan_eth_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw,
- u32 frame_size)
-{
- /* nothing for now */
- return 0;
-}
-
-static u8 *
-format_decap_next (u8 * s, va_list * args)
-{
- u32 next_index = va_arg (*args, u32);
-
- if (next_index == VXLAN_INPUT_NEXT_DROP)
- return format (s, "drop");
- else
- return format (s, "index %d", next_index);
- return s;
-}
-
-u8 *
-format_vxlan_tunnel (u8 * s, va_list * args)
-{
- vxlan_tunnel_t *t = va_arg (*args, vxlan_tunnel_t *);
-
- s = format (s,
- "[%d] instance %d src %U dst %U src_port %d dst_port %d vni %d "
- "fib-idx %d sw-if-idx %d ",
- t->dev_instance, t->user_instance, format_ip46_address, &t->src,
- IP46_TYPE_ANY, format_ip46_address, &t->dst, IP46_TYPE_ANY,
- t->src_port, t->dst_port, t->vni, t->encap_fib_index,
- t->sw_if_index);
-
- s = format (s, "encap-dpo-idx %d ", t->next_dpo.dpoi_index);
-
- if (PREDICT_FALSE (t->decap_next_index != VXLAN_INPUT_NEXT_L2_INPUT))
- s = format (s, "decap-next-%U ", format_decap_next, t->decap_next_index);
-
- if (PREDICT_FALSE (ip46_address_is_multicast (&t->dst)))
- s = format (s, "mcast-sw-if-idx %d ", t->mcast_sw_if_index);
-
- if (t->flow_index != ~0)
- s = format (s, "flow-index %d [%U]", t->flow_index,
- format_flow_enabled_hw, t->flow_index);
-
- return s;
-}
-
-static u8 *
-format_vxlan_name (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- vxlan_main_t *vxm = &vxlan_main;
- vxlan_tunnel_t *t;
-
- if (dev_instance == ~0)
- return format (s, "<cached-unused>");
-
- if (dev_instance >= vec_len (vxm->tunnels))
- return format (s, "<improperly-referenced>");
-
- t = pool_elt_at_index (vxm->tunnels, dev_instance);
-
- return format (s, "vxlan_tunnel%d", t->user_instance);
-}
-
-static clib_error_t *
-vxlan_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
-{
- u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
- VNET_HW_INTERFACE_FLAG_LINK_UP : 0;
- vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
-
- return /* no error */ 0;
-}
-
-/* *INDENT-OFF* */
-VNET_DEVICE_CLASS (vxlan_device_class, static) = {
- .name = "VXLAN",
- .format_device_name = format_vxlan_name,
- .format_tx_trace = format_vxlan_encap_trace,
- .admin_up_down_function = vxlan_interface_admin_up_down,
-};
-/* *INDENT-ON* */
-
-static u8 *
-format_vxlan_header_with_length (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- s = format (s, "unimplemented dev %u", dev_instance);
- return s;
-}
-
-/* *INDENT-OFF* */
-VNET_HW_INTERFACE_CLASS (vxlan_hw_class) = {
- .name = "VXLAN",
- .format_header = format_vxlan_header_with_length,
- .build_rewrite = default_build_rewrite,
-};
-/* *INDENT-ON* */
-
-static void
-vxlan_tunnel_restack_dpo (vxlan_tunnel_t * t)
-{
- u8 is_ip4 = ip46_address_is_ip4 (&t->dst);
- dpo_id_t dpo = DPO_INVALID;
- fib_forward_chain_type_t forw_type = is_ip4 ?
- FIB_FORW_CHAIN_TYPE_UNICAST_IP4 : FIB_FORW_CHAIN_TYPE_UNICAST_IP6;
-
- fib_entry_contribute_forwarding (t->fib_entry_index, forw_type, &dpo);
-
- /* vxlan uses the payload hash as the udp source port
- * hence the packet's hash is unknown
- * skip single bucket load balance dpo's */
- while (DPO_LOAD_BALANCE == dpo.dpoi_type)
- {
- const load_balance_t *lb;
- const dpo_id_t *choice;
-
- lb = load_balance_get (dpo.dpoi_index);
- if (lb->lb_n_buckets > 1)
- break;
-
- choice = load_balance_get_bucket_i (lb, 0);
-
- if (DPO_RECEIVE == choice->dpoi_type)
- dpo_copy (&dpo, drop_dpo_get (choice->dpoi_proto));
- else
- dpo_copy (&dpo, choice);
- }
-
- u32 encap_index = is_ip4 ?
- vxlan4_encap_node.index : vxlan6_encap_node.index;
- dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
- dpo_reset (&dpo);
-}
-
-static vxlan_tunnel_t *
-vxlan_tunnel_from_fib_node (fib_node_t * node)
-{
- ASSERT (FIB_NODE_TYPE_VXLAN_TUNNEL == node->fn_type);
- return ((vxlan_tunnel_t *) (((char *) node) -
- STRUCT_OFFSET_OF (vxlan_tunnel_t, node)));
-}
-
-/**
- * Function definition to backwalk a FIB node -
- * Here we will restack the new dpo of VXLAN DIP to encap node.
- */
-static fib_node_back_walk_rc_t
-vxlan_tunnel_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx)
-{
- vxlan_tunnel_restack_dpo (vxlan_tunnel_from_fib_node (node));
- return (FIB_NODE_BACK_WALK_CONTINUE);
-}
-
-/**
- * Function definition to get a FIB node from its index
- */
-static fib_node_t *
-vxlan_tunnel_fib_node_get (fib_node_index_t index)
-{
- vxlan_tunnel_t *t;
- vxlan_main_t *vxm = &vxlan_main;
-
- t = pool_elt_at_index (vxm->tunnels, index);
-
- return (&t->node);
-}
-
-/**
- * Function definition to inform the FIB node that its last lock has gone.
- */
-static void
-vxlan_tunnel_last_lock_gone (fib_node_t * node)
-{
- /*
- * The VXLAN tunnel is a root of the graph. As such
- * it never has children and thus is never locked.
- */
- ASSERT (0);
-}
-
-/*
- * Virtual function table registered by VXLAN tunnels
- * for participation in the FIB object graph.
- */
-const static fib_node_vft_t vxlan_vft = {
- .fnv_get = vxlan_tunnel_fib_node_get,
- .fnv_last_lock = vxlan_tunnel_last_lock_gone,
- .fnv_back_walk = vxlan_tunnel_back_walk,
-};
-
-#define foreach_copy_field \
- _ (vni) \
- _ (mcast_sw_if_index) \
- _ (encap_fib_index) \
- _ (decap_next_index) \
- _ (src) \
- _ (dst) \
- _ (src_port) \
- _ (dst_port)
-
-static void
-vxlan_rewrite (vxlan_tunnel_t * t, bool is_ip6)
-{
- union
- {
- ip4_vxlan_header_t h4;
- ip6_vxlan_header_t h6;
- } h;
- int len = is_ip6 ? sizeof h.h6 : sizeof h.h4;
-
- udp_header_t *udp;
- vxlan_header_t *vxlan;
- /* Fixed portion of the (outer) ip header */
-
- clib_memset (&h, 0, sizeof (h));
- if (!is_ip6)
- {
- ip4_header_t *ip = &h.h4.ip4;
- udp = &h.h4.udp, vxlan = &h.h4.vxlan;
- ip->ip_version_and_header_length = 0x45;
- ip->ttl = 254;
- ip->protocol = IP_PROTOCOL_UDP;
-
- ip->src_address = t->src.ip4;
- ip->dst_address = t->dst.ip4;
-
- /* we fix up the ip4 header length and checksum after-the-fact */
- ip->checksum = ip4_header_checksum (ip);
- }
- else
- {
- ip6_header_t *ip = &h.h6.ip6;
- udp = &h.h6.udp, vxlan = &h.h6.vxlan;
- ip->ip_version_traffic_class_and_flow_label =
- clib_host_to_net_u32 (6 << 28);
- ip->hop_limit = 255;
- ip->protocol = IP_PROTOCOL_UDP;
-
- ip->src_address = t->src.ip6;
- ip->dst_address = t->dst.ip6;
- }
-
- /* UDP header, randomize src port on something, maybe? */
- udp->src_port = clib_host_to_net_u16 (t->src_port);
- udp->dst_port = clib_host_to_net_u16 (t->dst_port);
-
- /* VXLAN header */
- vnet_set_vni_and_flags (vxlan, t->vni);
- vnet_rewrite_set_data (*t, &h, len);
-}
-
-static bool
-vxlan_decap_next_is_valid (vxlan_main_t * vxm, u32 is_ip6,
- u32 decap_next_index)
-{
- vlib_main_t *vm = vxm->vlib_main;
- u32 input_idx = (!is_ip6) ?
- vxlan4_input_node.index : vxlan6_input_node.index;
- vlib_node_runtime_t *r = vlib_node_get_runtime (vm, input_idx);
-
- return decap_next_index < r->n_next_nodes;
-}
-
-/* *INDENT-OFF* */
-typedef CLIB_PACKED(union
-{
- struct
- {
- fib_node_index_t mfib_entry_index;
- adj_index_t mcast_adj_index;
- };
- u64 as_u64;
-}) mcast_shared_t;
-/* *INDENT-ON* */
-
-static inline mcast_shared_t
-mcast_shared_get (ip46_address_t * ip)
-{
- ASSERT (ip46_address_is_multicast (ip));
- uword *p = hash_get_mem (vxlan_main.mcast_shared, ip);
- ALWAYS_ASSERT (p);
- mcast_shared_t ret = {.as_u64 = *p };
- return ret;
-}
-
-static inline void
-mcast_shared_add (ip46_address_t * dst, fib_node_index_t mfei, adj_index_t ai)
-{
- mcast_shared_t new_ep = {
- .mcast_adj_index = ai,
- .mfib_entry_index = mfei,
- };
-
- hash_set_mem_alloc (&vxlan_main.mcast_shared, dst, new_ep.as_u64);
-}
-
-static inline void
-mcast_shared_remove (ip46_address_t * dst)
-{
- mcast_shared_t ep = mcast_shared_get (dst);
-
- adj_unlock (ep.mcast_adj_index);
- mfib_table_entry_delete_index (ep.mfib_entry_index, MFIB_SOURCE_VXLAN);
-
- hash_unset_mem_free (&vxlan_main.mcast_shared, dst);
-}
-
-int vnet_vxlan_add_del_tunnel
- (vnet_vxlan_add_del_tunnel_args_t * a, u32 * sw_if_indexp)
-{
- vxlan_main_t *vxm = &vxlan_main;
- vnet_main_t *vnm = vxm->vnet_main;
- vxlan_decap_info_t *p;
- u32 sw_if_index = ~0;
- vxlan4_tunnel_key_t key4;
- vxlan6_tunnel_key_t key6;
- u32 is_ip6 = a->is_ip6;
- vlib_main_t *vm = vlib_get_main ();
- u8 hw_addr[6];
-
- /* Set udp-ports */
- if (a->src_port == 0)
- a->src_port = is_ip6 ? UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan;
-
- if (a->dst_port == 0)
- a->dst_port = is_ip6 ? UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan;
-
- int not_found;
- if (!is_ip6)
- {
- /* ip4 mcast is indexed by mcast addr only */
- key4.key[0] = ip46_address_is_multicast (&a->dst) ?
- a->dst.ip4.as_u32 :
- a->dst.ip4.as_u32 | (((u64) a->src.ip4.as_u32) << 32);
- key4.key[1] = ((u64) clib_host_to_net_u16 (a->src_port) << 48) |
- (((u64) a->encap_fib_index) << 32) |
- clib_host_to_net_u32 (a->vni << 8);
- not_found =
- clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4);
- p = (void *) &key4.value;
- }
- else
- {
- key6.key[0] = a->dst.ip6.as_u64[0];
- key6.key[1] = a->dst.ip6.as_u64[1];
- key6.key[2] = (((u64) clib_host_to_net_u16 (a->src_port) << 48) |
- ((u64) a->encap_fib_index) << 32) |
- clib_host_to_net_u32 (a->vni << 8);
- not_found =
- clib_bihash_search_inline_24_8 (&vxm->vxlan6_tunnel_by_key, &key6);
- p = (void *) &key6.value;
- }
-
- if (not_found)
- p = 0;
-
- if (a->is_add)
- {
- l2input_main_t *l2im = &l2input_main;
- u32 dev_instance; /* real dev instance tunnel index */
- u32 user_instance; /* request and actual instance number */
-
- /* adding a tunnel: tunnel must not already exist */
- if (p)
- return VNET_API_ERROR_TUNNEL_EXIST;
-
- /*if not set explicitly, default to l2 */
- if (a->decap_next_index == ~0)
- a->decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT;
- if (!vxlan_decap_next_is_valid (vxm, is_ip6, a->decap_next_index))
- return VNET_API_ERROR_INVALID_DECAP_NEXT;
-
- vxlan_tunnel_t *t;
- pool_get_aligned (vxm->tunnels, t, CLIB_CACHE_LINE_BYTES);
- clib_memset (t, 0, sizeof (*t));
- dev_instance = t - vxm->tunnels;
-
- /* copy from arg structure */
-#define _(x) t->x = a->x;
- foreach_copy_field;
-#undef _
-
- vxlan_rewrite (t, is_ip6);
- /*
- * Reconcile the real dev_instance and a possible requested instance.
- */
- user_instance = a->instance;
- if (user_instance == ~0)
- user_instance = dev_instance;
- if (hash_get (vxm->instance_used, user_instance))
- {
- pool_put (vxm->tunnels, t);
- return VNET_API_ERROR_INSTANCE_IN_USE;
- }
-
- hash_set (vxm->instance_used, user_instance, 1);
-
- t->dev_instance = dev_instance; /* actual */
- t->user_instance = user_instance; /* name */
- t->flow_index = ~0;
-
- if (a->is_l3)
- t->hw_if_index =
- vnet_register_interface (vnm, vxlan_device_class.index, dev_instance,
- vxlan_hw_class.index, dev_instance);
- else
- {
- vnet_eth_interface_registration_t eir = {};
- f64 now = vlib_time_now (vm);
- u32 rnd;
- rnd = (u32) (now * 1e6);
- rnd = random_u32 (&rnd);
- memcpy (hw_addr + 2, &rnd, sizeof (rnd));
- hw_addr[0] = 2;
- hw_addr[1] = 0xfe;
-
- eir.dev_class_index = vxlan_device_class.index;
- eir.dev_instance = dev_instance;
- eir.address = hw_addr;
- eir.cb.flag_change = vxlan_eth_flag_change;
- eir.cb.set_max_frame_size = vxlan_eth_set_max_frame_size;
- t->hw_if_index = vnet_eth_register_interface (vnm, &eir);
- }
-
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index);
-
- /* Set vxlan tunnel output node */
- u32 encap_index = !is_ip6 ?
- vxlan4_encap_node.index : vxlan6_encap_node.index;
- vnet_set_interface_output_node (vnm, t->hw_if_index, encap_index);
-
- t->sw_if_index = sw_if_index = hi->sw_if_index;
-
- /* copy the key */
- int add_failed;
- if (is_ip6)
- {
- key6.value = (u64) dev_instance;
- add_failed = clib_bihash_add_del_24_8 (&vxm->vxlan6_tunnel_by_key,
- &key6, 1 /*add */ );
- }
- else
- {
- vxlan_decap_info_t di = {.sw_if_index = t->sw_if_index, };
- if (ip46_address_is_multicast (&t->dst))
- di.local_ip = t->src.ip4;
- else
- di.next_index = t->decap_next_index;
- key4.value = di.as_u64;
- add_failed = clib_bihash_add_del_16_8 (&vxm->vxlan4_tunnel_by_key,
- &key4, 1 /*add */ );
- }
-
- if (add_failed)
- {
- if (a->is_l3)
- vnet_delete_hw_interface (vnm, t->hw_if_index);
- else
- ethernet_delete_interface (vnm, t->hw_if_index);
- hash_unset (vxm->instance_used, t->user_instance);
- pool_put (vxm->tunnels, t);
- return VNET_API_ERROR_INVALID_REGISTRATION;
- }
-
- vec_validate_init_empty (vxm->tunnel_index_by_sw_if_index, sw_if_index,
- ~0);
- vxm->tunnel_index_by_sw_if_index[sw_if_index] = dev_instance;
-
- /* setup l2 input config with l2 feature and bd 0 to drop packet */
- vec_validate (l2im->configs, sw_if_index);
- l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP;
- l2im->configs[sw_if_index].bd_index = 0;
-
- vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
- si->flags &= ~VNET_SW_INTERFACE_FLAG_HIDDEN;
- vnet_sw_interface_set_flags (vnm, sw_if_index,
- VNET_SW_INTERFACE_FLAG_ADMIN_UP);
-
- fib_node_init (&t->node, FIB_NODE_TYPE_VXLAN_TUNNEL);
- fib_prefix_t tun_dst_pfx;
- vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
-
- fib_prefix_from_ip46_addr (&t->dst, &tun_dst_pfx);
- if (!ip46_address_is_multicast (&t->dst))
- {
- /* Unicast tunnel -
- * source the FIB entry for the tunnel's destination
- * and become a child thereof. The tunnel will then get poked
- * when the forwarding for the entry updates, and the tunnel can
- * re-stack accordingly
- */
- vtep_addr_ref (&vxm->vtep_table, t->encap_fib_index, &t->src);
- t->fib_entry_index = fib_entry_track (t->encap_fib_index,
- &tun_dst_pfx,
- FIB_NODE_TYPE_VXLAN_TUNNEL,
- dev_instance,
- &t->sibling_index);
- vxlan_tunnel_restack_dpo (t);
- }
- else
- {
- /* Multicast tunnel -
- * as the same mcast group can be used for multiple mcast tunnels
- * with different VNIs, create the output fib adjacency only if
- * it does not already exist
- */
- fib_protocol_t fp = fib_ip_proto (is_ip6);
-
- if (vtep_addr_ref (&vxm->vtep_table,
- t->encap_fib_index, &t->dst) == 1)
- {
- fib_node_index_t mfei;
- adj_index_t ai;
- fib_route_path_t path = {
- .frp_proto = fib_proto_to_dpo (fp),
- .frp_addr = zero_addr,
- .frp_sw_if_index = 0xffffffff,
- .frp_fib_index = ~0,
- .frp_weight = 1,
- .frp_flags = FIB_ROUTE_PATH_LOCAL,
- .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
- };
- const mfib_prefix_t mpfx = {
- .fp_proto = fp,
- .fp_len = (is_ip6 ? 128 : 32),
- .fp_grp_addr = tun_dst_pfx.fp_addr,
- };
-
- /*
- * Setup the (*,G) to receive traffic on the mcast group
- * - the forwarding interface is for-us
- * - the accepting interface is that from the API
- */
- mfib_table_entry_path_update (t->encap_fib_index, &mpfx,
- MFIB_SOURCE_VXLAN,
- MFIB_ENTRY_FLAG_NONE, &path);
-
- path.frp_sw_if_index = a->mcast_sw_if_index;
- path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE;
- path.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT;
- mfei = mfib_table_entry_path_update (
- t->encap_fib_index, &mpfx, MFIB_SOURCE_VXLAN,
- MFIB_ENTRY_FLAG_NONE, &path);
-
- /*
- * Create the mcast adjacency to send traffic to the group
- */
- ai = adj_mcast_add_or_lock (fp,
- fib_proto_to_link (fp),
- a->mcast_sw_if_index);
-
- /*
- * create a new end-point
- */
- mcast_shared_add (&t->dst, mfei, ai);
- }
-
- dpo_id_t dpo = DPO_INVALID;
- mcast_shared_t ep = mcast_shared_get (&t->dst);
-
- /* Stack shared mcast dst mac addr rewrite on encap */
- dpo_set (&dpo, DPO_ADJACENCY_MCAST,
- fib_proto_to_dpo (fp), ep.mcast_adj_index);
-
- dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
- dpo_reset (&dpo);
- flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER;
- }
-
- vnet_get_sw_interface (vnet_get_main (), sw_if_index)->flood_class =
- flood_class;
- }
- else
- {
- /* deleting a tunnel: tunnel must exist */
- if (!p)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-
- u32 instance = is_ip6 ? key6.value :
- vxm->tunnel_index_by_sw_if_index[p->sw_if_index];
- vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, instance);
-
- sw_if_index = t->sw_if_index;
- vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ );
-
- vxm->tunnel_index_by_sw_if_index[sw_if_index] = ~0;
-
- if (!is_ip6)
- clib_bihash_add_del_16_8 (&vxm->vxlan4_tunnel_by_key, &key4,
- 0 /*del */ );
- else
- clib_bihash_add_del_24_8 (&vxm->vxlan6_tunnel_by_key, &key6,
- 0 /*del */ );
-
- if (!ip46_address_is_multicast (&t->dst))
- {
- if (t->flow_index != ~0)
- vnet_flow_del (vnm, t->flow_index);
-
- vtep_addr_unref (&vxm->vtep_table, t->encap_fib_index, &t->src);
- fib_entry_untrack (t->fib_entry_index, t->sibling_index);
- }
- else if (vtep_addr_unref (&vxm->vtep_table,
- t->encap_fib_index, &t->dst) == 0)
- {
- mcast_shared_remove (&t->dst);
- }
-
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, t->hw_if_index);
- if (hw->dev_class_index == vxlan_device_class.index)
- vnet_delete_hw_interface (vnm, t->hw_if_index);
- else
- ethernet_delete_interface (vnm, t->hw_if_index);
- hash_unset (vxm->instance_used, t->user_instance);
-
- fib_node_deinit (&t->node);
- pool_put (vxm->tunnels, t);
- }
-
- if (sw_if_indexp)
- *sw_if_indexp = sw_if_index;
-
- if (a->is_add)
- {
- /* register udp ports */
- if (!is_ip6 && !udp_is_valid_dst_port (a->src_port, 1))
- udp_register_dst_port (vxm->vlib_main, a->src_port,
- vxlan4_input_node.index, 1);
- if (is_ip6 && !udp_is_valid_dst_port (a->src_port, 0))
- udp_register_dst_port (vxm->vlib_main, a->src_port,
- vxlan6_input_node.index, 0);
- }
-
- return 0;
-}
-
-static uword
-get_decap_next_for_node (u32 node_index, u32 ipv4_set)
-{
- vxlan_main_t *vxm = &vxlan_main;
- vlib_main_t *vm = vxm->vlib_main;
- uword input_node = (ipv4_set) ? vxlan4_input_node.index :
- vxlan6_input_node.index;
-
- return vlib_node_add_next (vm, input_node, node_index);
-}
-
-static uword
-unformat_decap_next (unformat_input_t * input, va_list * args)
-{
- u32 *result = va_arg (*args, u32 *);
- u32 ipv4_set = va_arg (*args, int);
- vxlan_main_t *vxm = &vxlan_main;
- vlib_main_t *vm = vxm->vlib_main;
- u32 node_index;
- u32 tmp;
-
- if (unformat (input, "l2"))
- *result = VXLAN_INPUT_NEXT_L2_INPUT;
- else if (unformat (input, "node %U", unformat_vlib_node, vm, &node_index))
- *result = get_decap_next_for_node (node_index, ipv4_set);
- else if (unformat (input, "%d", &tmp))
- *result = tmp;
- else
- return 0;
- return 1;
-}
-
-static clib_error_t *
-vxlan_add_del_tunnel_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- ip46_address_t src = ip46_address_initializer, dst =
- ip46_address_initializer;
- u8 is_add = 1;
- u8 src_set = 0;
- u8 dst_set = 0;
- u8 grp_set = 0;
- u8 ipv4_set = 0;
- u8 ipv6_set = 0;
- u8 is_l3 = 0;
- u32 instance = ~0;
- u32 encap_fib_index = 0;
- u32 mcast_sw_if_index = ~0;
- u32 decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT;
- u32 vni = 0;
- u32 src_port = 0;
- u32 dst_port = 0;
- u32 table_id;
- clib_error_t *parse_error = NULL;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "del"))
- {
- is_add = 0;
- }
- else if (unformat (line_input, "instance %d", &instance))
- ;
- else if (unformat (line_input, "src %U",
- unformat_ip46_address, &src, IP46_TYPE_ANY))
- {
- src_set = 1;
- ip46_address_is_ip4 (&src) ? (ipv4_set = 1) : (ipv6_set = 1);
- }
- else if (unformat (line_input, "dst %U",
- unformat_ip46_address, &dst, IP46_TYPE_ANY))
- {
- dst_set = 1;
- ip46_address_is_ip4 (&dst) ? (ipv4_set = 1) : (ipv6_set = 1);
- }
- else if (unformat (line_input, "group %U %U",
- unformat_ip46_address, &dst, IP46_TYPE_ANY,
- unformat_vnet_sw_interface,
- vnet_get_main (), &mcast_sw_if_index))
- {
- grp_set = dst_set = 1;
- ip46_address_is_ip4 (&dst) ? (ipv4_set = 1) : (ipv6_set = 1);
- }
- else if (unformat (line_input, "encap-vrf-id %d", &table_id))
- {
- encap_fib_index =
- fib_table_find (fib_ip_proto (ipv6_set), table_id);
- }
- else if (unformat (line_input, "l3"))
- is_l3 = 1;
- else if (unformat (line_input, "decap-next %U", unformat_decap_next,
- &decap_next_index, ipv4_set))
- ;
- else if (unformat (line_input, "vni %d", &vni))
- ;
- else if (unformat (line_input, "src_port %d", &src_port))
- ;
- else if (unformat (line_input, "dst_port %d", &dst_port))
- ;
- else
- {
- parse_error = clib_error_return (0, "parse error: '%U'",
- format_unformat_error, line_input);
- break;
- }
- }
-
- unformat_free (line_input);
-
- if (parse_error)
- return parse_error;
-
- if (is_l3 && decap_next_index == VXLAN_INPUT_NEXT_L2_INPUT)
- {
- vlib_node_t *node = vlib_get_node_by_name (
- vm, (u8 *) (ipv4_set ? "ip4-input" : "ip6-input"));
- decap_next_index = get_decap_next_for_node (node->index, ipv4_set);
- }
-
- if (encap_fib_index == ~0)
- return clib_error_return (0, "nonexistent encap-vrf-id %d", table_id);
-
- if (src_set == 0)
- return clib_error_return (0, "tunnel src address not specified");
-
- if (dst_set == 0)
- return clib_error_return (0, "tunnel dst address not specified");
-
- if (grp_set && !ip46_address_is_multicast (&dst))
- return clib_error_return (0, "tunnel group address not multicast");
-
- if (grp_set == 0 && ip46_address_is_multicast (&dst))
- return clib_error_return (0, "dst address must be unicast");
-
- if (grp_set && mcast_sw_if_index == ~0)
- return clib_error_return (0, "tunnel nonexistent multicast device");
-
- if (ipv4_set && ipv6_set)
- return clib_error_return (0, "both IPv4 and IPv6 addresses specified");
-
- if (ip46_address_cmp (&src, &dst) == 0)
- return clib_error_return (0, "src and dst addresses are identical");
-
- if (decap_next_index == ~0)
- return clib_error_return (0, "next node not found");
-
- if (vni == 0)
- return clib_error_return (0, "vni not specified");
-
- if (vni >> 24)
- return clib_error_return (0, "vni %d out of range", vni);
-
- vnet_vxlan_add_del_tunnel_args_t a = { .is_add = is_add,
- .is_ip6 = ipv6_set,
- .is_l3 = is_l3,
- .instance = instance,
-#define _(x) .x = x,
- foreach_copy_field
-#undef _
- };
-
- u32 tunnel_sw_if_index;
- int rv = vnet_vxlan_add_del_tunnel (&a, &tunnel_sw_if_index);
-
- switch (rv)
- {
- case 0:
- if (is_add)
- vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
- vnet_get_main (), tunnel_sw_if_index);
- break;
-
- case VNET_API_ERROR_TUNNEL_EXIST:
- return clib_error_return (0, "tunnel already exists...");
-
- case VNET_API_ERROR_NO_SUCH_ENTRY:
- return clib_error_return (0, "tunnel does not exist...");
-
- case VNET_API_ERROR_INSTANCE_IN_USE:
- return clib_error_return (0, "Instance is in use");
-
- default:
- return clib_error_return
- (0, "vnet_vxlan_add_del_tunnel returned %d", rv);
- }
-
- return 0;
-}
-
-/*?
- * Add or delete a VXLAN Tunnel.
- *
- * VXLAN provides the features needed to allow L2 bridge domains (BDs)
- * to span multiple servers. This is done by building an L2 overlay on
- * top of an L3 network underlay using VXLAN tunnels.
- *
- * This makes it possible for servers to be co-located in the same data
- * center or be separated geographically as long as they are reachable
- * through the underlay L3 network.
- *
- * You can refer to this kind of L2 overlay bridge domain as a VXLAN
- * (Virtual eXtensible VLAN) segment.
- *
- * @cliexpar
- * Example of how to create a VXLAN Tunnel:
- * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 encap-vrf-id
- 7}
- * Example of how to create a VXLAN Tunnel with a known name, vxlan_tunnel42:
- * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 instance 42}
- * Example of how to create a multicast VXLAN Tunnel with a known name,
- vxlan_tunnel23:
- * @cliexcmd{create vxlan tunnel src 10.0.3.1 group 239.1.1.1
- GigabitEthernet0/8/0 instance 23}
- * Example of how to create a VXLAN Tunnel with custom udp-ports:
- * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 src_port
- 59000 dst_port 59001}
- * Example of how to delete a VXLAN Tunnel:
- * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 del}
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (create_vxlan_tunnel_command, static) = {
- .path = "create vxlan tunnel",
- .short_help =
- "create vxlan tunnel src <local-vtep-addr>"
- " {dst <remote-vtep-addr>|group <mcast-vtep-addr> <intf-name>} vni <nn>"
- " [instance <id>]"
- " [encap-vrf-id <nn>] [decap-next [l2|node <name>]] [del] [l3]"
- " [src_port <local-vtep-udp-port>] [dst_port <remote-vtep-udp-port>]",
- .function = vxlan_add_del_tunnel_command_fn,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-show_vxlan_tunnel_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- vxlan_main_t *vxm = &vxlan_main;
- vxlan_tunnel_t *t;
- int raw = 0;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "raw"))
- raw = 1;
- else
- return clib_error_return (0, "parse error: '%U'",
- format_unformat_error, input);
- }
-
- if (pool_elts (vxm->tunnels) == 0)
- vlib_cli_output (vm, "No vxlan tunnels configured...");
-
-/* *INDENT-OFF* */
- pool_foreach (t, vxm->tunnels)
- {
- vlib_cli_output (vm, "%U", format_vxlan_tunnel, t);
- }
-/* *INDENT-ON* */
-
- if (raw)
- {
- vlib_cli_output (vm, "Raw IPv4 Hash Table:\n%U\n",
- format_bihash_16_8, &vxm->vxlan4_tunnel_by_key,
- 1 /* verbose */ );
- vlib_cli_output (vm, "Raw IPv6 Hash Table:\n%U\n",
- format_bihash_24_8, &vxm->vxlan6_tunnel_by_key,
- 1 /* verbose */ );
- }
-
- return 0;
-}
-
-/*?
- * Display all the VXLAN Tunnel entries.
- *
- * @cliexpar
- * Example of how to display the VXLAN Tunnel entries:
- * @cliexstart{show vxlan tunnel}
- * [0] src 10.0.3.1 dst 10.0.3.3 src_port 4789 dst_port 4789 vni 13
- encap_fib_index 0 sw_if_index 5 decap_next l2
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_vxlan_tunnel_command, static) = {
- .path = "show vxlan tunnel",
- .short_help = "show vxlan tunnel [raw]",
- .function = show_vxlan_tunnel_command_fn,
-};
-/* *INDENT-ON* */
-
-
-void
-vnet_int_vxlan_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable)
-{
- vxlan_main_t *vxm = &vxlan_main;
-
- if (pool_is_free_index (vxm->vnet_main->interface_main.sw_interfaces,
- sw_if_index))
- return;
-
- is_enable = ! !is_enable;
-
- if (is_ip6)
- {
- if (clib_bitmap_get (vxm->bm_ip6_bypass_enabled_by_sw_if, sw_if_index)
- != is_enable)
- {
- vnet_feature_enable_disable ("ip6-unicast", "ip6-vxlan-bypass",
- sw_if_index, is_enable, 0, 0);
- vxm->bm_ip6_bypass_enabled_by_sw_if =
- clib_bitmap_set (vxm->bm_ip6_bypass_enabled_by_sw_if,
- sw_if_index, is_enable);
- }
- }
- else
- {
- if (clib_bitmap_get (vxm->bm_ip4_bypass_enabled_by_sw_if, sw_if_index)
- != is_enable)
- {
- vnet_feature_enable_disable ("ip4-unicast", "ip4-vxlan-bypass",
- sw_if_index, is_enable, 0, 0);
- vxm->bm_ip4_bypass_enabled_by_sw_if =
- clib_bitmap_set (vxm->bm_ip4_bypass_enabled_by_sw_if,
- sw_if_index, is_enable);
- }
- }
-}
-
-
-static clib_error_t *
-set_ip_vxlan_bypass (u32 is_ip6,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- vnet_main_t *vnm = vnet_get_main ();
- clib_error_t *error = 0;
- u32 sw_if_index, is_enable;
-
- sw_if_index = ~0;
- is_enable = 1;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat_user
- (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index))
- ;
- else if (unformat (line_input, "del"))
- is_enable = 0;
- else
- {
- error = unformat_parse_error (line_input);
- goto done;
- }
- }
-
- if (~0 == sw_if_index)
- {
- error = clib_error_return (0, "unknown interface `%U'",
- format_unformat_error, line_input);
- goto done;
- }
-
- vnet_int_vxlan_bypass_mode (sw_if_index, is_ip6, is_enable);
-
-done:
- unformat_free (line_input);
-
- return error;
-}
-
-static clib_error_t *
-set_ip4_vxlan_bypass (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- return set_ip_vxlan_bypass (0, input, cmd);
-}
-
-/*?
- * This command adds the 'ip4-vxlan-bypass' graph node for a given interface.
- * By adding the IPv4 vxlan-bypass graph node to an interface, the node checks
- * for and validate input vxlan packet and bypass ip4-lookup, ip4-local,
- * ip4-udp-lookup nodes to speedup vxlan packet forwarding. This node will
- * cause extra overhead to for non-vxlan packets which is kept at a minimum.
- *
- * @cliexpar
- * @parblock
- * Example of graph node before ip4-vxlan-bypass is enabled:
- * @cliexstart{show vlib graph ip4-vxlan-bypass}
- * Name Next Previous
- * ip4-vxlan-bypass error-drop [0]
- * vxlan4-input [1]
- * ip4-lookup [2]
- * @cliexend
- *
- * Example of how to enable ip4-vxlan-bypass on an interface:
- * @cliexcmd{set interface ip vxlan-bypass GigabitEthernet2/0/0}
- *
- * Example of graph node after ip4-vxlan-bypass is enabled:
- * @cliexstart{show vlib graph ip4-vxlan-bypass}
- * Name Next Previous
- * ip4-vxlan-bypass error-drop [0] ip4-input
- * vxlan4-input [1] ip4-input-no-checksum
- * ip4-lookup [2]
- * @cliexend
- *
- * Example of how to display the feature enabled on an interface:
- * @cliexstart{show ip interface features GigabitEthernet2/0/0}
- * IP feature paths configured on GigabitEthernet2/0/0...
- * ...
- * ipv4 unicast:
- * ip4-vxlan-bypass
- * ip4-lookup
- * ...
- * @cliexend
- *
- * Example of how to disable ip4-vxlan-bypass on an interface:
- * @cliexcmd{set interface ip vxlan-bypass GigabitEthernet2/0/0 del}
- * @endparblock
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_interface_ip_vxlan_bypass_command, static) = {
- .path = "set interface ip vxlan-bypass",
- .function = set_ip4_vxlan_bypass,
- .short_help = "set interface ip vxlan-bypass <interface> [del]",
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-set_ip6_vxlan_bypass (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- return set_ip_vxlan_bypass (1, input, cmd);
-}
-
-/*?
- * This command adds the 'ip6-vxlan-bypass' graph node for a given interface.
- * By adding the IPv6 vxlan-bypass graph node to an interface, the node checks
- * for and validate input vxlan packet and bypass ip6-lookup, ip6-local,
- * ip6-udp-lookup nodes to speedup vxlan packet forwarding. This node will
- * cause extra overhead to for non-vxlan packets which is kept at a minimum.
- *
- * @cliexpar
- * @parblock
- * Example of graph node before ip6-vxlan-bypass is enabled:
- * @cliexstart{show vlib graph ip6-vxlan-bypass}
- * Name Next Previous
- * ip6-vxlan-bypass error-drop [0]
- * vxlan6-input [1]
- * ip6-lookup [2]
- * @cliexend
- *
- * Example of how to enable ip6-vxlan-bypass on an interface:
- * @cliexcmd{set interface ip6 vxlan-bypass GigabitEthernet2/0/0}
- *
- * Example of graph node after ip6-vxlan-bypass is enabled:
- * @cliexstart{show vlib graph ip6-vxlan-bypass}
- * Name Next Previous
- * ip6-vxlan-bypass error-drop [0] ip6-input
- * vxlan6-input [1] ip4-input-no-checksum
- * ip6-lookup [2]
- * @cliexend
- *
- * Example of how to display the feature enabled on an interface:
- * @cliexstart{show ip interface features GigabitEthernet2/0/0}
- * IP feature paths configured on GigabitEthernet2/0/0...
- * ...
- * ipv6 unicast:
- * ip6-vxlan-bypass
- * ip6-lookup
- * ...
- * @cliexend
- *
- * Example of how to disable ip6-vxlan-bypass on an interface:
- * @cliexcmd{set interface ip6 vxlan-bypass GigabitEthernet2/0/0 del}
- * @endparblock
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_interface_ip6_vxlan_bypass_command, static) = {
- .path = "set interface ip6 vxlan-bypass",
- .function = set_ip6_vxlan_bypass,
- .short_help = "set interface ip6 vxlan-bypass <interface> [del]",
-};
-/* *INDENT-ON* */
-
-int
-vnet_vxlan_add_del_rx_flow (u32 hw_if_index, u32 t_index, int is_add)
-{
- vxlan_main_t *vxm = &vxlan_main;
- vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, t_index);
- vnet_main_t *vnm = vnet_get_main ();
- if (is_add)
- {
- if (t->flow_index == ~0)
- {
- vxlan_main_t *vxm = &vxlan_main;
- vnet_flow_t flow = {
- .actions =
- VNET_FLOW_ACTION_REDIRECT_TO_NODE | VNET_FLOW_ACTION_MARK |
- VNET_FLOW_ACTION_BUFFER_ADVANCE,
- .mark_flow_id = t->dev_instance + vxm->flow_id_start,
- .redirect_node_index = vxlan4_flow_input_node.index,
- .buffer_advance = sizeof (ethernet_header_t),
- .type = VNET_FLOW_TYPE_IP4_VXLAN,
- .ip4_vxlan = {
- .protocol.prot = IP_PROTOCOL_UDP,
- .src_addr.addr = t->dst.ip4,
- .dst_addr.addr = t->src.ip4,
- .src_addr.mask.as_u32 = ~0,
- .dst_addr.mask.as_u32 = ~0,
- .dst_port.port = t->src_port,
- .dst_port.mask = 0xFF,
- .vni = t->vni,
- }
- ,
- };
- vnet_flow_add (vnm, &flow, &t->flow_index);
- }
- return vnet_flow_enable (vnm, t->flow_index, hw_if_index);
- }
- /* flow index is removed when the tunnel is deleted */
- return vnet_flow_disable (vnm, t->flow_index, hw_if_index);
-}
-
-u32
-vnet_vxlan_get_tunnel_index (u32 sw_if_index)
-{
- vxlan_main_t *vxm = &vxlan_main;
-
- if (sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index))
- return ~0;
- return vxm->tunnel_index_by_sw_if_index[sw_if_index];
-}
-
-static clib_error_t *
-vxlan_offload_command_fn (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- vnet_main_t *vnm = vnet_get_main ();
- u32 rx_sw_if_index = ~0;
- u32 hw_if_index = ~0;
- int is_add = 1;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "hw %U", unformat_vnet_hw_interface, vnm,
- &hw_if_index))
- continue;
- if (unformat (line_input, "rx %U", unformat_vnet_sw_interface, vnm,
- &rx_sw_if_index))
- continue;
- if (unformat (line_input, "del"))
- {
- is_add = 0;
- continue;
- }
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- }
-
- if (rx_sw_if_index == ~0)
- return clib_error_return (0, "missing rx interface");
- if (hw_if_index == ~0)
- return clib_error_return (0, "missing hw interface");
-
- u32 t_index = vnet_vxlan_get_tunnel_index (rx_sw_if_index);;
- if (t_index == ~0)
- return clib_error_return (0, "%U is not a vxlan tunnel",
- format_vnet_sw_if_index_name, vnm,
- rx_sw_if_index);
-
- vxlan_main_t *vxm = &vxlan_main;
- vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, t_index);
-
- if (!ip46_address_is_ip4 (&t->dst))
- return clib_error_return (0, "currently only IPV4 tunnels are supported");
-
- vnet_hw_interface_t *hw_if = vnet_get_hw_interface (vnm, hw_if_index);
- ip4_main_t *im = &ip4_main;
- u32 rx_fib_index =
- vec_elt (im->fib_index_by_sw_if_index, hw_if->sw_if_index);
-
- if (t->encap_fib_index != rx_fib_index)
- return clib_error_return (0, "interface/tunnel fib mismatch");
-
- if (vnet_vxlan_add_del_rx_flow (hw_if_index, t_index, is_add))
- return clib_error_return (0, "error %s flow",
- is_add ? "enabling" : "disabling");
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (vxlan_offload_command, static) = {
- .path = "set flow-offload vxlan",
- .short_help =
- "set flow-offload vxlan hw <interface-name> rx <tunnel-name> [del]",
- .function = vxlan_offload_command_fn,
-};
-/* *INDENT-ON* */
-
-#define VXLAN_HASH_NUM_BUCKETS (2 * 1024)
-#define VXLAN_HASH_MEMORY_SIZE (1 << 20)
-
-clib_error_t *
-vxlan_init (vlib_main_t * vm)
-{
- vxlan_main_t *vxm = &vxlan_main;
-
- vxm->vnet_main = vnet_get_main ();
- vxm->vlib_main = vm;
-
- vnet_flow_get_range (vxm->vnet_main, "vxlan", 1024 * 1024,
- &vxm->flow_id_start);
-
- vxm->bm_ip4_bypass_enabled_by_sw_if = 0;
- vxm->bm_ip6_bypass_enabled_by_sw_if = 0;
-
- /* initialize the ip6 hash */
- clib_bihash_init_16_8 (&vxm->vxlan4_tunnel_by_key, "vxlan4",
- VXLAN_HASH_NUM_BUCKETS, VXLAN_HASH_MEMORY_SIZE);
- clib_bihash_init_24_8 (&vxm->vxlan6_tunnel_by_key, "vxlan6",
- VXLAN_HASH_NUM_BUCKETS, VXLAN_HASH_MEMORY_SIZE);
- vxm->vtep_table = vtep_table_create ();
- vxm->mcast_shared = hash_create_mem (0,
- sizeof (ip46_address_t),
- sizeof (mcast_shared_t));
-
- fib_node_register_type (FIB_NODE_TYPE_VXLAN_TUNNEL, &vxlan_vft);
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (vxlan_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan/vxlan.h b/src/vnet/vxlan/vxlan.h
deleted file mode 100644
index fa47605e42d..00000000000
--- a/src/vnet/vxlan/vxlan.h
+++ /dev/null
@@ -1,242 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef included_vnet_vxlan_h
-#define included_vnet_vxlan_h
-
-#include <vppinfra/error.h>
-#include <vppinfra/hash.h>
-#include <vppinfra/bihash_16_8.h>
-#include <vppinfra/bihash_24_8.h>
-#include <vnet/vnet.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ip/vtep.h>
-#include <vnet/l2/l2_input.h>
-#include <vnet/l2/l2_output.h>
-#include <vnet/l2/l2_bd.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/vxlan/vxlan_packet.h>
-#include <vnet/ip/ip4_packet.h>
-#include <vnet/ip/ip6_packet.h>
-#include <vnet/udp/udp_packet.h>
-#include <vnet/dpo/dpo.h>
-#include <vnet/adj/adj_types.h>
-
-/* *INDENT-OFF* */
-typedef CLIB_PACKED (struct {
- ip4_header_t ip4; /* 20 bytes */
- udp_header_t udp; /* 8 bytes */
- vxlan_header_t vxlan; /* 8 bytes */
-}) ip4_vxlan_header_t;
-
-typedef CLIB_PACKED (struct {
- ip6_header_t ip6; /* 40 bytes */
- udp_header_t udp; /* 8 bytes */
- vxlan_header_t vxlan; /* 8 bytes */
-}) ip6_vxlan_header_t;
-/* *INDENT-ON* */
-
-/*
-* Key fields: remote ip, vni on incoming VXLAN packet
-* all fields in NET byte order
-*/
-typedef clib_bihash_kv_16_8_t vxlan4_tunnel_key_t;
-
-/*
-* Key fields: remote ip, vni and fib index on incoming VXLAN packet
-* ip, vni fields in NET byte order
-* fib index field in host byte order
-*/
-typedef clib_bihash_kv_24_8_t vxlan6_tunnel_key_t;
-
-typedef union
-{
- struct
- {
- u32 sw_if_index; /* unicast - input interface / mcast - stats interface */
- union
- {
- struct /* unicast action */
- {
- u16 next_index;
- u8 error;
- };
- ip4_address_t local_ip; /* used as dst ip for mcast pkts to assign them to unicast tunnel */
- };
- };
- u64 as_u64;
-} vxlan_decap_info_t;
-
-typedef struct
-{
- /* Required for pool_get_aligned */
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-
- /* FIB DPO for IP forwarding of VXLAN encap packet */
- dpo_id_t next_dpo;
-
- /* vxlan VNI in HOST byte order */
- u32 vni;
-
- /* tunnel src and dst addresses */
- ip46_address_t src;
- ip46_address_t dst;
-
- /* udp-ports */
- u16 src_port;
- u16 dst_port;
-
- /* mcast packet output intfc index (used only if dst is mcast) */
- u32 mcast_sw_if_index;
-
- /* decap next index */
- u16 decap_next_index;
-
- /* The FIB index for src/dst addresses */
- u32 encap_fib_index;
-
- /* vnet intfc index */
- u32 sw_if_index;
- u32 hw_if_index;
-
- /**
- * Linkage into the FIB object graph
- */
- fib_node_t node;
-
- /*
- * The FIB entry for (depending on VXLAN tunnel is unicast or mcast)
- * sending unicast VXLAN encap packets or receiving mcast VXLAN packets
- */
- fib_node_index_t fib_entry_index;
- adj_index_t mcast_adj_index;
-
- /**
- * The tunnel is a child of the FIB entry for its destination. This is
- * so it receives updates when the forwarding information for that entry
- * changes.
- * The tunnels sibling index on the FIB entry's dependency list.
- */
- u32 sibling_index;
-
- u32 flow_index; /* infra flow index */
- u32 dev_instance; /* Real device instance in tunnel vector */
- u32 user_instance; /* Instance name being shown to user */
-
- VNET_DECLARE_REWRITE;
-} vxlan_tunnel_t;
-
-#define foreach_vxlan_input_next \
-_(DROP, "error-drop") \
-_(L2_INPUT, "l2-input")
-
-typedef enum
-{
-#define _(s,n) VXLAN_INPUT_NEXT_##s,
- foreach_vxlan_input_next
-#undef _
- VXLAN_INPUT_N_NEXT,
-} vxlan_input_next_t;
-
-typedef enum
-{
-#define vxlan_error(n,s) VXLAN_ERROR_##n,
-#include <vnet/vxlan/vxlan_error.def>
-#undef vxlan_error
- VXLAN_N_ERROR,
-} vxlan_input_error_t;
-
-typedef struct
-{
- /* vector of encap tunnel instances */
- vxlan_tunnel_t *tunnels;
-
- /* lookup tunnel by key */
- clib_bihash_16_8_t
- vxlan4_tunnel_by_key; /* keyed on ipv4.dst + src_port + fib + vni */
- clib_bihash_24_8_t
- vxlan6_tunnel_by_key; /* keyed on ipv6.dst + src_port + fib + vni */
-
- /* local VTEP IPs ref count used by vxlan-bypass node to check if
- received VXLAN packet DIP matches any local VTEP address */
- vtep_table_t vtep_table;
-
- /* mcast shared info */
- uword *mcast_shared; /* keyed on mcast ip46 addr */
-
- /* Mapping from sw_if_index to tunnel index */
- u32 *tunnel_index_by_sw_if_index;
-
- /* graph node state */
- uword *bm_ip4_bypass_enabled_by_sw_if;
- uword *bm_ip6_bypass_enabled_by_sw_if;
-
- /* convenience */
- vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
-
- /* Record used instances */
- uword *instance_used;
- u32 flow_id_start;
-
- /* cache for last 8 vxlan tunnel */
- vtep4_cache_t vtep4_u512;
-
-} vxlan_main_t;
-
-extern vxlan_main_t vxlan_main;
-
-extern vlib_node_registration_t vxlan4_input_node;
-extern vlib_node_registration_t vxlan6_input_node;
-extern vlib_node_registration_t vxlan4_encap_node;
-extern vlib_node_registration_t vxlan6_encap_node;
-extern vlib_node_registration_t vxlan4_flow_input_node;
-
-u8 *format_vxlan_encap_trace (u8 * s, va_list * args);
-
-typedef struct
-{
- u8 is_add;
-
- /* we normally use is_ip4, but since this adds to the
- * structure, this seems less of a breaking change */
- u8 is_ip6;
- u8 is_l3;
- u32 instance;
- ip46_address_t src, dst;
- u32 mcast_sw_if_index;
- u32 encap_fib_index;
- u32 decap_next_index;
- u32 vni;
- u16 src_port;
- u16 dst_port;
-} vnet_vxlan_add_del_tunnel_args_t;
-
-int vnet_vxlan_add_del_tunnel
- (vnet_vxlan_add_del_tunnel_args_t * a, u32 * sw_if_indexp);
-
-void vnet_int_vxlan_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable);
-
-int vnet_vxlan_add_del_rx_flow (u32 hw_if_index, u32 t_imdex, int is_add);
-
-u32 vnet_vxlan_get_tunnel_index (u32 sw_if_index);
-#endif /* included_vnet_vxlan_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan/vxlan_api.c b/src/vnet/vxlan/vxlan_api.c
deleted file mode 100644
index 56fd654951f..00000000000
--- a/src/vnet/vxlan/vxlan_api.c
+++ /dev/null
@@ -1,376 +0,0 @@
-/*
- *------------------------------------------------------------------
- * vxlan_api.c - vxlan api
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vnet/vnet.h>
-#include <vlibmemory/api.h>
-
-#include <vnet/interface.h>
-#include <vnet/api_errno.h>
-#include <vnet/feature/feature.h>
-#include <vnet/vxlan/vxlan.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/ip/ip_types_api.h>
-#include <vnet/udp/udp_local.h>
-#include <vnet/format_fns.h>
-#include <vxlan/vxlan.api_enum.h>
-#include <vxlan/vxlan.api_types.h>
-
-static u16 msg_id_base;
-
-#define REPLY_MSG_ID_BASE msg_id_base
-#include <vlibapi/api_helper_macros.h>
-
-static void
-vl_api_vxlan_offload_rx_t_handler (vl_api_vxlan_offload_rx_t * mp)
-{
- vl_api_vxlan_offload_rx_reply_t *rmp;
- int rv = 0;
- u32 hw_if_index = ntohl (mp->hw_if_index);
- u32 sw_if_index = ntohl (mp->sw_if_index);
-
- if (!vnet_hw_interface_is_valid (vnet_get_main (), hw_if_index))
- {
- rv = VNET_API_ERROR_NO_SUCH_ENTRY;
- goto err;
- }
- VALIDATE_SW_IF_INDEX (mp);
-
- u32 t_index = vnet_vxlan_get_tunnel_index (sw_if_index);
- if (t_index == ~0)
- {
- rv = VNET_API_ERROR_INVALID_SW_IF_INDEX_2;
- goto err;
- }
-
- vxlan_main_t *vxm = &vxlan_main;
- vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, t_index);
- if (!ip46_address_is_ip4 (&t->dst))
- {
- rv = VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
- goto err;
- }
-
- vnet_main_t *vnm = vnet_get_main ();
- vnet_hw_interface_t *hw_if = vnet_get_hw_interface (vnm, hw_if_index);
- ip4_main_t *im = &ip4_main;
- u32 rx_fib_index =
- vec_elt (im->fib_index_by_sw_if_index, hw_if->sw_if_index);
-
- if (t->encap_fib_index != rx_fib_index)
- {
- rv = VNET_API_ERROR_NO_SUCH_FIB;
- goto err;
- }
-
- if (vnet_vxlan_add_del_rx_flow (hw_if_index, t_index, mp->enable))
- {
- rv = VNET_API_ERROR_UNSPECIFIED;
- goto err;
- }
- BAD_SW_IF_INDEX_LABEL;
-err:
-
- REPLY_MACRO (VL_API_VXLAN_OFFLOAD_RX_REPLY);
-}
-
-static void
- vl_api_sw_interface_set_vxlan_bypass_t_handler
- (vl_api_sw_interface_set_vxlan_bypass_t * mp)
-{
- vl_api_sw_interface_set_vxlan_bypass_reply_t *rmp;
- int rv = 0;
- u32 sw_if_index = ntohl (mp->sw_if_index);
-
- VALIDATE_SW_IF_INDEX (mp);
-
- vnet_int_vxlan_bypass_mode (sw_if_index, mp->is_ipv6, mp->enable);
- BAD_SW_IF_INDEX_LABEL;
-
- REPLY_MACRO (VL_API_SW_INTERFACE_SET_VXLAN_BYPASS_REPLY);
-}
-
-static int
-vxlan_add_del_tunnel_clean_input (vnet_vxlan_add_del_tunnel_args_t *a,
- u32 encap_vrf_id)
-{
- a->is_ip6 = !ip46_address_is_ip4 (&a->src);
-
- a->encap_fib_index = fib_table_find (fib_ip_proto (a->is_ip6), encap_vrf_id);
- if (a->encap_fib_index == ~0)
- {
- return VNET_API_ERROR_NO_SUCH_FIB;
- }
-
- if (ip46_address_is_ip4 (&a->src) != ip46_address_is_ip4 (&a->dst))
- {
- return VNET_API_ERROR_INVALID_VALUE;
- }
-
- /* Check src & dst are different */
- if (ip46_address_cmp (&a->dst, &a->src) == 0)
- {
- return VNET_API_ERROR_SAME_SRC_DST;
- }
- if (ip46_address_is_multicast (&a->dst) &&
- !vnet_sw_if_index_is_api_valid (a->mcast_sw_if_index))
- {
- return VNET_API_ERROR_INVALID_SW_IF_INDEX;
- }
- return 0;
-}
-
-static void
-vl_api_vxlan_add_del_tunnel_t_handler (vl_api_vxlan_add_del_tunnel_t *mp)
-{
- vl_api_vxlan_add_del_tunnel_reply_t *rmp;
- u32 sw_if_index = ~0;
- int rv = 0;
-
- vnet_vxlan_add_del_tunnel_args_t a = {
- .is_add = mp->is_add,
- .instance = ntohl (mp->instance),
- .mcast_sw_if_index = ntohl (mp->mcast_sw_if_index),
- .decap_next_index = ntohl (mp->decap_next_index),
- .vni = ntohl (mp->vni),
- };
- ip_address_decode (&mp->src_address, &a.src);
- ip_address_decode (&mp->dst_address, &a.dst);
-
- rv = vxlan_add_del_tunnel_clean_input (&a, ntohl (mp->encap_vrf_id));
- if (rv)
- goto out;
- a.dst_port = a.is_ip6 ? UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan,
- a.src_port = a.is_ip6 ? UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan,
- rv = vnet_vxlan_add_del_tunnel (&a, &sw_if_index);
-
-out:
- REPLY_MACRO2(VL_API_VXLAN_ADD_DEL_TUNNEL_REPLY,
- ({
- rmp->sw_if_index = ntohl (sw_if_index);
- }));
-}
-
-static void
-vl_api_vxlan_add_del_tunnel_v2_t_handler (vl_api_vxlan_add_del_tunnel_v2_t *mp)
-{
- vl_api_vxlan_add_del_tunnel_v2_reply_t *rmp;
- u32 sw_if_index = ~0;
- int rv = 0;
-
- vnet_vxlan_add_del_tunnel_args_t a = {
- .is_add = mp->is_add,
- .instance = ntohl (mp->instance),
- .mcast_sw_if_index = ntohl (mp->mcast_sw_if_index),
- .decap_next_index = ntohl (mp->decap_next_index),
- .vni = ntohl (mp->vni),
- .dst_port = ntohs (mp->dst_port),
- .src_port = ntohs (mp->src_port),
- };
-
- ip_address_decode (&mp->src_address, &a.src);
- ip_address_decode (&mp->dst_address, &a.dst);
-
- rv = vxlan_add_del_tunnel_clean_input (&a, ntohl (mp->encap_vrf_id));
- if (rv)
- goto out;
- rv = vnet_vxlan_add_del_tunnel (&a, &sw_if_index);
-out:
- REPLY_MACRO2 (VL_API_VXLAN_ADD_DEL_TUNNEL_V2_REPLY,
- ({ rmp->sw_if_index = ntohl (sw_if_index); }));
-}
-
-static void
-vl_api_vxlan_add_del_tunnel_v3_t_handler (vl_api_vxlan_add_del_tunnel_v3_t *mp)
-{
- vl_api_vxlan_add_del_tunnel_v3_reply_t *rmp;
- u32 sw_if_index = ~0;
- int rv = 0;
-
- vnet_vxlan_add_del_tunnel_args_t a = {
- .is_add = mp->is_add,
- .instance = ntohl (mp->instance),
- .mcast_sw_if_index = ntohl (mp->mcast_sw_if_index),
- .decap_next_index = ntohl (mp->decap_next_index),
- .vni = ntohl (mp->vni),
- .dst_port = ntohs (mp->dst_port),
- .src_port = ntohs (mp->src_port),
- .is_l3 = mp->is_l3,
- };
-
- ip_address_decode (&mp->src_address, &a.src);
- ip_address_decode (&mp->dst_address, &a.dst);
-
- rv = vxlan_add_del_tunnel_clean_input (&a, ntohl (mp->encap_vrf_id));
- if (rv)
- goto out;
- rv = vnet_vxlan_add_del_tunnel (&a, &sw_if_index);
-out:
- REPLY_MACRO2 (VL_API_VXLAN_ADD_DEL_TUNNEL_V3_REPLY,
- ({ rmp->sw_if_index = ntohl (sw_if_index); }));
-}
-
-static void send_vxlan_tunnel_details
- (vxlan_tunnel_t * t, vl_api_registration_t * reg, u32 context)
-{
- vl_api_vxlan_tunnel_details_t *rmp;
- ip4_main_t *im4 = &ip4_main;
- ip6_main_t *im6 = &ip6_main;
-
- rmp = vl_msg_api_alloc (sizeof (*rmp));
- clib_memset (rmp, 0, sizeof (*rmp));
- rmp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_VXLAN_TUNNEL_DETAILS);
-
- ip_address_encode (&t->src, IP46_TYPE_ANY, &rmp->src_address);
- ip_address_encode (&t->dst, IP46_TYPE_ANY, &rmp->dst_address);
-
- if (ip46_address_is_ip4 (&t->dst))
- rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id);
- else
- rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id);
-
- rmp->instance = htonl (t->user_instance);
- rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index);
- rmp->vni = htonl (t->vni);
- rmp->decap_next_index = htonl (t->decap_next_index);
- rmp->sw_if_index = htonl (t->sw_if_index);
- rmp->context = context;
-
- vl_api_send_msg (reg, (u8 *) rmp);
-}
-
-static void vl_api_vxlan_tunnel_dump_t_handler
- (vl_api_vxlan_tunnel_dump_t * mp)
-{
- vl_api_registration_t *reg;
- vxlan_main_t *vxm = &vxlan_main;
- vxlan_tunnel_t *t;
- u32 sw_if_index;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- sw_if_index = ntohl (mp->sw_if_index);
-
- if (~0 == sw_if_index)
- {
- pool_foreach (t, vxm->tunnels)
- send_vxlan_tunnel_details(t, reg, mp->context);
- }
- else
- {
- if ((sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index)) ||
- (~0 == vxm->tunnel_index_by_sw_if_index[sw_if_index]))
- {
- return;
- }
- t = &vxm->tunnels[vxm->tunnel_index_by_sw_if_index[sw_if_index]];
- send_vxlan_tunnel_details (t, reg, mp->context);
- }
-}
-
-static void
-send_vxlan_tunnel_v2_details (vxlan_tunnel_t *t, vl_api_registration_t *reg,
- u32 context)
-{
- vl_api_vxlan_tunnel_v2_details_t *rmp;
- ip4_main_t *im4 = &ip4_main;
- ip6_main_t *im6 = &ip6_main;
-
- rmp = vl_msg_api_alloc (sizeof (*rmp));
- clib_memset (rmp, 0, sizeof (*rmp));
- rmp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_VXLAN_TUNNEL_V2_DETAILS);
-
- ip_address_encode (&t->src, IP46_TYPE_ANY, &rmp->src_address);
- ip_address_encode (&t->dst, IP46_TYPE_ANY, &rmp->dst_address);
- rmp->src_port = htons (t->src_port);
- rmp->dst_port = htons (t->dst_port);
-
- if (ip46_address_is_ip4 (&t->dst))
- rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id);
- else
- rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id);
-
- rmp->instance = htonl (t->user_instance);
- rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index);
- rmp->vni = htonl (t->vni);
- rmp->decap_next_index = htonl (t->decap_next_index);
- rmp->sw_if_index = htonl (t->sw_if_index);
- rmp->context = context;
-
- vl_api_send_msg (reg, (u8 *) rmp);
-}
-
-static void
-vl_api_vxlan_tunnel_v2_dump_t_handler (vl_api_vxlan_tunnel_v2_dump_t *mp)
-{
- vl_api_registration_t *reg;
- vxlan_main_t *vxm = &vxlan_main;
- vxlan_tunnel_t *t;
- u32 sw_if_index;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- sw_if_index = ntohl (mp->sw_if_index);
-
- if (~0 == sw_if_index)
- {
- pool_foreach (t, vxm->tunnels)
- send_vxlan_tunnel_v2_details (t, reg, mp->context);
- }
- else
- {
- if ((sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index)) ||
- (~0 == vxm->tunnel_index_by_sw_if_index[sw_if_index]))
- {
- return;
- }
- t = &vxm->tunnels[vxm->tunnel_index_by_sw_if_index[sw_if_index]];
- send_vxlan_tunnel_v2_details (t, reg, mp->context);
- }
-}
-
-#include <vxlan/vxlan.api.c>
-static clib_error_t *
-vxlan_api_hookup (vlib_main_t * vm)
-{
- api_main_t *am = vlibapi_get_main ();
-
- vl_api_increase_msg_trace_size (am, VL_API_VXLAN_ADD_DEL_TUNNEL,
- 16 * sizeof (u32));
-
- /*
- * Set up the (msg_name, crc, message-id) table
- */
- msg_id_base = setup_message_id_table ();
-
- return 0;
-}
-
-VLIB_API_INIT_FUNCTION (vxlan_api_hookup);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan/vxlan_error.def b/src/vnet/vxlan/vxlan_error.def
deleted file mode 100644
index 17f905950f5..00000000000
--- a/src/vnet/vxlan/vxlan_error.def
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-vxlan_error (DECAPSULATED, "good packets decapsulated")
-vxlan_error (NO_SUCH_TUNNEL, "no such tunnel packets")
-vxlan_error (BAD_FLAGS, "packets with bad flags field in vxlan header")
diff --git a/src/vnet/vxlan/vxlan_packet.h b/src/vnet/vxlan/vxlan_packet.h
deleted file mode 100644
index d1d1ed813e5..00000000000
--- a/src/vnet/vxlan/vxlan_packet.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __included_vxlan_packet_h__
-#define __included_vxlan_packet_h__ 1
-
-/*
- * From RFC-7348
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |R|R|R|R|I|R|R|R| Reserved |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | VXLAN Network Identifier (VNI) | Reserved |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- * VXLAN Header: This is an 8-byte field that has:
- *
- * - Flags (8 bits): where the I flag MUST be set to 1 for a valid
- * VXLAN Network ID (VNI). The other 7 bits (designated "R") are
- * reserved fields and MUST be set to zero on transmission and
- * ignored on receipt.
- *
- * - VXLAN Segment ID/VXLAN Network Identifier (VNI): this is a
- * 24-bit value used to designate the individual VXLAN overlay
- * network on which the communicating VMs are situated. VMs in
- * different VXLAN overlay networks cannot communicate with each
- * other.
- *
- * - Reserved fields (24 bits and 8 bits): MUST be set to zero on
- * transmission and ignored on receipt.
- *
- */
-
-typedef struct
-{
- u8 flags;
- u8 res1;
- u8 res2;
- u8 res3;
- u32 vni_reserved;
-} vxlan_header_t;
-
-#define VXLAN_FLAGS_I 0x08
-
-static inline u32
-vnet_get_vni (vxlan_header_t * h)
-{
- u32 vni_reserved_host_byte_order;
-
- vni_reserved_host_byte_order = clib_net_to_host_u32 (h->vni_reserved);
- return vni_reserved_host_byte_order >> 8;
-}
-
-static inline void
-vnet_set_vni_and_flags (vxlan_header_t * h, u32 vni)
-{
- h->vni_reserved = clib_host_to_net_u32 (vni << 8);
- *(u32 *) h = 0;
- h->flags = VXLAN_FLAGS_I;
-}
-
-#endif /* __included_vxlan_packet_h__ */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */