Diffstat (limited to 'src/vnet')
-rw-r--r--  src/vnet/CMakeLists.txt | 190
-rw-r--r--  src/vnet/adj/adj.c | 19
-rw-r--r--  src/vnet/adj/adj.h | 10
-rw-r--r--  src/vnet/adj/adj_bfd.c | 53
-rw-r--r--  src/vnet/adj/adj_dp.h | 42
-rw-r--r--  src/vnet/adj/adj_glean.c | 121
-rw-r--r--  src/vnet/adj/adj_internal.h | 19
-rw-r--r--  src/vnet/adj/adj_mcast.c | 4
-rw-r--r--  src/vnet/adj/adj_midchain.c | 82
-rw-r--r--  src/vnet/adj/adj_midchain.h | 5
-rw-r--r--  src/vnet/adj/adj_midchain_delegate.c | 25
-rw-r--r--  src/vnet/adj/adj_midchain_node.c | 35
-rw-r--r--  src/vnet/adj/adj_nbr.c | 60
-rw-r--r--  src/vnet/adj/adj_nsh.c | 2
-rw-r--r--  src/vnet/adj/rewrite.h | 6
-rw-r--r--  src/vnet/api_errno.h | 179
-rw-r--r--  src/vnet/arp/arp.api | 118
-rw-r--r--  src/vnet/arp/arp.c | 249
-rw-r--r--  src/vnet/arp/arp.h | 27
-rw-r--r--  src/vnet/arp/arp_packet.h | 2
-rw-r--r--  src/vnet/arp/arp_proxy.c | 40
-rw-r--r--  src/vnet/bfd/bfd.api | 121
-rw-r--r--  src/vnet/bfd/bfd_api.c | 29
-rw-r--r--  src/vnet/bfd/bfd_api.h | 9
-rw-r--r--  src/vnet/bfd/bfd_cli.c | 134
-rw-r--r--  src/vnet/bfd/bfd_main.c | 373
-rw-r--r--  src/vnet/bfd/bfd_main.h | 51
-rw-r--r--  src/vnet/bfd/bfd_protocol.h | 10
-rw-r--r--  src/vnet/bfd/bfd_udp.c | 555
-rw-r--r--  src/vnet/bfd/bfd_udp.h | 14
-rw-r--r--  src/vnet/bier/bier_update.c | 9
-rw-r--r--  src/vnet/bonding/bond_api.c | 16
-rw-r--r--  src/vnet/bonding/cli.c | 108
-rw-r--r--  src/vnet/bonding/device.c | 383
-rw-r--r--  src/vnet/bonding/node.c | 2
-rw-r--r--  src/vnet/bonding/node.h | 3
-rw-r--r--  src/vnet/buffer.h | 35
-rw-r--r--  src/vnet/classify/classify.api | 24
-rw-r--r--  src/vnet/classify/classify_api.c | 43
-rw-r--r--  src/vnet/classify/flow_classify.c | 4
-rw-r--r--  src/vnet/classify/flow_classify_node.c | 8
-rw-r--r--  src/vnet/classify/in_out_acl.c | 6
-rw-r--r--  src/vnet/classify/ip_classify.c | 8
-rw-r--r--  src/vnet/classify/pcap_classify.h | 6
-rw-r--r--  src/vnet/classify/policer_classify.c | 4
-rw-r--r--  src/vnet/classify/trace_classify.h | 5
-rw-r--r--  src/vnet/classify/vnet_classify.c | 136
-rw-r--r--  src/vnet/classify/vnet_classify.h | 48
-rw-r--r--  src/vnet/config.c | 31
-rw-r--r--  src/vnet/config.h | 6
-rw-r--r--  src/vnet/crypto/cli.c | 98
-rw-r--r--  src/vnet/crypto/crypto.api | 21
-rw-r--r--  src/vnet/crypto/crypto.c | 236
-rw-r--r--  src/vnet/crypto/crypto.h | 114
-rw-r--r--  src/vnet/crypto/crypto_api.c | 14
-rw-r--r--  src/vnet/crypto/node.c | 38
-rw-r--r--  src/vnet/dev/api.c | 275
-rw-r--r--  src/vnet/dev/api.h | 68
-rw-r--r--  src/vnet/dev/args.c | 237
-rw-r--r--  src/vnet/dev/args.h | 74
-rw-r--r--  src/vnet/dev/cli.c | 331
-rw-r--r--  src/vnet/dev/config.c | 196
-rw-r--r--  src/vnet/dev/counters.c | 132
-rw-r--r--  src/vnet/dev/counters.h | 128
-rw-r--r--  src/vnet/dev/dev.api | 86
-rw-r--r--  src/vnet/dev/dev.c | 461
-rw-r--r--  src/vnet/dev/dev.h | 753
-rw-r--r--  src/vnet/dev/dev_api.c | 192
-rw-r--r--  src/vnet/dev/dev_funcs.h | 332
-rw-r--r--  src/vnet/dev/error.c | 54
-rw-r--r--  src/vnet/dev/errors.h | 46
-rw-r--r--  src/vnet/dev/format.c | 507
-rw-r--r--  src/vnet/dev/handlers.c | 256
-rw-r--r--  src/vnet/dev/log.h | 22
-rw-r--r--  src/vnet/dev/mgmt.h | 10
-rw-r--r--  src/vnet/dev/pci.c | 458
-rw-r--r--  src/vnet/dev/pci.h | 80
-rw-r--r--  src/vnet/dev/port.c | 748
-rw-r--r--  src/vnet/dev/process.c | 474
-rw-r--r--  src/vnet/dev/process.h | 10
-rw-r--r--  src/vnet/dev/queue.c | 227
-rw-r--r--  src/vnet/dev/runtime.c | 180
-rw-r--r--  src/vnet/dev/types.h | 66
-rw-r--r--  src/vnet/devices/af_packet/FEATURE.yaml | 15
-rw-r--r--  src/vnet/devices/af_packet/af_packet.api | 137
-rw-r--r--  src/vnet/devices/af_packet/af_packet.c | 607
-rw-r--r--  src/vnet/devices/af_packet/af_packet.h | 111
-rw-r--r--  src/vnet/devices/af_packet/af_packet_api.c | 183
-rw-r--r--  src/vnet/devices/af_packet/cli.c | 277
-rw-r--r--  src/vnet/devices/af_packet/device.c | 412
-rw-r--r--  src/vnet/devices/af_packet/dir.dox | 29
-rw-r--r--  src/vnet/devices/af_packet/node.c | 435
-rw-r--r--  src/vnet/devices/devices.c | 50
-rw-r--r--  src/vnet/devices/devices.h | 2
-rw-r--r--  src/vnet/devices/netlink.c | 50
-rw-r--r--  src/vnet/devices/netlink.h | 2
-rw-r--r--  src/vnet/devices/pipe/pipe.c | 34
-rw-r--r--  src/vnet/devices/pipe/pipe_api.c | 2
-rw-r--r--  src/vnet/devices/tap/FEATURE.yaml | 2
-rw-r--r--  src/vnet/devices/tap/cli.c | 24
-rw-r--r--  src/vnet/devices/tap/tap.c | 130
-rw-r--r--  src/vnet/devices/tap/tap.h | 3
-rw-r--r--  src/vnet/devices/tap/tapv2.api | 80
-rw-r--r--  src/vnet/devices/tap/tapv2_api.c | 95
-rw-r--r--  src/vnet/devices/virtio/FEATURE.yaml | 4
-rw-r--r--  src/vnet/devices/virtio/cli.c | 20
-rw-r--r--  src/vnet/devices/virtio/device.c | 198
-rw-r--r--  src/vnet/devices/virtio/node.c | 156
-rw-r--r--  src/vnet/devices/virtio/pci.c | 264
-rw-r--r--  src/vnet/devices/virtio/pci.h | 17
-rw-r--r--  src/vnet/devices/virtio/vhost_user.api | 201
-rw-r--r--  src/vnet/devices/virtio/vhost_user.c | 2615
-rw-r--r--  src/vnet/devices/virtio/vhost_user.h | 388
-rw-r--r--  src/vnet/devices/virtio/vhost_user_api.c | 351
-rw-r--r--  src/vnet/devices/virtio/vhost_user_inline.h | 493
-rw-r--r--  src/vnet/devices/virtio/vhost_user_input.c | 1473
-rw-r--r--  src/vnet/devices/virtio/vhost_user_output.c | 1144
-rw-r--r--  src/vnet/devices/virtio/virtio.api | 2
-rw-r--r--  src/vnet/devices/virtio/virtio.c | 120
-rw-r--r--  src/vnet/devices/virtio/virtio.h | 86
-rw-r--r--  src/vnet/devices/virtio/virtio_api.c | 6
-rw-r--r--  src/vnet/devices/virtio/virtio_buffering.h | 11
-rw-r--r--  src/vnet/devices/virtio/virtio_inline.h | 18
-rw-r--r--  src/vnet/devices/virtio/virtio_pci_legacy.c | 7
-rw-r--r--  src/vnet/devices/virtio/virtio_pci_modern.c | 22
-rw-r--r--  src/vnet/devices/virtio/virtio_pre_input.c | 160
-rw-r--r--  src/vnet/devices/virtio/virtio_process.c | 4
-rw-r--r--  src/vnet/devices/virtio/virtio_std.h | 82
-rw-r--r--  src/vnet/dpo/dpo.c | 4
-rw-r--r--  src/vnet/dpo/dpo.h | 2
-rw-r--r--  src/vnet/dpo/dvr_dpo.c | 9
-rw-r--r--  src/vnet/dpo/interface_rx_dpo.c | 7
-rw-r--r--  src/vnet/dpo/interface_tx_dpo.c | 5
-rw-r--r--  src/vnet/dpo/ip6_ll_dpo.c | 22
-rw-r--r--  src/vnet/dpo/l3_proxy_dpo.c | 5
-rw-r--r--  src/vnet/dpo/load_balance.c | 45
-rw-r--r--  src/vnet/dpo/load_balance.h | 18
-rw-r--r--  src/vnet/dpo/load_balance_map.c | 2
-rw-r--r--  src/vnet/dpo/mpls_disposition.c | 58
-rw-r--r--  src/vnet/dpo/mpls_label_dpo.c | 36
-rw-r--r--  src/vnet/dpo/receive_dpo.c | 7
-rw-r--r--  src/vnet/dpo/replicate_dpo.c | 16
-rw-r--r--  src/vnet/dpo/replicate_dpo.h | 8
-rw-r--r--  src/vnet/error.c | 54
-rw-r--r--  src/vnet/error.h | 177
-rw-r--r--  src/vnet/ethernet/arp_packet.h | 12
-rw-r--r--  src/vnet/ethernet/ethernet.h | 34
-rw-r--r--  src/vnet/ethernet/init.c | 4
-rw-r--r--  src/vnet/ethernet/interface.c | 124
-rw-r--r--  src/vnet/ethernet/mac_address.c | 6
-rw-r--r--  src/vnet/ethernet/node.c | 82
-rw-r--r--  src/vnet/ethernet/p2p_ethernet.c | 16
-rw-r--r--  src/vnet/ethernet/p2p_ethernet_api.c | 2
-rw-r--r--  src/vnet/ethernet/p2p_ethernet_input.c | 2
-rw-r--r--  src/vnet/ethernet/packet.h | 2
-rw-r--r--  src/vnet/feature/feature.c | 64
-rw-r--r--  src/vnet/feature/feature.h | 115
-rw-r--r--  src/vnet/feature/registration.c | 2
-rw-r--r--  src/vnet/fib/fib.c | 2
-rw-r--r--  src/vnet/fib/fib_api.c | 8
-rw-r--r--  src/vnet/fib/fib_api.h | 2
-rw-r--r--  src/vnet/fib/fib_attached_export.c | 1
-rw-r--r--  src/vnet/fib/fib_bfd.c | 2
-rw-r--r--  src/vnet/fib/fib_entry.c | 217
-rw-r--r--  src/vnet/fib/fib_entry.h | 11
-rw-r--r--  src/vnet/fib/fib_entry_src.c | 81
-rw-r--r--  src/vnet/fib/fib_entry_src.h | 3
-rw-r--r--  src/vnet/fib/fib_entry_src_interface.c | 81
-rw-r--r--  src/vnet/fib/fib_node.c | 47
-rw-r--r--  src/vnet/fib/fib_node.h | 32
-rw-r--r--  src/vnet/fib/fib_path.c | 78
-rw-r--r--  src/vnet/fib/fib_path.h | 2
-rw-r--r--  src/vnet/fib/fib_path_ext.c | 5
-rw-r--r--  src/vnet/fib/fib_path_ext.h | 2
-rw-r--r--  src/vnet/fib/fib_path_list.c | 9
-rw-r--r--  src/vnet/fib/fib_sas.c | 2
-rw-r--r--  src/vnet/fib/fib_table.c | 45
-rw-r--r--  src/vnet/fib/fib_table.h | 9
-rw-r--r--  src/vnet/fib/fib_types.c | 37
-rw-r--r--  src/vnet/fib/fib_types.h | 7
-rw-r--r--  src/vnet/fib/fib_urpf_list.c | 7
-rw-r--r--  src/vnet/fib/fib_walk.c | 2
-rw-r--r--  src/vnet/fib/ip4_fib.c | 24
-rw-r--r--  src/vnet/fib/ip6_fib.c | 7
-rw-r--r--  src/vnet/fib/mpls_fib.c | 24
-rw-r--r--  src/vnet/fib/mpls_fib.h | 2
-rw-r--r--  src/vnet/flow/FEATURE.yaml | 4
-rw-r--r--  src/vnet/flow/flow.api | 31
-rw-r--r--  src/vnet/flow/flow.c | 2
-rw-r--r--  src/vnet/flow/flow.h | 101
-rw-r--r--  src/vnet/flow/flow_api.c | 97
-rw-r--r--  src/vnet/flow/flow_cli.c | 204
-rw-r--r--  src/vnet/flow/flow_types.api | 109
-rw-r--r--  src/vnet/gre/FEATURE.yaml | 13
-rw-r--r--  src/vnet/gre/error.def | 23
-rw-r--r--  src/vnet/gre/gre.api | 110
-rw-r--r--  src/vnet/gre/gre.c | 870
-rw-r--r--  src/vnet/gre/gre.h | 443
-rw-r--r--  src/vnet/gre/gre_api.c | 219
-rw-r--r--  src/vnet/gre/interface.c | 840
-rw-r--r--  src/vnet/gre/node.c | 598
-rw-r--r--  src/vnet/gre/packet.h | 2
-rw-r--r--  src/vnet/gre/pg.c | 86
-rw-r--r--  src/vnet/gso/FEATURE.yaml | 2
-rw-r--r--  src/vnet/gso/cli.c | 2
-rw-r--r--  src/vnet/gso/gro_func.h | 16
-rw-r--r--  src/vnet/gso/gso.h | 269
-rw-r--r--  src/vnet/gso/gso.rst | 154
-rw-r--r--  src/vnet/gso/hdr_offset_parser.h | 10
-rw-r--r--  src/vnet/gso/node.c | 199
-rw-r--r--  src/vnet/handoff.c | 130
-rw-r--r--  src/vnet/hash/FEATURE.yaml | 2
-rw-r--r--  src/vnet/hash/crc32_5tuple.c | 119
-rw-r--r--  src/vnet/hash/handoff_eth.c (renamed from src/vnet/handoff.h) | 116
-rw-r--r--  src/vnet/hash/hash.rst | 90
-rw-r--r--  src/vnet/hash/hash_eth.c | 326
-rw-r--r--  src/vnet/hdlc/hdlc.c | 2
-rw-r--r--  src/vnet/hdlc/node.c | 2
-rw-r--r--  src/vnet/interface.api | 132
-rw-r--r--  src/vnet/interface.c | 163
-rw-r--r--  src/vnet/interface.h | 143
-rw-r--r--  src/vnet/interface/caps.c | 63
-rw-r--r--  src/vnet/interface/monitor.c | 121
-rw-r--r--  src/vnet/interface/runtime.c | 90
-rw-r--r--  src/vnet/interface/rx_queue.c | 18
-rw-r--r--  src/vnet/interface/stats.c | 84
-rw-r--r--  src/vnet/interface/tx_queue.rst | 159
-rw-r--r--  src/vnet/interface/tx_queue_funcs.h | 17
-rw-r--r--  src/vnet/interface_api.c | 455
-rw-r--r--  src/vnet/interface_cli.c | 379
-rw-r--r--  src/vnet/interface_format.c | 28
-rw-r--r--  src/vnet/interface_funcs.h | 46
-rw-r--r--  src/vnet/interface_output.c | 376
-rw-r--r--  src/vnet/interface_output.h | 31
-rw-r--r--  src/vnet/interface_stats.c | 2
-rw-r--r--  src/vnet/interface_test.c | 146
-rw-r--r--  src/vnet/ip-neighbor/ip4_neighbor.c | 64
-rw-r--r--  src/vnet/ip-neighbor/ip4_neighbor.h | 24
-rw-r--r--  src/vnet/ip-neighbor/ip6_neighbor.c | 67
-rw-r--r--  src/vnet/ip-neighbor/ip6_neighbor.h | 23
-rw-r--r--  src/vnet/ip-neighbor/ip_neighbor.api | 115
-rw-r--r--  src/vnet/ip-neighbor/ip_neighbor.c | 262
-rw-r--r--  src/vnet/ip-neighbor/ip_neighbor.h | 8
-rw-r--r--  src/vnet/ip-neighbor/ip_neighbor_api.c | 28
-rw-r--r--  src/vnet/ip-neighbor/ip_neighbor_types.c | 62
-rw-r--r--  src/vnet/ip-neighbor/ip_neighbor_types.h | 30
-rw-r--r--  src/vnet/ip-neighbor/ip_neighbor_watch.c | 10
-rw-r--r--  src/vnet/ip/icmp4.c | 49
-rw-r--r--  src/vnet/ip/icmp4.h | 23
-rw-r--r--  src/vnet/ip/icmp46_packet.h | 4
-rw-r--r--  src/vnet/ip/icmp6.c | 234
-rw-r--r--  src/vnet/ip/icmp6.h | 42
-rw-r--r--  src/vnet/ip/ip.api | 874
-rw-r--r--  src/vnet/ip/ip.c | 19
-rw-r--r--  src/vnet/ip/ip.h | 7
-rw-r--r--  src/vnet/ip/ip4.h | 3
-rw-r--r--  src/vnet/ip/ip46_address.h | 2
-rw-r--r--  src/vnet/ip/ip46_cli.c | 10
-rw-r--r--  src/vnet/ip/ip4_error.h | 108
-rw-r--r--  src/vnet/ip/ip4_forward.c | 115
-rw-r--r--  src/vnet/ip/ip4_inlines.h | 26
-rw-r--r--  src/vnet/ip/ip4_input.c | 13
-rw-r--r--  src/vnet/ip/ip4_input.h | 19
-rw-r--r--  src/vnet/ip/ip4_mtrie.c | 92
-rw-r--r--  src/vnet/ip/ip4_mtrie.h | 22
-rw-r--r--  src/vnet/ip/ip4_options.c | 9
-rw-r--r--  src/vnet/ip/ip4_packet.h | 50
-rw-r--r--  src/vnet/ip/ip4_punt_drop.c | 23
-rw-r--r--  src/vnet/ip/ip4_source_and_port_range_check.c | 23
-rw-r--r--  src/vnet/ip/ip4_to_ip6.h | 2
-rw-r--r--  src/vnet/ip/ip6.h | 2
-rw-r--r--  src/vnet/ip/ip6_error.h | 112
-rw-r--r--  src/vnet/ip/ip6_format.c | 4
-rw-r--r--  src/vnet/ip/ip6_forward.c | 118
-rw-r--r--  src/vnet/ip/ip6_hop_by_hop.c | 18
-rw-r--r--  src/vnet/ip/ip6_inlines.h | 103
-rw-r--r--  src/vnet/ip/ip6_input.c | 12
-rw-r--r--  src/vnet/ip/ip6_input.h | 2
-rw-r--r--  src/vnet/ip/ip6_link.c | 25
-rw-r--r--  src/vnet/ip/ip6_ll_table.c | 40
-rw-r--r--  src/vnet/ip/ip6_ll_types.c | 6
-rw-r--r--  src/vnet/ip/ip6_packet.h | 357
-rw-r--r--  src/vnet/ip/ip6_punt_drop.c | 31
-rw-r--r--  src/vnet/ip/ip6_to_ip4.h | 56
-rw-r--r--  src/vnet/ip/ip_api.c | 84
-rw-r--r--  src/vnet/ip/ip_checksum.c | 2
-rw-r--r--  src/vnet/ip/ip_container_proxy.c | 6
-rw-r--r--  src/vnet/ip/ip_flow_hash.h | 12
-rw-r--r--  src/vnet/ip/ip_frag.c | 82
-rw-r--r--  src/vnet/ip/ip_frag.h | 22
-rw-r--r--  src/vnet/ip/ip_in_out_acl.c | 197
-rw-r--r--  src/vnet/ip/ip_init.c | 2
-rw-r--r--  src/vnet/ip/ip_interface.c | 18
-rw-r--r--  src/vnet/ip/ip_interface.h | 5
-rwxr-xr-x[-rw-r--r--]  src/vnet/ip/ip_packet.h | 94
-rw-r--r--  src/vnet/ip/ip_path_mtu.c | 28
-rw-r--r--  src/vnet/ip/ip_path_mtu.h | 3
-rw-r--r--  src/vnet/ip/ip_path_mtu_node.c | 7
-rw-r--r--  src/vnet/ip/ip_psh_cksum.h | 5
-rw-r--r--  src/vnet/ip/ip_punt_drop.c | 5
-rw-r--r--  src/vnet/ip/ip_test.c | 28
-rw-r--r--  src/vnet/ip/ip_types.c | 32
-rw-r--r--  src/vnet/ip/ip_types.h | 8
-rw-r--r--  src/vnet/ip/lookup.c | 98
-rw-r--r--  src/vnet/ip/lookup.h | 5
-rw-r--r--  src/vnet/ip/punt.c | 64
-rw-r--r--  src/vnet/ip/punt.h | 6
-rw-r--r--  src/vnet/ip/punt_api.c | 2
-rw-r--r--  src/vnet/ip/punt_node.c | 79
-rw-r--r--  src/vnet/ip/reass/ip4_full_reass.c | 687
-rw-r--r--  src/vnet/ip/reass/ip4_full_reass.h | 3
-rw-r--r--  src/vnet/ip/reass/ip4_sv_reass.c | 486
-rw-r--r--  src/vnet/ip/reass/ip4_sv_reass.h | 1
-rw-r--r--  src/vnet/ip/reass/ip6_full_reass.c | 741
-rw-r--r--  src/vnet/ip/reass/ip6_full_reass.h | 2
-rw-r--r--  src/vnet/ip/reass/ip6_sv_reass.c | 309
-rw-r--r--  src/vnet/ip/reass/ip6_sv_reass.h | 1
-rw-r--r--  src/vnet/ip/reass/reassembly.rst | 221
-rw-r--r--  src/vnet/ip/vtep.h | 6
-rw-r--r--  src/vnet/ip6-nd/ip6_mld.c | 14
-rw-r--r--  src/vnet/ip6-nd/ip6_nd.api | 130
-rw-r--r--  src/vnet/ip6-nd/ip6_nd.c | 21
-rw-r--r--  src/vnet/ip6-nd/ip6_nd_api.c | 173
-rw-r--r--  src/vnet/ip6-nd/ip6_nd_inline.h | 13
-rw-r--r--  src/vnet/ip6-nd/ip6_nd_proxy.c | 4
-rw-r--r--  src/vnet/ip6-nd/ip6_nd_test.c | 57
-rw-r--r--  src/vnet/ip6-nd/ip6_ra.c | 165
-rw-r--r--  src/vnet/ip6-nd/ip6_ra.h | 111
-rw-r--r--  src/vnet/ip6-nd/rd_cp.c | 16
-rw-r--r--  src/vnet/ip6-nd/rd_cp_api.c | 1
-rw-r--r--  src/vnet/ipfix-export/flow_api.c | 257
-rw-r--r--  src/vnet/ipfix-export/flow_report.c | 651
-rw-r--r--  src/vnet/ipfix-export/flow_report.h | 154
-rw-r--r--  src/vnet/ipfix-export/flow_report_classify.c | 46
-rw-r--r--  src/vnet/ipfix-export/flow_report_classify.h | 17
-rw-r--r--  src/vnet/ipfix-export/ipfix_export.api | 74
-rw-r--r--  src/vnet/ipip/ipip.c | 66
-rw-r--r--  src/vnet/ipip/ipip_api.c | 52
-rw-r--r--  src/vnet/ipip/ipip_cli.c | 12
-rw-r--r--  src/vnet/ipip/node.c | 2
-rw-r--r--  src/vnet/ipip/sixrd.c | 12
-rw-r--r--  src/vnet/ipsec/ah.h | 57
-rw-r--r--  src/vnet/ipsec/ah_decrypt.c | 122
-rw-r--r--  src/vnet/ipsec/ah_encrypt.c | 49
-rw-r--r--  src/vnet/ipsec/esp.h | 134
-rw-r--r--  src/vnet/ipsec/esp_decrypt.c | 434
-rw-r--r--  src/vnet/ipsec/esp_encrypt.c | 480
-rw-r--r--  src/vnet/ipsec/ipsec.api | 473
-rw-r--r--  src/vnet/ipsec/ipsec.c | 250
-rw-r--r--  src/vnet/ipsec/ipsec.h | 79
-rw-r--r--  src/vnet/ipsec/ipsec_api.c | 413
-rw-r--r--  src/vnet/ipsec/ipsec_cli.c | 135
-rw-r--r--  src/vnet/ipsec/ipsec_format.c | 216
-rw-r--r--  src/vnet/ipsec/ipsec_handoff.c | 2
-rw-r--r--  src/vnet/ipsec/ipsec_input.c | 561
-rw-r--r--  src/vnet/ipsec/ipsec_itf.c | 43
-rw-r--r--  src/vnet/ipsec/ipsec_itf.h | 2
-rw-r--r--  src/vnet/ipsec/ipsec_output.c | 206
-rw-r--r--  src/vnet/ipsec/ipsec_output.h | 489
-rw-r--r--  src/vnet/ipsec/ipsec_punt.h | 3
-rw-r--r--  src/vnet/ipsec/ipsec_sa.c | 375
-rw-r--r--  src/vnet/ipsec/ipsec_sa.h | 549
-rw-r--r--  src/vnet/ipsec/ipsec_spd.c | 144
-rw-r--r--  src/vnet/ipsec/ipsec_spd.h | 33
-rw-r--r--  src/vnet/ipsec/ipsec_spd_fp_lookup.h | 579
-rw-r--r--  src/vnet/ipsec/ipsec_spd_policy.c | 855
-rw-r--r--  src/vnet/ipsec/ipsec_spd_policy.h | 153
-rw-r--r--  src/vnet/ipsec/ipsec_test.c | 160
-rw-r--r--  src/vnet/ipsec/ipsec_tun.c | 150
-rw-r--r--  src/vnet/ipsec/ipsec_tun.h | 1
-rw-r--r--  src/vnet/ipsec/ipsec_tun_in.c | 135
-rw-r--r--  src/vnet/ipsec/ipsec_types.api | 145
-rw-r--r--  src/vnet/l2/feat_bitmap.c | 2
-rw-r--r--  src/vnet/l2/l2.api | 49
-rw-r--r--  src/vnet/l2/l2_api.c | 51
-rw-r--r--  src/vnet/l2/l2_arp_term.c | 6
-rw-r--r--  src/vnet/l2/l2_bd.c | 65
-rw-r--r--  src/vnet/l2/l2_bd.h | 3
-rw-r--r--  src/vnet/l2/l2_bvi.c | 25
-rw-r--r--  src/vnet/l2/l2_classify.h | 1
-rw-r--r--  src/vnet/l2/l2_efp_filter.c | 4
-rw-r--r--  src/vnet/l2/l2_fib.c | 25
-rw-r--r--  src/vnet/l2/l2_fib.h | 26
-rw-r--r--  src/vnet/l2/l2_flood.c | 4
-rw-r--r--  src/vnet/l2/l2_fwd.c | 9
-rw-r--r--  src/vnet/l2/l2_in_out_acl.c | 6
-rw-r--r--  src/vnet/l2/l2_in_out_feat_arc.c | 7
-rw-r--r--  src/vnet/l2/l2_input.c | 12
-rw-r--r--  src/vnet/l2/l2_input.h | 13
-rw-r--r--  src/vnet/l2/l2_input_classify.c | 14
-rw-r--r--  src/vnet/l2/l2_input_node.c | 17
-rw-r--r--  src/vnet/l2/l2_input_vtr.c | 2
-rw-r--r--  src/vnet/l2/l2_learn.c | 4
-rw-r--r--  src/vnet/l2/l2_output.c | 5
-rw-r--r--  src/vnet/l2/l2_output.h | 3
-rw-r--r--  src/vnet/l2/l2_output_classify.c | 13
-rw-r--r--  src/vnet/l2/l2_patch.c | 10
-rw-r--r--  src/vnet/l2/l2_rw.c | 82
-rw-r--r--  src/vnet/l2/l2_rw.h | 4
-rw-r--r--  src/vnet/l2/l2_test.c | 19
-rw-r--r--  src/vnet/l2/l2_uu_fwd.c | 2
-rw-r--r--  src/vnet/l2/l2_vtr.c | 4
-rw-r--r--  src/vnet/l2/l2_xcrw.c | 18
-rw-r--r--  src/vnet/lawful-intercept/lawful_intercept.c | 124
-rw-r--r--  src/vnet/lawful-intercept/lawful_intercept.h | 56
-rw-r--r--  src/vnet/lawful-intercept/node.c | 288
-rw-r--r--  src/vnet/llc/llc.c | 2
-rw-r--r--  src/vnet/llc/node.c | 2
-rw-r--r--  src/vnet/mfib/ip4_mfib.c | 36
-rw-r--r--  src/vnet/mfib/mfib_entry.c | 9
-rw-r--r--  src/vnet/mfib/mfib_entry_src.h | 2
-rw-r--r--  src/vnet/mfib/mfib_entry_src_rr.c | 24
-rw-r--r--  src/vnet/mfib/mfib_forward.c | 2
-rw-r--r--  src/vnet/mfib/mfib_itf.c | 6
-rw-r--r--  src/vnet/mfib/mfib_types.c | 4
-rw-r--r--  src/vnet/misc.c | 9
-rw-r--r--  src/vnet/mpls/error.def | 32
-rw-r--r--  src/vnet/mpls/interface.c | 25
-rw-r--r--  src/vnet/mpls/mpls.api | 122
-rw-r--r--  src/vnet/mpls/mpls.c | 15
-rw-r--r--  src/vnet/mpls/mpls.h | 23
-rw-r--r--  src/vnet/mpls/mpls_api.c | 57
-rw-r--r--  src/vnet/mpls/mpls_features.c | 1
-rw-r--r--  src/vnet/mpls/mpls_input.c | 11
-rw-r--r--  src/vnet/mpls/mpls_lookup.c | 244
-rw-r--r--  src/vnet/mpls/mpls_output.c | 253
-rw-r--r--  src/vnet/mpls/mpls_tunnel.c | 17
-rw-r--r--  src/vnet/osi/node.c | 2
-rw-r--r--  src/vnet/pg/cli.c | 38
-rw-r--r--  src/vnet/pg/example.script | 10
-rw-r--r--  src/vnet/pg/input.c | 30
-rw-r--r--  src/vnet/pg/pg.api | 4
-rw-r--r--  src/vnet/pg/pg.h | 6
-rw-r--r--  src/vnet/pg/pg_api.c | 2
-rw-r--r--  src/vnet/pg/stream.c | 26
-rw-r--r--  src/vnet/policer/node_funcs.c | 80
-rw-r--r--  src/vnet/policer/police.h | 8
-rw-r--r--  src/vnet/policer/police_inlines.h | 2
-rw-r--r--  src/vnet/policer/policer.api | 107
-rw-r--r--  src/vnet/policer/policer.c | 558
-rw-r--r--  src/vnet/policer/policer.h | 22
-rw-r--r--  src/vnet/policer/policer.rst | 217
-rw-r--r--  src/vnet/policer/policer_api.c | 412
-rw-r--r--  src/vnet/policer/policer_types.api | 28
-rw-r--r--  src/vnet/policer/xlate.c | 2
-rw-r--r--  src/vnet/policer/xlate.h | 2
-rw-r--r--  src/vnet/ppp/node.c | 2
-rw-r--r--  src/vnet/ppp/ppp.c | 2
-rw-r--r--  src/vnet/qos/qos_egress_map.c | 10
-rw-r--r--  src/vnet/qos/qos_mark.c | 4
-rw-r--r--  src/vnet/qos/qos_mark_node.c | 2
-rw-r--r--  src/vnet/qos/qos_record.c | 4
-rw-r--r--  src/vnet/qos/qos_record_node.c | 2
-rw-r--r--  src/vnet/qos/qos_store.c | 4
-rw-r--r--  src/vnet/qos/qos_store_node.c | 2
-rw-r--r--  src/vnet/session/application.c | 261
-rw-r--r--  src/vnet/session/application.h | 85
-rw-r--r--  src/vnet/session/application_interface.c | 26
-rw-r--r--  src/vnet/session/application_interface.h | 115
-rw-r--r--  src/vnet/session/application_local.c | 576
-rw-r--r--  src/vnet/session/application_local.h | 3
-rw-r--r--  src/vnet/session/application_namespace.c | 43
-rw-r--r--  src/vnet/session/application_namespace.h | 9
-rw-r--r--  src/vnet/session/application_worker.c | 434
-rw-r--r--  src/vnet/session/mma_template.h | 2
-rw-r--r--  src/vnet/session/segment_manager.c | 391
-rw-r--r--  src/vnet/session/segment_manager.h | 21
-rw-r--r--  src/vnet/session/session.api | 75
-rw-r--r--  src/vnet/session/session.c | 1034
-rw-r--r--  src/vnet/session/session.h | 368
-rw-r--r--  src/vnet/session/session_api.c | 660
-rw-r--r--  src/vnet/session/session_cli.c | 151
-rw-r--r--  src/vnet/session/session_debug.c | 125
-rw-r--r--  src/vnet/session/session_debug.h | 203
-rw-r--r--  src/vnet/session/session_input.c | 343
-rw-r--r--  src/vnet/session/session_lookup.c | 159
-rw-r--r--  src/vnet/session/session_lookup.h | 7
-rw-r--r--  src/vnet/session/session_node.c | 665
-rw-r--r--  src/vnet/session/session_rules_table.c | 16
-rw-r--r--  src/vnet/session/session_rules_table.h | 8
-rw-r--r--  src/vnet/session/session_table.c | 61
-rw-r--r--  src/vnet/session/session_table.h | 4
-rw-r--r--  src/vnet/session/session_test.c | 14
-rw-r--r--  src/vnet/session/session_types.h | 151
-rw-r--r--  src/vnet/session/transport.c | 309
-rw-r--r--  src/vnet/session/transport.h | 34
-rw-r--r--  src/vnet/session/transport_types.h | 54
-rw-r--r--  src/vnet/snap/node.c | 2
-rw-r--r--  src/vnet/snap/snap.h | 2
-rw-r--r--  src/vnet/span/node.c | 4
-rw-r--r--  src/vnet/span/span.c | 9
-rw-r--r--  src/vnet/span/span_api.c | 2
-rw-r--r--  src/vnet/srmpls/FEATURE.yaml | 9
-rwxr-xr-x  src/vnet/srmpls/dir.dox | 22
-rw-r--r--  src/vnet/srmpls/sr_doc.rst | 215
-rw-r--r--  src/vnet/srmpls/sr_mpls.api | 124
-rw-r--r--  src/vnet/srmpls/sr_mpls.h | 177
-rw-r--r--  src/vnet/srmpls/sr_mpls_api.c | 237
-rw-r--r--  src/vnet/srmpls/sr_mpls_policy.c | 921
-rw-r--r--  src/vnet/srmpls/sr_mpls_steering.c | 905
-rw-r--r--  src/vnet/srmpls/sr_mpls_test.c | 174
-rw-r--r--  src/vnet/srp/node.c | 8
-rw-r--r--  src/vnet/srp/packet.h | 3
-rw-r--r--[-rwxr-xr-x]  src/vnet/srv6/dir.dox | 0
-rw-r--r--  src/vnet/srv6/sr.api | 116
-rw-r--r--  src/vnet/srv6/sr.h | 18
-rw-r--r--  src/vnet/srv6/sr_api.c | 256
-rw-r--r--  src/vnet/srv6/sr_localsid.c | 22
-rw-r--r--  src/vnet/srv6/sr_packet.h | 18
-rw-r--r--  src/vnet/srv6/sr_policy_rewrite.c | 180
-rw-r--r--  src/vnet/srv6/sr_pt.api | 59
-rw-r--r--  src/vnet/srv6/sr_pt.c | 281
-rw-r--r--  src/vnet/srv6/sr_pt.h | 89
-rw-r--r--  src/vnet/srv6/sr_pt_api.c | 97
-rw-r--r--  src/vnet/srv6/sr_pt_node.c | 175
-rw-r--r--  src/vnet/srv6/sr_steering.c | 13
-rw-r--r--  src/vnet/srv6/sr_test.c | 23
-rw-r--r--  src/vnet/syslog/syslog.c | 2
-rw-r--r--  src/vnet/syslog/syslog_api.c | 4
-rw-r--r--  src/vnet/tcp/tcp.c | 130
-rw-r--r--  src/vnet/tcp/tcp.h | 22
-rw-r--r--  src/vnet/tcp/tcp_bt.c | 2
-rw-r--r--  src/vnet/tcp/tcp_cli.c | 18
-rw-r--r--  src/vnet/tcp/tcp_cubic.c | 26
-rw-r--r--  src/vnet/tcp/tcp_debug.c | 4
-rw-r--r--  src/vnet/tcp/tcp_debug.h | 62
-rw-r--r--  src/vnet/tcp/tcp_error.def | 1
-rw-r--r--  src/vnet/tcp/tcp_format.c | 89
-rw-r--r--  src/vnet/tcp/tcp_inlines.h | 44
-rw-r--r--  src/vnet/tcp/tcp_input.c | 550
-rw-r--r--  src/vnet/tcp/tcp_newreno.c | 43
-rw-r--r--  src/vnet/tcp/tcp_output.c | 307
-rw-r--r--  src/vnet/tcp/tcp_packet.h | 97
-rw-r--r--  src/vnet/tcp/tcp_pg.c | 181
-rw-r--r--  src/vnet/tcp/tcp_sack.c | 21
-rw-r--r--  src/vnet/tcp/tcp_sack.h | 2
-rw-r--r--  src/vnet/tcp/tcp_syn_filter4.c | 6
-rw-r--r--  src/vnet/tcp/tcp_timer.h | 53
-rw-r--r--  src/vnet/tcp/tcp_types.h | 2
-rw-r--r--  src/vnet/teib/teib.c | 91
-rw-r--r--  src/vnet/teib/teib_cli.c | 6
-rw-r--r--  src/vnet/tls/tls.c | 380
-rw-r--r--  src/vnet/tls/tls.h | 54
-rw-r--r--  src/vnet/tunnel/tunnel.c | 27
-rw-r--r--  src/vnet/tunnel/tunnel_types_api.c | 9
-rw-r--r--  src/vnet/udp/udp.api | 2
-rw-r--r--  src/vnet/udp/udp.c | 460
-rw-r--r--  src/vnet/udp/udp.h | 119
-rw-r--r--  src/vnet/udp/udp_api.c | 23
-rw-r--r--  src/vnet/udp/udp_cli.c | 231
-rw-r--r--  src/vnet/udp/udp_encap.c | 57
-rw-r--r--  src/vnet/udp/udp_encap.h | 3
-rw-r--r--  src/vnet/udp/udp_encap_node.c | 138
-rw-r--r--  src/vnet/udp/udp_error.def | 3
-rw-r--r--  src/vnet/udp/udp_inlines.h | 111
-rw-r--r--  src/vnet/udp/udp_input.c | 76
-rw-r--r--  src/vnet/udp/udp_local.c | 117
-rw-r--r--  src/vnet/udp/udp_output.c | 254
-rw-r--r--  src/vnet/unix/gdb_funcs.c | 66
-rw-r--r--  src/vnet/unix/tuntap.c | 34
-rw-r--r--  src/vnet/util/throttle.c | 5
-rw-r--r--  src/vnet/util/throttle.h | 19
-rw-r--r--  src/vnet/vnet.h | 2
-rw-r--r--  src/vnet/vxlan-gbp/decap.c | 1050
-rw-r--r--  src/vnet/vxlan-gbp/dir.dox | 24
-rw-r--r--  src/vnet/vxlan-gbp/encap.c | 601
-rw-r--r--  src/vnet/vxlan-gbp/vxlan_gbp.api | 100
-rw-r--r--  src/vnet/vxlan-gbp/vxlan_gbp.c | 1193
-rw-r--r--  src/vnet/vxlan-gbp/vxlan_gbp.h | 250
-rw-r--r--  src/vnet/vxlan-gbp/vxlan_gbp_api.c | 217
-rw-r--r--  src/vnet/vxlan-gbp/vxlan_gbp_error.def | 17
-rw-r--r--  src/vnet/vxlan-gbp/vxlan_gbp_packet.c | 60
-rw-r--r--  src/vnet/vxlan-gbp/vxlan_gbp_packet.h | 173
-rw-r--r--  src/vnet/vxlan-gpe/decap.c | 8
-rw-r--r--  src/vnet/vxlan-gpe/encap.c | 51
-rw-r--r--  src/vnet/vxlan-gpe/vxlan_gpe.c | 29
-rw-r--r--  src/vnet/vxlan-gpe/vxlan_gpe.h | 10
-rw-r--r--  src/vnet/vxlan-gpe/vxlan_gpe_api.c | 8
-rw-r--r--  src/vnet/vxlan/FEATURE.yaml | 14
-rw-r--r--  src/vnet/vxlan/decap.c | 1330
-rw-r--r--  src/vnet/vxlan/dir.dox | 24
-rw-r--r--  src/vnet/vxlan/encap.c | 540
-rw-r--r--  src/vnet/vxlan/vxlan.api | 198
-rw-r--r--  src/vnet/vxlan/vxlan.c | 1343
-rw-r--r--  src/vnet/vxlan/vxlan.h | 244
-rw-r--r--  src/vnet/vxlan/vxlan_api.c | 375
-rw-r--r--  src/vnet/vxlan/vxlan_error.def | 17
-rw-r--r--  src/vnet/vxlan/vxlan_packet.h | 80
588 files changed, 35667 insertions, 36689 deletions
diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt
index 330d4a17360..eb74f5de84c 100644
--- a/src/vnet/CMakeLists.txt
+++ b/src/vnet/CMakeLists.txt
@@ -26,6 +26,22 @@ list(APPEND VNET_SOURCES
config.c
devices/devices.c
devices/netlink.c
+ dev/api.c
+ dev/args.c
+ dev/cli.c
+ dev/config.c
+ dev/counters.c
+ dev/dev.c
+ dev/dev_api.c
+ dev/error.c
+ dev/format.c
+ dev/handlers.c
+ dev/pci.c
+ dev/port.c
+ dev/process.c
+ dev/queue.c
+ dev/runtime.c
+ error.c
flow/flow.c
flow/flow_cli.c
flow/flow_api.c
@@ -35,9 +51,12 @@ list(APPEND VNET_SOURCES
interface_cli.c
interface_format.c
interface_output.c
+ interface/caps.c
interface/rx_queue.c
interface/tx_queue.c
interface/runtime.c
+ interface/monitor.c
+ interface/stats.c
interface_stats.c
misc.c
)
@@ -50,13 +69,14 @@ list(APPEND VNET_MULTIARCH_SOURCES
list(APPEND VNET_HEADERS
api_errno.h
+ error.h
buffer.h
config.h
devices/devices.h
devices/netlink.h
+ dev/dev.h
flow/flow.h
global_funcs.h
- handoff.h
interface/rx_queue_funcs.h
interface/tx_queue_funcs.h
interface.h
@@ -79,6 +99,7 @@ list(APPEND VNET_HEADERS
)
list(APPEND VNET_API_FILES
+ dev/dev.api
interface.api
interface_types.api
ip/ip_types.api
@@ -293,30 +314,6 @@ list(APPEND VNET_HEADERS
)
##############################################################################
-# Layer 2 / vxlan
-##############################################################################
-list(APPEND VNET_SOURCES
- vxlan/vxlan.c
- vxlan/encap.c
- vxlan/decap.c
- vxlan/vxlan_api.c
-)
-
-list(APPEND VNET_MULTIARCH_SOURCES
- vxlan/encap.c
-)
-
-list(APPEND VNET_HEADERS
- vxlan/vxlan.h
- vxlan/vxlan_packet.h
- vxlan/vxlan_error.def
-)
-
-list(APPEND VNET_MULTIARCH_SOURCES vxlan/decap.c)
-
-list(APPEND VNET_API_FILES vxlan/vxlan.api)
-
-##############################################################################
# Layer 2 / Bonding
##############################################################################
list(APPEND VNET_SOURCES
@@ -440,13 +437,11 @@ list(APPEND VNET_HEADERS
ip/icmp4.h
ip/icmp6.h
ip/igmp_packet.h
- ip/ip4_error.h
ip/ip4.h
ip/ip4_mtrie.h
ip/ip4_inlines.h
ip/ip4_packet.h
ip/ip46_address.h
- ip/ip6_error.h
ip/ip6.h
ip/ip6_hop_by_hop.h
ip/ip6_hop_by_hop_packet.h
@@ -466,6 +461,8 @@ list(APPEND VNET_HEADERS
ip/protocols.def
ip/punt_error.def
ip/punt.h
+ ip/reass/ip4_sv_reass.h
+ ip/reass/ip6_sv_reass.h
)
list(APPEND VNET_API_FILES
@@ -666,6 +663,7 @@ list(APPEND VNET_SOURCES
udp/udp_encap.c
udp/udp_decap.c
udp/udp_api.c
+ udp/udp_output.c
)
list(APPEND VNET_MULTIARCH_SOURCES
@@ -676,6 +674,7 @@ list(APPEND VNET_MULTIARCH_SOURCES
list(APPEND VNET_HEADERS
udp/udp_error.def
udp/udp.h
+ udp/udp_encap.h
udp/udp_packet.h
udp/udp_inlines.h
udp/udp_local.h
@@ -686,27 +685,10 @@ list(APPEND VNET_API_FILES udp/udp.api)
##############################################################################
# Tunnel protocol: gre
##############################################################################
-list(APPEND VNET_SOURCES
- gre/gre.c
- gre/node.c
- gre/interface.c
- gre/pg.c
- gre/gre_api.c
-)
-
-list(APPEND VNET_MULTIARCH_SOURCES
- gre/node.c
- gre/gre.c
-)
-
list(APPEND VNET_HEADERS
- gre/gre.h
gre/packet.h
- gre/error.def
)
-list(APPEND VNET_API_FILES gre/gre.api)
-
##############################################################################
# Tunnel protocol: ipip
##############################################################################
@@ -745,6 +727,7 @@ list(APPEND VNET_API_FILES
list(APPEND VNET_HEADERS
tunnel/tunnel.h
tunnel/tunnel_dp.h
+ tunnel/tunnel_types_api.h
)
##############################################################################
@@ -771,39 +754,15 @@ list(APPEND VNET_MULTIARCH_SOURCES
list(APPEND VNET_HEADERS
mpls/mpls.h
+ mpls/mpls_lookup.h
mpls/mpls_types.h
mpls/mpls_tunnel.h
mpls/packet.h
- mpls/error.def
)
list(APPEND VNET_API_FILES mpls/mpls.api)
##############################################################################
-# Tunnel protocol: vxlan-gbp
-##############################################################################
-list(APPEND VNET_SOURCES
- vxlan-gbp/decap.c
- vxlan-gbp/encap.c
- vxlan-gbp/vxlan_gbp_api.c
- vxlan-gbp/vxlan_gbp.c
- vxlan-gbp/vxlan_gbp_packet.c
-)
-
-list (APPEND VNET_MULTIARCH_SOURCES
- vxlan-gbp/decap.c
- vxlan-gbp/encap.c
-)
-
-list(APPEND VNET_HEADERS
- vxlan-gbp/vxlan_gbp.h
- vxlan-gbp/vxlan_gbp_packet.h
- vxlan-gbp/vxlan_gbp_error.def
-)
-
-list(APPEND VNET_API_FILES vxlan-gbp/vxlan_gbp.api)
-
-##############################################################################
# Tunnel protocol: vxlan-gpe
##############################################################################
@@ -836,34 +795,21 @@ list(APPEND VNET_SOURCES
srv6/sr_policy_rewrite.c
srv6/sr_steering.c
srv6/sr_api.c
+ srv6/sr_pt.c
+ srv6/sr_pt_node.c
+ srv6/sr_pt_api.c
)
list(APPEND VNET_HEADERS
srv6/sr_packet.h
srv6/sr.h
+ srv6/sr_pt.h
)
list(APPEND VNET_API_FILES
srv6/sr.api
srv6/sr_types.api
-)
-
-##############################################################################
-# mpls segment routing
-##############################################################################
-
-list(APPEND VNET_SOURCES
- srmpls/sr_mpls_policy.c
- srmpls/sr_mpls_steering.c
- srmpls/sr_mpls_api.c
-)
-
-list(APPEND VNET_HEADERS
- srmpls/sr_mpls.h
-)
-
-list(APPEND VNET_API_FILES
- srmpls/sr_mpls.api
+ srv6/sr_pt.api
)
##############################################################################
@@ -889,6 +835,8 @@ list(APPEND VNET_SOURCES
hash/hash.c
hash/cli.c
hash/crc32_5tuple.c
+ hash/handoff_eth.c
+ hash/hash_eth.c
)
list(APPEND VNET_HEADERS
@@ -929,23 +877,6 @@ list(APPEND VNET_HEADERS
)
##############################################################################
-# lawful intercept
-##############################################################################
-
-list(APPEND VNET_SOURCES
- lawful-intercept/lawful_intercept.c
- lawful-intercept/node.c
-)
-
-list(APPEND VNET_MULTIARCH_SOURCES
- lawful-intercept/node.c
-)
-
-list(APPEND VNET_HEADERS
- lawful-intercept/lawful_intercept.h
-)
-
-##############################################################################
# SPAN (port mirroring)
##############################################################################
@@ -996,15 +927,11 @@ list(APPEND VNET_SOURCES
devices/virtio/format.c
devices/virtio/node.c
devices/virtio/pci.c
- devices/virtio/vhost_user.c
- devices/virtio/vhost_user_input.c
- devices/virtio/vhost_user_output.c
- devices/virtio/vhost_user_api.c
devices/virtio/virtio.c
devices/virtio/virtio_api.c
devices/virtio/virtio_pci_legacy.c
devices/virtio/virtio_pci_modern.c
- devices/virtio/virtio_process.c
+ devices/virtio/virtio_pre_input.c
devices/virtio/virtio_types_api.c
)
@@ -1016,20 +943,15 @@ list(APPEND VNET_HEADERS
devices/virtio/virtio_pci_legacy.h
devices/virtio/virtio_pci_modern.h
devices/virtio/vhost_std.h
- devices/virtio/vhost_user.h
devices/virtio/virtio_types_api.h
)
list(APPEND VNET_MULTIARCH_SOURCES
- devices/virtio/vhost_user_input.c
- devices/virtio/vhost_user_output.c
devices/virtio/node.c
- devices/af_packet/node.c
devices/virtio/device.c
)
list(APPEND VNET_API_FILES
- devices/virtio/vhost_user.api
devices/virtio/virtio.api
devices/virtio/virtio_types.api
)
@@ -1038,6 +960,7 @@ list(APPEND VNET_API_FILES
# tap interface (with virtio backend)
##############################################################################
+if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
list(APPEND VNET_SOURCES
devices/tap/cli.c
devices/tap/tap.c
@@ -1051,6 +974,7 @@ list(APPEND VNET_HEADERS
list(APPEND VNET_API_FILES
devices/tap/tapv2.api
)
+endif()
##############################################################################
# tap interface (with virtio backend)
@@ -1078,6 +1002,7 @@ list(APPEND VNET_SOURCES
session/session_rules_table.c
session/session_lookup.c
session/session_node.c
+ session/session_input.c
session/transport.c
session/application.c
session/application_worker.c
@@ -1124,27 +1049,6 @@ list(APPEND VNET_HEADERS
tls/tls_test.h
)
-##############################################################################
-# Linux packet interface
-##############################################################################
-
-list(APPEND VNET_SOURCES
- devices/af_packet/af_packet.c
- devices/af_packet/device.c
- devices/af_packet/node.c
- devices/af_packet/cli.c
- devices/af_packet/af_packet_api.c
-)
-
-list(APPEND VNET_MULTIARCH_SOURCES
- devices/af_packet/device.c
-)
-
-list(APPEND VNET_HEADERS
- devices/af_packet/af_packet.h
-)
-
-list(APPEND VNET_API_FILES devices/af_packet/af_packet.api)
##############################################################################
# Driver feature graph arc support
@@ -1168,6 +1072,7 @@ list(APPEND VNET_API_FILES feature/feature.api)
# FIXME: unix/hgshm.c
+if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
list(APPEND VNET_SOURCES
unix/gdb_funcs.c
unix/tuntap.c
@@ -1176,6 +1081,7 @@ list(APPEND VNET_SOURCES
list(APPEND VNET_HEADERS
unix/tuntap.h
)
+endif()
##############################################################################
# FIB
@@ -1222,7 +1128,11 @@ list(APPEND VNET_SOURCES
list(APPEND VNET_HEADERS
fib/fib.h
fib/fib_api.h
+ fib/fib_entry_track.h
fib/ip4_fib.h
+ fib/ip4_fib_8.h
+ fib/ip4_fib_16.h
+ fib/ip4_fib_hash.h
fib/ip6_fib.h
fib/fib_types.h
fib/fib_table.h
@@ -1230,8 +1140,11 @@ list(APPEND VNET_HEADERS
fib/fib_node_list.h
fib/fib_entry.h
fib/fib_entry_delegate.h
+ fib/fib_path.h
+ fib/fib_path_list.h
fib/fib_sas.h
fib/fib_source.h
+ fib/fib_urpf_list.h
)
list(APPEND VNET_API_FILES
@@ -1309,6 +1222,7 @@ list(APPEND VNET_MULTIARCH_SOURCES
list(APPEND VNET_HEADERS
dpo/load_balance.h
+ dpo/load_balance_map.h
dpo/drop_dpo.h
dpo/lookup_dpo.h
dpo/punt_dpo.h
@@ -1422,10 +1336,13 @@ list(APPEND VNET_MULTIARCH_SOURCES
)
list(APPEND VNET_HEADERS
- bier/bier_types.h
+ bier/bier_bit_string.h
bier/bier_entry.h
+ bier/bier_fwd.h
+ bier/bier_hdr_inlines.h
bier/bier_update.h
bier/bier_table.h
+ bier/bier_types.h
)
list(APPEND VNET_API_FILES bier/bier.api)
@@ -1526,7 +1443,6 @@ add_vat_test_library(vnet
ip/ip_test.c
arp/arp_test.c
ip6-nd/ip6_nd_test.c
- srmpls/sr_mpls_test.c
session/session_test.c
l2/l2_test.c
ipsec/ipsec_test.c
diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c
index 8808294f7a6..201561fe485 100644
--- a/src/vnet/adj/adj.c
+++ b/src/vnet/adj/adj.c
@@ -34,6 +34,11 @@ vlib_combined_counter_main_t adjacency_counters = {
ip_adjacency_t *adj_pool;
/**
+ * The adjacency logger
+ */
+vlib_log_class_t adj_logger;
+
+/**
* @brief Global Config for enabling per-adjacency counters.
* By default these are disabled.
*/
@@ -64,14 +69,12 @@ ip_adjacency_t *
adj_alloc (fib_protocol_t proto)
{
ip_adjacency_t *adj;
- u8 need_barrier_sync = 0;
+ u8 need_barrier_sync = pool_get_will_expand (adj_pool);
vlib_main_t *vm;
vm = vlib_get_main();
ASSERT (vm->thread_index == 0);
- pool_get_aligned_will_expand (adj_pool, need_barrier_sync,
- CLIB_CACHE_LINE_BYTES);
/* If the adj_pool will expand, stop the parade. */
if (need_barrier_sync)
vlib_worker_thread_barrier_sync (vm);
@@ -309,12 +312,12 @@ adj_last_lock_gone (ip_adjacency_t *adj)
break;
}
- vlib_worker_thread_barrier_release(vm);
fib_node_deinit(&adj->ia_node);
ASSERT(0 == vec_len(adj->ia_delegates));
vec_free(adj->ia_delegates);
pool_put(adj_pool, adj);
+ vlib_worker_thread_barrier_release(vm);
}
u32
@@ -350,7 +353,6 @@ adj_lock (adj_index_t adj_index)
adj = adj_get(adj_index);
ASSERT(adj);
- ADJ_DBG(adj, "lock");
fib_node_lock(&adj->ia_node);
}
@@ -367,9 +369,6 @@ adj_unlock (adj_index_t adj_index)
adj = adj_get(adj_index);
ASSERT(adj);
- ADJ_DBG(adj, "unlock");
- ASSERT(adj);
-
fib_node_unlock(&adj->ia_node);
}
@@ -650,6 +649,8 @@ adj_module_init (vlib_main_t * vm)
vnet_feature_register(adj_feature_update, NULL);
+ adj_logger = vlib_log_register_class("adj", "adj");
+
return (NULL);
}
@@ -703,7 +704,6 @@ adj_show (vlib_main_t * vm,
}
else
{
- /* *INDENT-OFF* */
pool_foreach_index (ai, adj_pool)
{
if (~0 != sw_if_index &&
@@ -718,7 +718,6 @@ adj_show (vlib_main_t * vm,
FORMAT_IP_ADJACENCY_NONE);
}
}
- /* *INDENT-ON* */
}
}
return 0;
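
The adj_alloc() hunk above moves the expansion check to pool_get_will_expand(): the allocator first asks whether the next pool_get() will realloc the pool's backing memory, and only then stops the worker threads so no reader is left holding a stale element pointer. A minimal sketch of that pattern, assuming VPP's vlib/vppinfra APIs; the element type and pool here are illustrative, not part of the patch:

#include <vlib/vlib.h>
#include <vppinfra/pool.h>

typedef struct { u32 value; } example_elt_t;
static example_elt_t *example_pool; /* illustrative pool, not in the patch */

static example_elt_t *
example_alloc (vlib_main_t *vm)
{
  example_elt_t *e;
  /* will the next pool_get() move the pool's backing memory? */
  u8 need_barrier_sync = pool_get_will_expand (example_pool);

  /* if so, park the workers before the realloc can happen */
  if (need_barrier_sync)
    vlib_worker_thread_barrier_sync (vm);

  pool_get_aligned (example_pool, e, CLIB_CACHE_LINE_BYTES);

  if (need_barrier_sync)
    vlib_worker_thread_barrier_release (vm);

  return e;
}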
diff --git a/src/vnet/adj/adj.h b/src/vnet/adj/adj.h
index c1922c755ec..860193c04ad 100644
--- a/src/vnet/adj/adj.h
+++ b/src/vnet/adj/adj.h
@@ -165,14 +165,6 @@ typedef enum adj_attr_t_
ADJ_ATTR_SYNC_WALK_ACTIVE = 0,
/**
- * Packets TX through the midchain do not increment the interface
- * counters. This should be used when the adj is associated with an L2
- * interface and that L2 interface is in a bridge domain. In that case
- * the packet will have traversed the interface's TX node, and hence have
- * been counted, before it traverses ths midchain
- */
- ADJ_ATTR_MIDCHAIN_NO_COUNT,
- /**
* When stacking midchains on a fib-entry extract the choice from the
* load-balance returned based on an IP hash of the adj's rewrite
*/
@@ -195,7 +187,6 @@ typedef enum adj_attr_t_
#define ADJ_ATTR_NAMES { \
[ADJ_ATTR_SYNC_WALK_ACTIVE] = "walk-active", \
- [ADJ_ATTR_MIDCHAIN_NO_COUNT] = "midchain-no-count", \
[ADJ_ATTR_MIDCHAIN_IP_STACK] = "midchain-ip-stack", \
[ADJ_ATTR_MIDCHAIN_LOOPED] = "midchain-looped", \
[ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR] = "midchain-ip4o4-hdr-fixup", \
@@ -214,7 +205,6 @@ typedef enum adj_flags_t_
{
ADJ_FLAG_NONE = 0,
ADJ_FLAG_SYNC_WALK_ACTIVE = (1 << ADJ_ATTR_SYNC_WALK_ACTIVE),
- ADJ_FLAG_MIDCHAIN_NO_COUNT = (1 << ADJ_ATTR_MIDCHAIN_NO_COUNT),
ADJ_FLAG_MIDCHAIN_IP_STACK = (1 << ADJ_ATTR_MIDCHAIN_IP_STACK),
ADJ_FLAG_MIDCHAIN_LOOPED = (1 << ADJ_ATTR_MIDCHAIN_LOOPED),
ADJ_FLAG_MIDCHAIN_FIXUP_IP4O4_HDR = (1 << ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR),
diff --git a/src/vnet/adj/adj_bfd.c b/src/vnet/adj/adj_bfd.c
index 2d787d41ab6..e54ba6d74ae 100644
--- a/src/vnet/adj/adj_bfd.c
+++ b/src/vnet/adj/adj_bfd.c
@@ -114,9 +114,7 @@ void
adj_bfd_notify (bfd_listen_event_e event,
const bfd_session_t *session)
{
- const bfd_udp_key_t *key;
adj_bfd_delegate_t *abd;
- fib_protocol_t fproto;
adj_delegate_t *aed;
adj_index_t ai;
@@ -129,19 +127,28 @@ adj_bfd_notify (bfd_listen_event_e event,
return;
}
- key = &session->udp.key;
-
- fproto = (ip46_address_is_ip4 (&key->peer_addr) ?
- FIB_PROTOCOL_IP4:
- FIB_PROTOCOL_IP6);
+ switch (session->transport)
+ {
+ case BFD_TRANSPORT_UDP4:
+ case BFD_TRANSPORT_UDP6:
+ /*
+ * pick up the same adjacency that the BFD session is using
+ * to send. The BFD session is holding a lock on this adj.
+ */
+ ai = session->udp.adj_index;
+ break;
+ default:
+ /*
+ * Don't know what adj this session uses
+ */
+ return;
+ }
- /*
- * find the adj that corresponds to the BFD session.
- */
- ai = adj_nbr_add_or_lock(fproto,
- fib_proto_to_link(fproto),
- &key->peer_addr,
- key->sw_if_index);
+ if (INDEX_INVALID == ai)
+ {
+ /* No associated Adjacency with the session */
+ return;
+ }
switch (event)
{
@@ -160,13 +167,6 @@ adj_bfd_notify (bfd_listen_event_e event,
else
{
/*
- * lock the adj. add the delegate.
- * Locking the adj prevents it being removed and thus maintains
- * the BFD derived states
- */
- adj_lock(ai);
-
- /*
* allocate and init a new delegate struct
*/
pool_get(abd_pool, abd);
@@ -213,14 +213,12 @@ adj_bfd_notify (bfd_listen_event_e event,
{
/*
* has an associated BFD tracking delegate
- * remove the BFD tracking delegate, update children, then
- * unlock the adj
+ * remove the BFD tracking delegate, update children
*/
adj_delegate_remove(ai, ADJ_DELEGATE_BFD);
pool_put(abd_pool, abd);
adj_bfd_update_walk(ai);
- adj_unlock(ai);
}
/*
* else
@@ -228,11 +226,6 @@ adj_bfd_notify (bfd_listen_event_e event,
*/
break;
}
-
- /*
- * unlock match of the add-or-lock at the start
- */
- adj_unlock(ai);
}
int
@@ -287,9 +280,7 @@ adj_bfd_main_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (adj_bfd_main_init)=
{
.runs_after = VLIB_INITS("bfd_main_init"),
};
-/* *INDENT-ON* */
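
The rewritten adj_bfd_notify() above no longer re-resolves (and re-locks) a neighbour adjacency per event; for UDP transports it borrows session->udp.adj_index, on which the BFD session already holds a lock, and bails out otherwise. A sketch of a listener following the same pattern, assuming the BFD types used in the patch; the handler name is illustrative:

#include <vnet/adj/adj.h>
#include <vnet/bfd/bfd_main.h>

static void
example_bfd_notify (bfd_listen_event_e event, const bfd_session_t *session)
{
  adj_index_t ai;

  switch (session->transport)
    {
    case BFD_TRANSPORT_UDP4:
    case BFD_TRANSPORT_UDP6:
      /* the session holds a lock on this adj; just borrow it */
      ai = session->udp.adj_index;
      break;
    default:
      /* no adjacency associated with this transport */
      return;
    }

  if (INDEX_INVALID == ai)
    return;

  /* ... handle the create/update/delete event for adjacency 'ai' ... */
}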
diff --git a/src/vnet/adj/adj_dp.h b/src/vnet/adj/adj_dp.h
index aff1a2b1f43..186044b90ad 100644
--- a/src/vnet/adj/adj_dp.h
+++ b/src/vnet/adj/adj_dp.h
@@ -36,22 +36,36 @@ adj_midchain_ipip44_fixup (vlib_main_t * vm,
ip4->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
if (PREDICT_TRUE(TUNNEL_ENCAP_DECAP_FLAG_NONE == flags))
- {
- ip_csum_t sum;
- u16 old,new;
-
- old = 0;
- new = ip4->length;
-
- sum = ip4->checksum;
- sum = ip_csum_update (sum, old, new, ip4_header_t, length);
- ip4->checksum = ip_csum_fold (sum);
- }
+ {
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP |
+ VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM);
+ }
+ else
+ {
+ ip_csum_t sum;
+ u16 old,new;
+ old = 0;
+ new = ip4->length;
+ sum = ip4->checksum;
+ sum = ip_csum_update (sum, old, new, ip4_header_t, length);
+ ip4->checksum = ip_csum_fold (sum);
+ }
+ }
else
- {
+ {
tunnel_encap_fixup_4o4 (flags, ip4 + 1, ip4);
- ip4->checksum = ip4_header_checksum (ip4);
- }
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP |
+ VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM);
+ }
+ else
+ ip4->checksum = ip4_header_checksum (ip4);
+ }
}
static_always_inline void
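
The non-GSO branch above updates the IPv4 checksum incrementally with ip_csum_update()/ip_csum_fold() instead of recomputing it over the whole header; in the patch the old value is 0 because the length field starts out cleared. A self-contained sketch of the general technique (RFC 1624 style), with an illustrative helper name:

#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip_packet.h>

/* fold the existing checksum forward when one 16-bit field changes */
static void
example_set_ip4_length (ip4_header_t *ip4, u16 new_length_net)
{
  ip_csum_t sum = ip4->checksum;
  u16 old = ip4->length;

  sum = ip_csum_update (sum, old, new_length_net, ip4_header_t, length);
  ip4->length = new_length_net;
  ip4->checksum = ip_csum_fold (sum);
}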
diff --git a/src/vnet/adj/adj_glean.c b/src/vnet/adj/adj_glean.c
index e8ca043662f..ceece0d74ed 100644
--- a/src/vnet/adj/adj_glean.c
+++ b/src/vnet/adj/adj_glean.c
@@ -45,7 +45,7 @@ adj_glean_db_lookup (fib_protocol_t proto,
{
uword *p;
- if (vec_len(adj_gleans[proto]) <= sw_if_index)
+ if ((proto >= FIB_PROTOCOL_IP_MAX) || vec_len(adj_gleans[proto]) <= sw_if_index)
return (ADJ_INDEX_INVALID);
p = hash_get_mem (adj_gleans[proto][sw_if_index], nh_addr);
@@ -66,6 +66,7 @@ adj_glean_db_insert (fib_protocol_t proto,
vlib_worker_thread_barrier_sync(vm);
+ ASSERT(proto < FIB_PROTOCOL_IP_MAX);
vec_validate(adj_gleans[proto], sw_if_index);
if (NULL == adj_gleans[proto][sw_if_index])
@@ -186,6 +187,38 @@ adj_glean_update_rewrite_walk (adj_index_t ai,
return (ADJ_WALK_RC_CONTINUE);
}
+static void
+adj_glean_walk_proto (fib_protocol_t proto,
+ u32 sw_if_index,
+ adj_walk_cb_t cb,
+ void *data)
+{
+ adj_index_t ai, *aip, *ais = NULL;
+ ip46_address_t *conn;
+
+ ASSERT(proto < FIB_PROTOCOL_IP_MAX);
+ if (vec_len(adj_gleans[proto]) <= sw_if_index ||
+ NULL == adj_gleans[proto][sw_if_index])
+ return;
+
+ /*
+ * Walk first to collect the indices
+ * then walk the collection. This keeps the walk
+ * safe against modifications of the hash table
+ */
+ hash_foreach_mem(conn, ai, adj_gleans[proto][sw_if_index],
+ ({
+ vec_add1(ais, ai);
+ }));
+
+ vec_foreach(aip, ais)
+ {
+ if (ADJ_WALK_RC_STOP == cb(*aip, data))
+ break;
+ }
+ vec_free(ais);
+}
+
void
adj_glean_walk (u32 sw_if_index,
adj_walk_cb_t cb,
@@ -195,29 +228,7 @@ adj_glean_walk (u32 sw_if_index,
FOR_EACH_FIB_IP_PROTOCOL(proto)
{
- adj_index_t ai, *aip, *ais = NULL;
- ip46_address_t *conn;
-
- if (vec_len(adj_gleans[proto]) <= sw_if_index ||
- NULL == adj_gleans[proto][sw_if_index])
- continue;
-
- /*
- * Walk first to collect the indices
- * then walk the collection. This is safe
- * to modifications of the hash table
- */
- hash_foreach_mem(conn, ai, adj_gleans[proto][sw_if_index],
- ({
- vec_add1(ais, ai);
- }));
-
- vec_foreach(aip, ais)
- {
- if (ADJ_WALK_RC_STOP == cb(*aip, data))
- break;
- }
- vec_free(ais);
+ adj_glean_walk_proto (proto, sw_if_index, cb, data);
}
}
@@ -235,6 +246,7 @@ adj_glean_get (fib_protocol_t proto,
ip46_address_t *conn;
adj_index_t ai;
+ ASSERT(proto < FIB_PROTOCOL_IP_MAX);
if (vec_len(adj_gleans[proto]) <= sw_if_index ||
NULL == adj_gleans[proto][sw_if_index])
return (ADJ_INDEX_INVALID);
@@ -256,6 +268,7 @@ adj_glean_get_src (fib_protocol_t proto,
const ip_adjacency_t *adj;
adj_index_t ai;
+ ASSERT(proto < FIB_PROTOCOL_IP_MAX);
if (vec_len(adj_gleans[proto]) <= sw_if_index ||
NULL == adj_gleans[proto][sw_if_index])
return (NULL);
@@ -424,11 +437,59 @@ VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_glean_interface_delete);
*/
static void
adj_glean_ethernet_change_mac (ethernet_main_t * em,
- u32 sw_if_index, uword opaque)
+ u32 sw_if_index,
+ uword opaque)
{
adj_glean_walk (sw_if_index, adj_glean_update_rewrite_walk, NULL);
}
+static void
+adj_glean_table_bind (fib_protocol_t fproto,
+ u32 sw_if_index,
+ u32 itf_fib_index)
+{
+ /*
+ * for each glean on the interface trigger a walk back to the children
+ */
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_BIND,
+ .interface_bind = {
+ .fnbw_to_fib_index = itf_fib_index,
+ },
+ };
+
+ adj_glean_walk_proto (fproto, sw_if_index, adj_glean_start_backwalk, &bw_ctx);
+}
+
+
+/**
+ * Callback function invoked when an interface's IPv6 Table
+ * binding changes
+ */
+static void
+adj_glean_ip6_table_bind (ip6_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ u32 new_fib_index,
+ u32 old_fib_index)
+{
+ adj_glean_table_bind (FIB_PROTOCOL_IP6, sw_if_index, new_fib_index);
+}
+
+/**
+ * Callback function invoked when an interface's IPv4 Table
+ * binding changes
+ */
+static void
+adj_glean_ip4_table_bind (ip4_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ u32 new_fib_index,
+ u32 old_fib_index)
+{
+ adj_glean_table_bind (FIB_PROTOCOL_IP4, sw_if_index, new_fib_index);
+}
+
u8*
format_adj_glean (u8* s, va_list *ap)
{
@@ -519,4 +580,14 @@ adj_glean_module_init (void)
.function_opaque = 0,
};
vec_add1 (ethernet_main.address_change_callbacks, ctx);
+
+ ip6_table_bind_callback_t cbt6 = {
+ .function = adj_glean_ip6_table_bind,
+ };
+ vec_add1 (ip6_main.table_bind_callbacks, cbt6);
+
+ ip4_table_bind_callback_t cbt4 = {
+ .function = adj_glean_ip4_table_bind,
+ };
+ vec_add1 (ip4_main.table_bind_callbacks, cbt4);
}
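
adj_glean_walk_proto() above keeps the factored-out two-phase walk: snapshot the values stored in the hash into a vector, then run the callback over the snapshot, so the callback is free to add or remove hash entries. A generic sketch of the same pattern, assuming vppinfra's hash/vec APIs; all names are illustrative:

#include <vppinfra/hash.h>
#include <vppinfra/vec.h>

typedef int (example_walk_cb_t) (u32 value, void *ctx);

static void
example_safe_hash_walk (uword *table, example_walk_cb_t *cb, void *ctx)
{
  u32 value, *vp, *values = NULL;
  u8 *key;

  /* phase 1: snapshot the stored values */
  hash_foreach_mem (key, value, table, ({
    vec_add1 (values, value);
  }));

  /* phase 2: visit the snapshot; cb may modify 'table' freely */
  vec_foreach (vp, values)
    {
      if (cb (*vp, ctx))
        break;
    }
  vec_free (values);
}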
diff --git a/src/vnet/adj/adj_internal.h b/src/vnet/adj/adj_internal.h
index 3dbf7e2a371..380af46e22a 100644
--- a/src/vnet/adj/adj_internal.h
+++ b/src/vnet/adj/adj_internal.h
@@ -31,24 +31,20 @@
/*
* Debug macro
*/
-#ifdef ADJ_DEBUG
-#define ADJ_DBG(_adj, _fmt, _args...) \
-{ \
- clib_warning("adj:[%d:%p]:" _fmt, \
- _adj - adj_pool, _adj, \
- ##_args); \
+extern vlib_log_class_t adj_logger;
+#define ADJ_DBG(_adj, _fmt, _args...) \
+{ \
+ vlib_log_debug(adj_logger, "adj:[%d:%p]:" _fmt, \
+ _adj - adj_pool, _adj, \
+ ##_args); \
}
-#else
-#define ADJ_DBG(_e, _fmt, _args...)
-#endif
/*
* Vlib nodes
*/
extern vlib_node_registration_t adj_nsh_midchain_node;
extern vlib_node_registration_t adj_nsh_rewrite_node;
-extern vlib_node_registration_t adj_midchain_tx_no_count_node;
-extern vlib_node_registration_t adj_midchain_tx_node;
+extern vlib_node_registration_t adj_midchain_tx;
static inline u32
adj_get_rewrite_node (vnet_link_t linkt)
@@ -128,6 +124,7 @@ extern void adj_nbr_remove(adj_index_t ai,
vnet_link_t link_type,
const ip46_address_t *nh_addr,
u32 sw_if_index);
+extern u32 adj_nbr_get_n_adjs(vnet_link_t link_type, u32 sw_if_index);
extern void adj_glean_remove(ip_adjacency_t *adj);
extern void adj_mcast_remove(fib_protocol_t proto,
u32 sw_if_index);
diff --git a/src/vnet/adj/adj_mcast.c b/src/vnet/adj/adj_mcast.c
index a20f61f6f6b..573105b7228 100644
--- a/src/vnet/adj/adj_mcast.c
+++ b/src/vnet/adj/adj_mcast.c
@@ -82,6 +82,8 @@ adj_mcast_add_or_lock (fib_protocol_t proto,
*/
vnet_update_adjacency_for_sw_interface(vnm, sw_if_index,
adj_get_index(adj));
+
+ adj_delegate_adj_created(adj);
}
else
{
@@ -89,8 +91,6 @@ adj_mcast_add_or_lock (fib_protocol_t proto,
adj_lock(adj_get_index(adj));
}
- adj_delegate_adj_created(adj);
-
return (adj_get_index(adj));
}
diff --git a/src/vnet/adj/adj_midchain.c b/src/vnet/adj/adj_midchain.c
index 9f709ad13be..8e6a940befa 100644
--- a/src/vnet/adj/adj_midchain.c
+++ b/src/vnet/adj/adj_midchain.c
@@ -75,52 +75,37 @@ adj_get_midchain_node (vnet_link_t link)
}
static u8
-adj_midchain_get_feature_arc_index_for_link_type (const ip_adjacency_t *adj)
+adj_midchain_get_feature_arc_index (const ip_adjacency_t *adj)
{
- u8 arc = (u8) ~0;
switch (adj->ia_link)
{
case VNET_LINK_IP4:
- {
- arc = ip4_main.lookup_main.output_feature_arc_index;
- break;
- }
+ return ip4_main.lookup_main.output_feature_arc_index;
case VNET_LINK_IP6:
- {
- arc = ip6_main.lookup_main.output_feature_arc_index;
- break;
- }
+ return ip6_main.lookup_main.output_feature_arc_index;
case VNET_LINK_MPLS:
- {
- arc = mpls_main.output_feature_arc_index;
- break;
- }
+ return mpls_main.output_feature_arc_index;
case VNET_LINK_ETHERNET:
- {
- arc = ethernet_main.output_feature_arc_index;
- break;
- }
+ return ethernet_main.output_feature_arc_index;
case VNET_LINK_NSH:
- {
- arc = nsh_main_placeholder.output_feature_arc_index;
- break;
- }
case VNET_LINK_ARP:
- ASSERT(0);
break;
}
-
- ASSERT (arc != (u8) ~0);
-
- return (arc);
+ ASSERT (0);
+ return (0);
}
static u32
adj_nbr_midchain_get_tx_node (ip_adjacency_t *adj)
{
- return ((adj->ia_flags & ADJ_FLAG_MIDCHAIN_NO_COUNT) ?
- adj_midchain_tx_no_count_node.index :
- adj_midchain_tx_node.index);
+ return (adj_midchain_tx.index);
+}
+
+static u32
+adj_nbr_midchain_get_next_node (ip_adjacency_t *adj)
+{
+ return (vnet_feature_get_end_node(adj_midchain_get_feature_arc_index(adj),
+ adj->rewrite_header.sw_if_index));
}
/**
@@ -131,17 +116,7 @@ adj_nbr_midchain_get_tx_node (ip_adjacency_t *adj)
void
adj_midchain_teardown (ip_adjacency_t *adj)
{
- vlib_main_t *vm = vlib_get_main();
-
dpo_reset(&adj->sub_type.midchain.next_dpo);
-
- vlib_worker_thread_barrier_sync(vm);
- adj->ia_cfg_index = vnet_feature_modify_end_node(
- adj_midchain_get_feature_arc_index_for_link_type (adj),
- adj->rewrite_header.sw_if_index,
- vlib_get_node_by_name (vlib_get_main(),
- (u8*) "interface-output")->index);
- vlib_worker_thread_barrier_release(vm);
}
/**
@@ -155,9 +130,7 @@ adj_midchain_setup (adj_index_t adj_index,
const void *data,
adj_flags_t flags)
{
- vlib_main_t *vm = vlib_get_main();
ip_adjacency_t *adj;
- u32 tx_node;
ASSERT(ADJ_INDEX_INVALID != adj_index);
@@ -181,15 +154,6 @@ adj_midchain_setup (adj_index_t adj_index,
adj->rewrite_header.flags &= ~VNET_REWRITE_FIXUP_FLOW_HASH;
}
- tx_node = adj_nbr_midchain_get_tx_node(adj);
-
- vlib_worker_thread_barrier_sync(vm);
- adj->ia_cfg_index = vnet_feature_modify_end_node(
- adj_midchain_get_feature_arc_index_for_link_type (adj),
- adj->rewrite_header.sw_if_index,
- tx_node);
- vlib_worker_thread_barrier_release(vm);
-
/*
* stack the midchain on the drop so it's ready to forward in the adj-midchain-tx.
* The graph arc used/created here is from the midchain-tx node to the
@@ -197,7 +161,7 @@ adj_midchain_setup (adj_index_t adj_index,
* node are any output features, then the midchain-tx. from there we
* need to get to the stacked child's node.
*/
- dpo_stack_from_node(tx_node,
+ dpo_stack_from_node(adj_nbr_midchain_get_tx_node(adj),
&adj->sub_type.midchain.next_dpo,
drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
}
@@ -238,7 +202,7 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
adj_nbr_update_rewrite_internal(adj,
IP_LOOKUP_NEXT_MIDCHAIN,
adj_get_midchain_node(adj->ia_link),
- adj_nbr_midchain_get_tx_node(adj),
+ adj_nbr_midchain_get_next_node(adj),
rewrite);
}
@@ -260,11 +224,6 @@ adj_nbr_midchain_update_next_node (adj_index_t adj_index,
adj->ia_node_index,
next_node);
- adj->ia_cfg_index = vnet_feature_modify_end_node(
- adj_midchain_get_feature_arc_index_for_link_type (adj),
- adj->rewrite_header.sw_if_index,
- next_node);
-
vlib_worker_thread_barrier_release(vm);
}
@@ -284,12 +243,7 @@ adj_nbr_midchain_reset_next_node (adj_index_t adj_index)
adj->rewrite_header.next_index =
vlib_node_add_next(vlib_get_main(),
adj->ia_node_index,
- adj_nbr_midchain_get_tx_node(adj));
-
- adj->ia_cfg_index = vnet_feature_modify_end_node(
- adj_midchain_get_feature_arc_index_for_link_type (adj),
- adj->rewrite_header.sw_if_index,
- adj_nbr_midchain_get_tx_node(adj));
+ adj_nbr_midchain_get_next_node(adj));
vlib_worker_thread_barrier_release(vm);
}
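
Taken together, the adj_midchain.c hunks above stop patching the feature arc's end node per adjacency (the removed vnet_feature_modify_end_node() calls) and instead resolve the rewrite's next node from the arc itself via vnet_feature_get_end_node(). A sketch of that lookup for the IPv4 output arc, using names taken from the patch; the wrapper function is illustrative:

#include <vnet/feature/feature.h>
#include <vnet/ip/ip4.h>

static u32
example_midchain_next_node (u32 sw_if_index)
{
  /* the node at the end of the arc; any enabled output
     features on this interface run before it */
  u8 arc = ip4_main.lookup_main.output_feature_arc_index;

  return vnet_feature_get_end_node (arc, sw_if_index);
}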
diff --git a/src/vnet/adj/adj_midchain.h b/src/vnet/adj/adj_midchain.h
index 85294122f08..eee8c99ae40 100644
--- a/src/vnet/adj/adj_midchain.h
+++ b/src/vnet/adj/adj_midchain.h
@@ -160,6 +160,11 @@ extern void adj_midchain_delegate_restack(adj_index_t ai);
*/
extern void adj_midchain_delegate_unstack(adj_index_t ai);
+/**
+ * @brief remove a midchain delegate (this stacks it on a drop)
+ */
+extern void adj_midchain_delegate_remove (adj_index_t ai);
+
extern u8 adj_is_midchain (adj_index_t ai);
#endif
diff --git a/src/vnet/adj/adj_midchain_delegate.c b/src/vnet/adj/adj_midchain_delegate.c
index 9e788432640..16129ff86ac 100644
--- a/src/vnet/adj/adj_midchain_delegate.c
+++ b/src/vnet/adj/adj_midchain_delegate.c
@@ -132,6 +132,31 @@ adj_midchain_delegate_stack (adj_index_t ai,
}
void
+adj_midchain_delegate_remove (adj_index_t ai)
+{
+ adj_midchain_delegate_t *amd;
+ ip_adjacency_t *adj;
+ adj_delegate_t *ad;
+
+ /*
+ * if there's a delegate, it can be removed
+ */
+ adj = adj_get(ai);
+ ad = adj_delegate_get(adj, ADJ_DELEGATE_MIDCHAIN);
+
+ if (NULL != ad)
+ {
+ adj_nbr_midchain_unstack(ai);
+
+ amd = pool_elt_at_index(amd_pool, ad->ad_index);
+ fib_entry_untrack(amd->amd_fei, amd->amd_sibling);
+ pool_put(amd_pool, amd);
+
+ adj_delegate_remove (ai, ADJ_DELEGATE_MIDCHAIN);
+ }
+}
+
+void
adj_midchain_delegate_unstack (adj_index_t ai)
{
adj_nbr_midchain_unstack(ai);
diff --git a/src/vnet/adj/adj_midchain_node.c b/src/vnet/adj/adj_midchain_node.c
index 170ed19855e..fcc2c6c7647 100644
--- a/src/vnet/adj/adj_midchain_node.c
+++ b/src/vnet/adj/adj_midchain_node.c
@@ -202,16 +202,20 @@ format_adj_midchain_tx_trace (u8 * s, va_list * args)
return (s);
}
-static uword
-adj_midchain_tx (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (adj_midchain_tx) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (adj_midchain_tx_inline(vm, node, frame, 1));
+}
+VLIB_NODE_FN (tunnel_output) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
{
return (adj_midchain_tx_inline(vm, node, frame, 1));
}
-VLIB_REGISTER_NODE (adj_midchain_tx_node) = {
- .function = adj_midchain_tx,
+VLIB_REGISTER_NODE (adj_midchain_tx) = {
.name = "adj-midchain-tx",
.vector_size = sizeof (u32),
@@ -222,20 +226,23 @@ VLIB_REGISTER_NODE (adj_midchain_tx_node) = {
[0] = "error-drop",
},
};
+VLIB_REGISTER_NODE (tunnel_output) = {
+ .name = "tunnel-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_adj_midchain_tx_trace,
+ .sibling_of = "adj-midchain-tx",
+};
-static uword
-adj_midchain_tx_no_count (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (tunnel_output_no_count) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
{
return (adj_midchain_tx_inline(vm, node, frame, 0));
}
-VLIB_REGISTER_NODE (adj_midchain_tx_no_count_node) = {
- .function = adj_midchain_tx_no_count,
- .name = "adj-midchain-tx-no-count",
+VLIB_REGISTER_NODE (tunnel_output_no_count) = {
+ .name = "tunnel-output-no-count",
.vector_size = sizeof (u32),
-
.format_trace = format_adj_midchain_tx_trace,
.sibling_of = "adj-midchain-tx",
};
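
adj_midchain_node.c above switches from static functions wired in with .function to the VLIB_NODE_FN()/VLIB_REGISTER_NODE() pairing, where the function and its registration are matched by name and .sibling_of shares the next-node table of an existing node. A minimal sketch of that registration style; the node name and body are illustrative:

#include <vlib/vlib.h>
#include <vlib/buffer_node.h>

VLIB_NODE_FN (example_output) (vlib_main_t *vm, vlib_node_runtime_t *node,
                               vlib_frame_t *frame)
{
  /* send every buffer to next index 0 (error-drop, inherited
     from the adj-midchain-tx sibling's next-node table) */
  vlib_buffer_enqueue_to_single_next (vm, node, vlib_frame_vector_args (frame),
                                      0, frame->n_vectors);
  return frame->n_vectors;
}

VLIB_REGISTER_NODE (example_output) = {
  .name = "example-output",
  .vector_size = sizeof (u32),
  .sibling_of = "adj-midchain-tx",
};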
diff --git a/src/vnet/adj/adj_nbr.c b/src/vnet/adj/adj_nbr.c
index 8524c6c83ae..b3a027b7af4 100644
--- a/src/vnet/adj/adj_nbr.c
+++ b/src/vnet/adj/adj_nbr.c
@@ -105,6 +105,46 @@ adj_nbr_remove (adj_index_t ai,
}
}
+typedef struct adj_nbr_get_n_adjs_walk_ctx_t_
+{
+ vnet_link_t linkt;
+ u32 count;
+} adj_nbr_get_n_adjs_walk_ctx_t;
+
+static adj_walk_rc_t
+adj_nbr_get_n_adjs_walk (adj_index_t ai,
+ void *data)
+{
+ adj_nbr_get_n_adjs_walk_ctx_t *ctx = data;
+ const ip_adjacency_t *adj;
+
+ adj = adj_get(ai);
+
+ if (ctx->linkt == adj->ia_link)
+ ctx->count++;
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+u32
+adj_nbr_get_n_adjs (vnet_link_t link_type, u32 sw_if_index)
+{
+ adj_nbr_get_n_adjs_walk_ctx_t ctx = {
+ .linkt = link_type,
+ };
+ fib_protocol_t fproto;
+
+ FOR_EACH_FIB_IP_PROTOCOL(fproto)
+ {
+ adj_nbr_walk (sw_if_index,
+ fproto,
+ adj_nbr_get_n_adjs_walk,
+ &ctx);
+ }
+
+ return (ctx.count);
+}
+
adj_index_t
adj_nbr_find (fib_protocol_t nh_proto,
vnet_link_t link_type,
@@ -492,7 +532,7 @@ adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
fib_walk_sync(FIB_NODE_TYPE_ADJ, walk_ai, &bw_ctx);
/*
- * fib_walk_sync may allocate a new adjacency and potentially cuase a
+ * fib_walk_sync may allocate a new adjacency and potentially cause a
* realloc for adj_pool. When that happens, adj pointer is no longer
* valid here. We refresh the adj pointer accordingly.
*/
@@ -560,7 +600,7 @@ adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
walk_adj->ia_flags &= ~ADJ_FLAG_SYNC_WALK_ACTIVE;
}
- adj_delegate_adj_modified(adj);
+ adj_delegate_adj_modified(adj_get(ai));
adj_unlock(ai);
adj_unlock(walk_ai);
}
@@ -753,9 +793,15 @@ adj_nbr_interface_state_change_one (adj_index_t ai,
adj_lock (ai);
adj = adj_get(ai);
-
adj->ia_flags |= ADJ_FLAG_SYNC_WALK_ACTIVE;
fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx);
+
+ /*
+ * fib_walk_sync may allocate a new adjacency and potentially cause a
+ * realloc for adj_pool. When that happens, adj pointer is no longer
+ * valid here. We refresh the adj pointer accordingly.
+ */
+ adj = adj_get(ai);
adj->ia_flags &= ~ADJ_FLAG_SYNC_WALK_ACTIVE;
adj_unlock (ai);
@@ -863,9 +909,15 @@ adj_nbr_interface_delete_one (adj_index_t ai,
adj_lock(ai);
adj = adj_get(ai);
-
adj->ia_flags |= ADJ_FLAG_SYNC_WALK_ACTIVE;
fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx);
+
+ /*
+ * fib_walk_sync may allocate a new adjacency and potentially cause a
+ * realloc for adj_pool. When that happens, adj pointer is no longer
+ * valid here. We refresh the adj pointer accordingly.
+ */
+ adj = adj_get(ai);
adj->ia_flags &= ~ADJ_FLAG_SYNC_WALK_ACTIVE;
adj_unlock(ai);
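
Three hunks in adj_nbr.c apply the same fix: fib_walk_sync() can allocate adjacencies, which may realloc adj_pool and invalidate any cached element pointer, so the pointer is re-fetched from its index after the walk. A self-contained sketch of the hazard against a generic vppinfra pool; all names are illustrative:

#include <vppinfra/pool.h>

typedef struct { u32 flags; } elt_t;
static elt_t *elt_pool; /* illustrative pool, not in the patch */

static void
may_allocate (void)
{
  elt_t *n;
  pool_get (elt_pool, n); /* may realloc the pool's backing memory */
  n->flags = 0;
}

static void
update_elt (u32 index)
{
  elt_t *e = pool_elt_at_index (elt_pool, index);

  e->flags |= 1;
  may_allocate ();

  /* 'e' may now dangle; re-resolve from the stable index */
  e = pool_elt_at_index (elt_pool, index);
  e->flags &= ~1;
}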
diff --git a/src/vnet/adj/adj_nsh.c b/src/vnet/adj/adj_nsh.c
index 00d945729d8..1b4fa6c15b9 100644
--- a/src/vnet/adj/adj_nsh.c
+++ b/src/vnet/adj/adj_nsh.c
@@ -190,7 +190,6 @@ VLIB_REGISTER_NODE (adj_nsh_midchain_node) = {
};
/* Built-in ip4 tx feature path definition */
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (nsh_output, static) =
{
.arc_name = "nsh-output",
@@ -204,4 +203,3 @@ VNET_FEATURE_INIT (nsh_tx_drop, static) =
.node_name = "error-drop",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
diff --git a/src/vnet/adj/rewrite.h b/src/vnet/adj/rewrite.h
index 4234986dc37..06b1b00882e 100644
--- a/src/vnet/adj/rewrite.h
+++ b/src/vnet/adj/rewrite.h
@@ -136,7 +136,7 @@ always_inline void
vnet_rewrite_clear_data_internal (vnet_rewrite_header_t * rw, int max_size)
{
/* Sanity check values carefully for this clib_memset operation */
- ASSERT ((max_size > 0) && (max_size < VLIB_BUFFER_PRE_DATA_SIZE));
+ ASSERT ((max_size > 0) && (max_size < VNET_REWRITE_TOTAL_BYTES));
rw->data_bytes = 0;
clib_memset (rw->data, 0xfe, max_size);
@@ -147,8 +147,8 @@ vnet_rewrite_set_data_internal (vnet_rewrite_header_t * rw,
int max_size, void *data, int data_bytes)
{
/* Sanity check values carefully for this clib_memset operation */
- ASSERT ((max_size > 0) && (max_size < VLIB_BUFFER_PRE_DATA_SIZE));
- ASSERT ((data_bytes >= 0) && (data_bytes < max_size));
+ ASSERT ((max_size > 0) && (max_size <= VNET_REWRITE_TOTAL_BYTES));
+ ASSERT ((data_bytes >= 0) && (data_bytes <= max_size));
rw->data_bytes = data_bytes;
clib_memcpy_fast (rw->data, data, data_bytes);
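
With the comparison relaxed to <=, a rewrite string may now fill the rewrite area exactly. A sketch of a call the old strict < would have asserted on, given a vnet_rewrite_header_t *rw (contents hypothetical):

u8 hdr[VNET_REWRITE_TOTAL_BYTES]; /* a maximal rewrite string */
/* data_bytes == max_size is now permitted */
vnet_rewrite_set_data_internal (rw, sizeof (hdr), hdr, sizeof (hdr));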
diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h
index df3806a7630..52f201c081b 100644
--- a/src/vnet/api_errno.h
+++ b/src/vnet/api_errno.h
@@ -18,146 +18,9 @@
#include <stdarg.h>
#include <vppinfra/types.h>
#include <vppinfra/format.h>
+#include <vnet/error.h>
-#define foreach_vnet_api_error \
-_(UNSPECIFIED, -1, "Unspecified Error") \
-_(INVALID_SW_IF_INDEX, -2, "Invalid sw_if_index") \
-_(NO_SUCH_FIB, -3, "No such FIB / VRF") \
-_(NO_SUCH_INNER_FIB, -4, "No such inner FIB / VRF") \
-_(NO_SUCH_LABEL, -5, "No such label") \
-_(NO_SUCH_ENTRY, -6, "No such entry") \
-_(INVALID_VALUE, -7, "Invalid value") \
-_(INVALID_VALUE_2, -8, "Invalid value #2") \
-_(UNIMPLEMENTED, -9, "Unimplemented") \
-_(INVALID_SW_IF_INDEX_2, -10, "Invalid sw_if_index #2") \
-_(SYSCALL_ERROR_1, -11, "System call error #1") \
-_(SYSCALL_ERROR_2, -12, "System call error #2") \
-_(SYSCALL_ERROR_3, -13, "System call error #3") \
-_(SYSCALL_ERROR_4, -14, "System call error #4") \
-_(SYSCALL_ERROR_5, -15, "System call error #5") \
-_(SYSCALL_ERROR_6, -16, "System call error #6") \
-_(SYSCALL_ERROR_7, -17, "System call error #7") \
-_(SYSCALL_ERROR_8, -18, "System call error #8") \
-_(SYSCALL_ERROR_9, -19, "System call error #9") \
-_(SYSCALL_ERROR_10, -20, "System call error #10") \
-_(FEATURE_DISABLED, -30, "Feature disabled by configuration") \
-_(INVALID_REGISTRATION, -31, "Invalid registration") \
-_(NEXT_HOP_NOT_IN_FIB, -50, "Next hop not in FIB") \
-_(UNKNOWN_DESTINATION, -51, "Unknown destination") \
-_(NO_PATHS_IN_ROUTE, -52, "No paths specified in route") \
-_(NEXT_HOP_NOT_FOUND_MP, -53, "Next hop not found (multipath)") \
-_(NO_MATCHING_INTERFACE, -54, "No matching interface for probe") \
-_(INVALID_VLAN, -55, "Invalid VLAN") \
-_(VLAN_ALREADY_EXISTS, -56, "VLAN subif already exists") \
-_(INVALID_SRC_ADDRESS, -57, "Invalid src address") \
-_(INVALID_DST_ADDRESS, -58, "Invalid dst address") \
-_(ADDRESS_LENGTH_MISMATCH, -59, "Address length mismatch") \
-_(ADDRESS_NOT_FOUND_FOR_INTERFACE, -60, "Address not found for interface") \
-_(ADDRESS_NOT_DELETABLE, -61, "Address not deletable") \
-_(IP6_NOT_ENABLED, -62, "ip6 not enabled") \
-_(NO_SUCH_NODE, -63, "No such graph node") \
-_(NO_SUCH_NODE2, -64, "No such graph node #2") \
-_(NO_SUCH_TABLE, -65, "No such table") \
-_(NO_SUCH_TABLE2, -66, "No such table #2") \
-_(NO_SUCH_TABLE3, -67, "No such table #3") \
-_(SUBIF_ALREADY_EXISTS, -68, "Subinterface already exists") \
-_(SUBIF_CREATE_FAILED, -69, "Subinterface creation failed") \
-_(INVALID_MEMORY_SIZE, -70, "Invalid memory size requested") \
-_(INVALID_INTERFACE, -71, "Invalid interface") \
-_(INVALID_VLAN_TAG_COUNT, -72, "Invalid number of tags for requested operation") \
-_(INVALID_ARGUMENT, -73, "Invalid argument") \
-_(UNEXPECTED_INTF_STATE, -74, "Unexpected interface state") \
-_(TUNNEL_EXIST, -75, "Tunnel already exists") \
-_(INVALID_DECAP_NEXT, -76, "Invalid decap-next") \
-_(RESPONSE_NOT_READY, -77, "Response not ready") \
-_(NOT_CONNECTED, -78, "Not connected to the data plane") \
-_(IF_ALREADY_EXISTS, -79, "Interface already exists") \
-_(BOND_SLAVE_NOT_ALLOWED, -80, "Operation not allowed on slave of BondEthernet") \
-_(VALUE_EXIST, -81, "Value already exists") \
-_(SAME_SRC_DST, -82, "Source and destination are the same") \
-_(IP6_MULTICAST_ADDRESS_NOT_PRESENT, -83, "IP6 multicast address required") \
-_(SR_POLICY_NAME_NOT_PRESENT, -84, "Segment routing policy name required") \
-_(NOT_RUNNING_AS_ROOT, -85, "Not running as root") \
-_(ALREADY_CONNECTED, -86, "Connection to the data plane already exists") \
-_(UNSUPPORTED_JNI_VERSION, -87, "Unsupported JNI version") \
-_(IP_PREFIX_INVALID, -88, "IP prefix invalid (masked bits set in address") \
-_(INVALID_WORKER, -89, "Invalid worker thread") \
-_(LISP_DISABLED, -90, "LISP is disabled") \
-_(CLASSIFY_TABLE_NOT_FOUND, -91, "Classify table not found") \
-_(INVALID_EID_TYPE, -92, "Unsupported LISP EID type") \
-_(CANNOT_CREATE_PCAP_FILE, -93, "Cannot create pcap file") \
-_(INCORRECT_ADJACENCY_TYPE, -94, "Invalid adjacency type for this operation") \
-_(EXCEEDED_NUMBER_OF_RANGES_CAPACITY, -95, "Operation would exceed configured capacity of ranges") \
-_(EXCEEDED_NUMBER_OF_PORTS_CAPACITY, -96, "Operation would exceed capacity of number of ports") \
-_(INVALID_ADDRESS_FAMILY, -97, "Invalid address family") \
-_(INVALID_SUB_SW_IF_INDEX, -98, "Invalid sub-interface sw_if_index") \
-_(TABLE_TOO_BIG, -99, "Table too big") \
-_(CANNOT_ENABLE_DISABLE_FEATURE, -100, "Cannot enable/disable feature") \
-_(BFD_EEXIST, -101, "Duplicate BFD object") \
-_(BFD_ENOENT, -102, "No such BFD object") \
-_(BFD_EINUSE, -103, "BFD object in use") \
-_(BFD_NOTSUPP, -104, "BFD feature not supported") \
-_(ADDRESS_IN_USE, -105, "Address in use") \
-_(ADDRESS_NOT_IN_USE, -106, "Address not in use") \
-_(QUEUE_FULL, -107, "Queue full") \
-_(APP_UNSUPPORTED_CFG, -108, "Unsupported application config") \
-_(URI_FIFO_CREATE_FAILED, -109, "URI FIFO segment create failed") \
-_(LISP_RLOC_LOCAL, -110, "RLOC address is local") \
-_(BFD_EAGAIN, -111, "BFD object cannot be manipulated at this time") \
-_(INVALID_GPE_MODE, -112, "Invalid GPE mode") \
-_(LISP_GPE_ENTRIES_PRESENT, -113, "LISP GPE entries are present") \
-_(ADDRESS_FOUND_FOR_INTERFACE, -114, "Address found for interface") \
-_(SESSION_CONNECT, -115, "Session failed to connect") \
-_(ENTRY_ALREADY_EXISTS, -116, "Entry already exists") \
-_(SVM_SEGMENT_CREATE_FAIL, -117, "Svm segment create fail") \
-_(APPLICATION_NOT_ATTACHED, -118, "Application not attached") \
-_(BD_ALREADY_EXISTS, -119, "Bridge domain already exists") \
-_(BD_IN_USE, -120, "Bridge domain has member interfaces") \
-_(BD_NOT_MODIFIABLE, -121, "Bridge domain 0 can't be deleted/modified") \
-_(BD_ID_EXCEED_MAX, -122, "Bridge domain ID exceeds 16M limit") \
-_(SUBIF_DOESNT_EXIST, -123, "Subinterface doesn't exist") \
-_(L2_MACS_EVENT_CLINET_PRESENT, -124, "Client already exist for L2 MACs events") \
-_(INVALID_QUEUE, -125, "Invalid queue") \
-_(UNSUPPORTED, -126, "Unsupported") \
-_(DUPLICATE_IF_ADDRESS, -127, "Address already present on another interface") \
-_(APP_INVALID_NS, -128, "Invalid application namespace") \
-_(APP_WRONG_NS_SECRET, -129, "Wrong app namespace secret") \
-_(APP_CONNECT_SCOPE, -130, "Connect scope") \
-_(APP_ALREADY_ATTACHED, -131, "App already attached") \
-_(SESSION_REDIRECT, -132, "Redirect failed") \
-_(ILLEGAL_NAME, -133, "Illegal name") \
-_(NO_NAME_SERVERS, -134, "No name servers configured") \
-_(NAME_SERVER_NOT_FOUND, -135, "Name server not found") \
-_(NAME_RESOLUTION_NOT_ENABLED, -136, "Name resolution not enabled") \
-_(NAME_SERVER_FORMAT_ERROR, -137, "Server format error (bug!)") \
-_(NAME_SERVER_NO_SUCH_NAME, -138, "No such name") \
-_(NAME_SERVER_NO_ADDRESSES, -139, "No addresses available") \
-_(NAME_SERVER_NEXT_SERVER, -140, "Retry with new server") \
-_(APP_CONNECT_FILTERED, -141, "Connect was filtered") \
-_(ACL_IN_USE_INBOUND, -142, "Inbound ACL in use") \
-_(ACL_IN_USE_OUTBOUND, -143, "Outbound ACL in use") \
-_(INIT_FAILED, -144, "Initialization Failed") \
-_(NETLINK_ERROR, -145, "Netlink error") \
-_(BIER_BSL_UNSUP, -146, "BIER bit-string-length unsupported") \
-_(INSTANCE_IN_USE, -147, "Instance in use") \
-_(INVALID_SESSION_ID, -148, "Session ID out of range") \
-_(ACL_IN_USE_BY_LOOKUP_CONTEXT, -149, "ACL in use by a lookup context") \
-_(INVALID_VALUE_3, -150, "Invalid value #3") \
-_(NON_ETHERNET, -151, "Interface is not an Ethernet interface") \
-_(BD_ALREADY_HAS_BVI, -152, "Bridge domain already has a BVI interface") \
-_(INVALID_PROTOCOL, -153, "Invalid Protocol") \
-_(INVALID_ALGORITHM, -154, "Invalid Algorithm") \
-_(RSRC_IN_USE, -155, "Resource In Use") \
-_(KEY_LENGTH, -156, "invalid Key Length") \
-_(FIB_PATH_UNSUPPORTED_NH_PROTO, -157, "Unsupported FIB Path protocol") \
-_(API_ENDIAN_FAILED, -159, "Endian mismatch detected") \
-_(NO_CHANGE, -160, "No change in table") \
-_(MISSING_CERT_KEY, -161, "Missing certifcate or key") \
-_(LIMIT_EXCEEDED, -162, "limit exceeded") \
-_(IKE_NO_PORT, -163, "port not managed by IKE") \
-_(UDP_PORT_TAKEN, -164, "UDP port already taken") \
-_(EAGAIN, -165, "Retry stream call with cursor") \
-_(INVALID_VALUE_4, -166, "Invalid value #4") \
+#define foreach_vnet_api_error foreach_vnet_error
typedef enum
{
@@ -167,29 +30,25 @@ typedef enum
VNET_API_N_ERROR,
} vnet_api_error_t;
-/* *INDENT-OFF* */
-static inline u8 *
-format_vnet_api_errno (u8 * s, va_list * args)
+format_function_t format_vnet_api_errno;
+
+static_always_inline vnet_api_error_t
+vnet_api_error (clib_error_t *err)
{
- vnet_api_error_t api_error = va_arg (*args, vnet_api_error_t);
-#ifdef _
-#undef _
-#endif
-#define _(a, b, c) \
- case b: \
- s = format (s, "%s", c); \
- break;
- switch (api_error)
- {
- foreach_vnet_api_error
- default:
- s = format (s, "UNKNOWN");
- break;
- }
- return s;
-#undef _
+ if (err == 0)
+ return 0;
+ if (err->code >= 0)
+ return VNET_API_ERROR_BUG;
+ return err->code;
+}
+
+static_always_inline vnet_api_error_t
+vnet_get_api_error_and_free (clib_error_t *err)
+{
+ vnet_api_error_t rv = vnet_api_error (err);
+ clib_error_free (err);
+ return rv;
}
-/* *INDENT-ON* */
#endif /* included_vnet_api_errno_h */
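
With the error table folded into <vnet/error.h>, handlers can map a clib_error_t directly to an API return code via the new helpers. A minimal sketch of the intended use, with a hypothetical handler and callee:

static int
my_api_handler (my_msg_t *mp)
{
  clib_error_t *err = my_operation (mp);

  /* negative clib error codes pass through; non-negative => BUG */
  return vnet_get_api_error_and_free (err);
}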
diff --git a/src/vnet/arp/arp.api b/src/vnet/arp/arp.api
index 27bfa3b65c6..7de06f7f7e1 100644
--- a/src/vnet/arp/arp.api
+++ b/src/vnet/arp/arp.api
@@ -98,3 +98,121 @@ define proxy_arp_intfc_details
u32 context;
u32 sw_if_index;
};
+
+counters arp {
+ replies_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ARP replies sent";
+ };
+ disabled {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ARP Disabled";
+ };
+ l2_type_not_ethernet {
+ severity error;
+ type counter64;
+ units "packets";
+ description "L2 type not ethernet";
+ };
+ l3_type_not_ip4 {
+ severity error;
+ type counter64;
+ units "packets";
+ description "L3 type not IP4";
+ };
+ l3_src_address_not_local {
+ severity error;
+ type counter64;
+ units "packets";
+ description "IP4 source address not local to subnet";
+ };
+ l3_dst_address_not_local {
+ severity error;
+ type counter64;
+ units "packets";
+ description "IP4 destination address not local to subnet";
+ };
+ l3_dst_address_unset {
+ severity error;
+ type counter64;
+ units "packets";
+ description "IP4 destination address is unset";
+ };
+ l3_src_address_is_local {
+ severity error;
+ type counter64;
+ units "packets";
+ description "IP4 source address matches local interface";
+ };
+ l3_src_address_learned {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ARP request IP4 source address learned";
+ };
+ replies_received {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ARP replies received";
+ };
+ opcode_not_request {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ARP opcode not request";
+ };
+ proxy_arp_replies_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "Proxy ARP replies sent";
+ };
+ l2_address_mismatch {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ARP hw addr does not match L2 frame src addr";
+ };
+ gratuitous_arp {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ARP probe or announcement dropped";
+ };
+ interface_no_table {
+ severity error;
+ type counter64;
+ units "packets";
+ description "Interface is not mapped to an IP table";
+ };
+ interface_not_ip_enabled {
+ severity error;
+ type counter64;
+ units "packets";
+ description "Interface is not IP enabled";
+ };
+ unnumbered_mismatch {
+ severity error;
+ type counter64;
+ units "packets";
+ description "RX interface is unnumbered to different subnet";
+ };
+};
+
+paths {
+ "/err/arp-reply" "arp";
+ "/err/arp-disabled" "arp";
+ "/err/arp-input" "arp";
+ "/err/arp-proxy" "arp";
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/arp/arp.c b/src/vnet/arp/arp.c
index ced3c1cb7a7..43b2a93a7b3 100644
--- a/src/vnet/arp/arp.c
+++ b/src/vnet/arp/arp.c
@@ -25,6 +25,7 @@
#include <vnet/pg/pg.h>
#include <vnet/ip-neighbor/ip_neighbor.h>
+#include <vnet/ip-neighbor/ip4_neighbor.h>
#include <vnet/ip-neighbor/ip_neighbor_dp.h>
#include <vlibmemory/api.h>
@@ -190,7 +191,6 @@ always_inline u32
arp_learn (u32 sw_if_index,
const ethernet_arp_ip4_over_ethernet_address_t * addr)
{
- /* *INDENT-OFF* */
ip_neighbor_learn_t l = {
.ip = {
.ip.ip4 = addr->ip4,
@@ -199,11 +199,10 @@ arp_learn (u32 sw_if_index,
.mac = addr->mac,
.sw_if_index = sw_if_index,
};
- /* *INDENT-ON* */
ip_neighbor_learn_dp (&l);
- return (ETHERNET_ARP_ERROR_l3_src_address_learned);
+ return (ARP_ERROR_L3_SRC_ADDRESS_LEARNED);
}
typedef enum arp_input_next_t_
@@ -248,22 +247,21 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
p0 = vlib_get_buffer (vm, pi0);
arp0 = vlib_buffer_get_current (p0);
- error0 = ETHERNET_ARP_ERROR_replies_sent;
+ error0 = ARP_ERROR_REPLIES_SENT;
next0 = ARP_INPUT_NEXT_DROP;
- error0 =
- (arp0->l2_type !=
- clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet) ?
- ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
- error0 =
- (arp0->l3_type !=
- clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
- ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
- error0 =
- (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ?
- ETHERNET_ARP_ERROR_l3_dst_address_unset : error0);
-
- if (ETHERNET_ARP_ERROR_replies_sent == error0)
+ error0 = (arp0->l2_type != clib_net_to_host_u16 (
+ ETHERNET_ARP_HARDWARE_TYPE_ethernet) ?
+ ARP_ERROR_L2_TYPE_NOT_ETHERNET :
+ error0);
+ error0 = (arp0->l3_type != clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
+ ARP_ERROR_L3_TYPE_NOT_IP4 :
+ error0);
+ error0 = (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ?
+ ARP_ERROR_L3_DST_ADDRESS_UNSET :
+ error0);
+
+ if (ARP_ERROR_REPLIES_SENT == error0)
{
next0 = ARP_INPUT_NEXT_DISABLED;
vnet_feature_arc_start (am->feature_arc_index,
@@ -289,23 +287,6 @@ typedef enum arp_disabled_next_t_
ARP_DISABLED_N_NEXT,
} arp_disabled_next_t;
-#define foreach_arp_disabled_error \
- _ (DISABLED, "ARP Disabled on this interface") \
-
-typedef enum
-{
-#define _(sym,string) ARP_DISABLED_ERROR_##sym,
- foreach_arp_disabled_error
-#undef _
- ARP_DISABLED_N_ERROR,
-} arp_disabled_error_t;
-
-static char *arp_disabled_error_strings[] = {
-#define _(sym,string) string,
- foreach_arp_disabled_error
-#undef _
-};
-
static uword
arp_disabled (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * frame)
@@ -332,7 +313,7 @@ arp_disabled (vlib_main_t * vm,
u32 pi0, error0;
next0 = ARP_DISABLED_NEXT_DROP;
- error0 = ARP_DISABLED_ERROR_DISABLED;
+ error0 = ARP_ERROR_DISABLED;
pi0 = to_next[0] = from[0];
from += 1;
@@ -371,7 +352,6 @@ arp_dst_fib_check (const fib_node_index_t fei, fib_entry_flag_t * flags)
const fib_entry_t *entry = fib_entry_get (fei);
const fib_entry_src_t *entry_src;
fib_source_t src;
- /* *INDENT-OFF* */
FOR_EACH_SRC_ADDED(entry, entry_src, src,
({
*flags = fib_entry_get_flags_for_source (fei, src);
@@ -380,7 +360,6 @@ arp_dst_fib_check (const fib_node_index_t fei, fib_entry_flag_t * flags)
else if (FIB_ENTRY_FLAG_CONNECTED & *flags)
return ARP_DST_FIB_CONN;
}))
- /* *INDENT-ON* */
return ARP_DST_FIB_NONE;
}
@@ -432,18 +411,22 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
eth_rx = ethernet_buffer_get_header (p0);
next0 = ARP_REPLY_NEXT_DROP;
- error0 = ETHERNET_ARP_ERROR_replies_sent;
+ error0 = ARP_ERROR_REPLIES_SENT;
sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
/* Check that IP address is local and matches incoming interface. */
fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
if (~0 == fib_index0)
{
- error0 = ETHERNET_ARP_ERROR_interface_no_table;
+ error0 = ARP_ERROR_INTERFACE_NO_TABLE;
goto drop;
}
+ dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
+ &arp0->ip4_over_ethernet[1].ip4, 32);
+ conn_sw_if_index0 = fib_entry_get_any_resolving_interface (dst_fei);
+
{
/*
* we're looking for FIB entries that indicate the source
@@ -476,7 +459,6 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
* flags we need, or the flags we must not have,
* is not the best source, so check then all.
*/
- /* *INDENT-OFF* */
FOR_EACH_SRC_ADDED(src_fib_entry, src, source,
({
src_flags = fib_entry_get_flags_for_source (src_fei, source);
@@ -485,36 +467,35 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
address. */
if (FIB_ENTRY_FLAG_LOCAL & src_flags)
{
- error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local;
- /*
- * When VPP has an interface whose address is also
- * applied to a TAP interface on the host, then VPP's
- * TAP interface will be unnumbered to the 'real'
- * interface and do proxy ARP from the host.
- * The curious aspect of this setup is that ARP requests
- * from the host will come from the VPP's own address.
- * So don't drop immediately here, instead go see if this
- * is a proxy ARP case.
- */
- goto next_feature;
- }
- /* A Source must also be local to subnet of matching
- * interface address. */
- if ((FIB_ENTRY_FLAG_ATTACHED & src_flags) ||
- (FIB_ENTRY_FLAG_CONNECTED & src_flags))
- {
- attached = 1;
- break;
- }
- /*
- * else
- * The packet was sent from an address that is not
- * connected nor attached i.e. it is not from an
- * address that is covered by a link's sub-net,
- * nor is it a already learned host resp.
- */
+ error0 = ARP_ERROR_L3_SRC_ADDRESS_IS_LOCAL;
+ /*
+ * When VPP has an interface whose address is also
+ * applied to a TAP interface on the host, then VPP's
+ * TAP interface will be unnumbered to the 'real'
+ * interface and do proxy ARP from the host.
+ * The curious aspect of this setup is that ARP requests
+ * from the host will come from the VPP's own address.
+ * So don't drop immediately here, instead go see if this
+ * is a proxy ARP case.
+ */
+ goto next_feature;
+ }
+ /* A Source must also be local to subnet of matching
+ * interface address. */
+ if ((FIB_ENTRY_FLAG_ATTACHED & src_flags) ||
+ (FIB_ENTRY_FLAG_CONNECTED & src_flags))
+ {
+ attached = 1;
+ break;
+ }
+ /*
+ * else
+ * The packet was sent from an address that is not
+ * connected nor attached i.e. it is not from an
+ * address that is covered by a link's sub-net,
+ * nor is it an already learned host resp.
+ */
}));
- /* *INDENT-ON* */
/*
* shorter mask lookup for the next iteration.
@@ -532,24 +513,20 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
while (!attached &&
!fib_entry_is_sourced (src_fei, FIB_SOURCE_DEFAULT_ROUTE));
- if (!attached)
+ if (!attached &&
+ !arp_unnumbered (p0, sw_if_index0, conn_sw_if_index0))
{
/*
- * the matching route is a not attached, i.e. it was
- * added as a result of routing, rather than interface/ARP
- * configuration. If the matching route is not a host route
- * (i.e. a /32)
+ * the matching route is not attached and not unnumbered,
+ * i.e. it was added as a result of routing, rather than
+ * interface/ARP configuration. If the matching route is not
+ * a host route (i.e. a /32)
*/
- error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
+ error0 = ARP_ERROR_L3_SRC_ADDRESS_NOT_LOCAL;
goto drop;
}
}
- dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
- &arp0->ip4_over_ethernet[1].ip4,
- 32);
- conn_sw_if_index0 = fib_entry_get_any_resolving_interface (dst_fei);
-
switch (arp_dst_fib_check (dst_fei, &dst_flags))
{
case ARP_DST_FIB_ADJ:
@@ -562,18 +539,24 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
* blow our ARP cache
*/
if (conn_sw_if_index0 != sw_if_index0)
- error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
+ error0 = ARP_ERROR_L3_DST_ADDRESS_NOT_LOCAL;
else if (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
arp0->ip4_over_ethernet[1].ip4.as_u32)
- error0 = arp_learn (sw_if_index0,
- &arp0->ip4_over_ethernet[0]);
- goto drop;
+ {
+ vlib_increment_simple_counter (
+ &ip_neighbor_counters[AF_IP4]
+ .ipnc[VLIB_RX][IP_NEIGHBOR_CTR_GRAT],
+ vm->thread_index, sw_if_index0, 1);
+ error0 =
+ arp_learn (sw_if_index0, &arp0->ip4_over_ethernet[0]);
+ }
+ goto next_feature;
case ARP_DST_FIB_CONN:
/* destination is connected, continue to process */
break;
case ARP_DST_FIB_NONE:
/* destination is not connected, stop here */
- error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
+ error0 = ARP_ERROR_L3_DST_ADDRESS_NOT_LOCAL;
goto next_feature;
}
@@ -596,10 +579,18 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
(eth_rx->src_address,
arp0->ip4_over_ethernet[0].mac.bytes) && !is_vrrp_reply0)
{
- error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
+ error0 = ARP_ERROR_L2_ADDRESS_MISMATCH;
goto drop;
}
+ vlib_increment_simple_counter (
+ &ip_neighbor_counters[AF_IP4]
+ .ipnc[VLIB_RX][arp0->opcode == clib_host_to_net_u16 (
+ ETHERNET_ARP_OPCODE_reply) ?
+ IP_NEIGHBOR_CTR_REPLY :
+ IP_NEIGHBOR_CTR_REQUEST],
+ vm->thread_index, sw_if_index0, 1);
+
/* Learn or update sender's mapping only for replies to addresses
* that are local to the subnet */
if (arp0->opcode ==
@@ -612,7 +603,7 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
/* a reply for a non-local destination could be a GARP.
* GARPs for hosts we know were handled above, so this one
* we drop */
- error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
+ error0 = ARP_ERROR_L3_DST_ADDRESS_NOT_LOCAL;
goto next_feature;
}
@@ -628,37 +619,38 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
sw_if_index0 != fib_entry_get_resolving_interface (src_fei))
{
/*
- * The interface the ARP is sent to or was received on is not the
- * interface on which the covering prefix is configured.
- * Maybe this is a case for unnumbered.
+ * The interface the ARP is sent to or was received on is
+ * not the interface on which the covering prefix is
+ * configured. Maybe this is a case for unnumbered.
*/
if (!arp_unnumbered (p0, sw_if_index0, conn_sw_if_index0))
{
- error0 = ETHERNET_ARP_ERROR_unnumbered_mismatch;
+ error0 = ARP_ERROR_UNNUMBERED_MISMATCH;
goto drop;
}
}
if (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
arp0->ip4_over_ethernet[1].ip4.as_u32)
{
- error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
+ error0 = ARP_ERROR_GRATUITOUS_ARP;
goto drop;
}
- next0 = arp_mk_reply (vnm, p0, sw_if_index0,
- if_addr0, arp0, eth_rx);
+ next0 = arp_mk_reply (vnm, p0, sw_if_index0, if_addr0, arp0, eth_rx);
/* We are going to reply to this request, so, in the absence of
errors, learn the sender */
if (!error0)
error0 = arp_learn (sw_if_index0, &arp0->ip4_over_ethernet[1]);
+ vlib_increment_simple_counter (
+ &ip_neighbor_counters[AF_IP4].ipnc[VLIB_TX][IP_NEIGHBOR_CTR_REPLY],
+ vm->thread_index, sw_if_index0, 1);
n_replies_sent += 1;
goto enqueue;
next_feature:
vnet_feature_next (&next0, p0);
- goto enqueue;
drop:
p0->error = node->errors[error0];
@@ -671,28 +663,21 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
- vlib_error_count (vm, node->node_index,
- ETHERNET_ARP_ERROR_replies_sent, n_replies_sent);
+ vlib_error_count (vm, node->node_index, ARP_ERROR_REPLIES_SENT,
+ n_replies_sent);
return frame->n_vectors;
}
-static char *ethernet_arp_error_strings[] = {
-#define _(sym,string) string,
- foreach_ethernet_arp_error
-#undef _
-};
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (arp_input_node, static) =
{
.function = arp_input,
.name = "arp-input",
.vector_size = sizeof (u32),
- .n_errors = ETHERNET_ARP_N_ERROR,
- .error_strings = ethernet_arp_error_strings,
+ .n_errors = ARP_N_ERROR,
+ .error_counters = arp_error_counters,
.n_next_nodes = ARP_INPUT_N_NEXT,
.next_nodes = {
[ARP_INPUT_NEXT_DROP] = "error-drop",
@@ -707,8 +692,8 @@ VLIB_REGISTER_NODE (arp_disabled_node, static) =
.function = arp_disabled,
.name = "arp-disabled",
.vector_size = sizeof (u32),
- .n_errors = ARP_DISABLED_N_ERROR,
- .error_strings = arp_disabled_error_strings,
+ .n_errors = ARP_N_ERROR,
+ .error_counters = arp_error_counters,
.n_next_nodes = ARP_DISABLED_N_NEXT,
.next_nodes = {
[ARP_INPUT_NEXT_DROP] = "error-drop",
@@ -722,8 +707,8 @@ VLIB_REGISTER_NODE (arp_reply_node, static) =
.function = arp_reply,
.name = "arp-reply",
.vector_size = sizeof (u32),
- .n_errors = ETHERNET_ARP_N_ERROR,
- .error_strings = ethernet_arp_error_strings,
+ .n_errors = ARP_N_ERROR,
+ .error_counters = arp_error_counters,
.n_next_nodes = ARP_REPLY_N_NEXT,
.next_nodes = {
[ARP_REPLY_NEXT_DROP] = "error-drop",
@@ -771,7 +756,6 @@ VNET_FEATURE_INIT (arp_drop_feat_node, static) =
.runs_before = 0, /* last feature */
};
-/* *INDENT-ON* */
typedef struct
{
@@ -870,7 +854,7 @@ VNET_SW_INTERFACE_ADD_DEL_FUNCTION (vnet_arp_add_del_sw_interface);
const static ip_neighbor_vft_t arp_vft = {
.inv_proxy4_add = arp_proxy_add,
.inv_proxy4_del = arp_proxy_del,
- .inv_proxy4_enable = arp_proxy_disable,
+ .inv_proxy4_enable = arp_proxy_enable,
.inv_proxy4_disable = arp_proxy_disable,
};
@@ -896,12 +880,39 @@ ethernet_arp_init (vlib_main_t * vm)
vlib_node_runtime_t *rt =
vlib_node_get_runtime (vm, arp_input_node.index);
-#define _(a,b) \
- vnet_pcap_drop_trace_filter_add_del \
- (rt->errors[ETHERNET_ARP_ERROR_##a], \
- 1 /* is_add */);
- foreach_ethernet_arp_error
-#undef _
+ vnet_pcap_drop_trace_filter_add_del (rt->errors[ARP_ERROR_REPLIES_SENT],
+ 1);
+ vnet_pcap_drop_trace_filter_add_del (rt->errors[ARP_ERROR_DISABLED], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_L2_TYPE_NOT_ETHERNET], 1);
+ vnet_pcap_drop_trace_filter_add_del (rt->errors[ARP_ERROR_L3_TYPE_NOT_IP4],
+ 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_L3_SRC_ADDRESS_NOT_LOCAL], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_L3_DST_ADDRESS_NOT_LOCAL], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_L3_DST_ADDRESS_UNSET], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_L3_SRC_ADDRESS_IS_LOCAL], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_L3_SRC_ADDRESS_LEARNED], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_REPLIES_RECEIVED], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_OPCODE_NOT_REQUEST], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_PROXY_ARP_REPLIES_SENT], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_L2_ADDRESS_MISMATCH], 1);
+ vnet_pcap_drop_trace_filter_add_del (rt->errors[ARP_ERROR_GRATUITOUS_ARP],
+ 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_INTERFACE_NO_TABLE], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_INTERFACE_NOT_IP_ENABLED], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_UNNUMBERED_MISMATCH], 1);
}
{
@@ -916,13 +927,11 @@ ethernet_arp_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ethernet_arp_init) =
{
.runs_after = VLIB_INITS("ethernet_init",
"ip_neighbor_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
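
The per-interface accounting added above indexes ip_neighbor_counters by address family, direction and packet class; the increment itself is a plain simple-counter bump. For the RX-reply case, as used in arp_reply:

/* ipnc[direction][type] is a per-sw_if_index simple counter */
vlib_simple_counter_main_t *cm =
  &ip_neighbor_counters[AF_IP4].ipnc[VLIB_RX][IP_NEIGHBOR_CTR_REPLY];
vlib_increment_simple_counter (cm, vm->thread_index, sw_if_index0, 1);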
diff --git a/src/vnet/arp/arp.h b/src/vnet/arp/arp.h
index 7446564b0cf..f8cab8ae78d 100644
--- a/src/vnet/arp/arp.h
+++ b/src/vnet/arp/arp.h
@@ -19,32 +19,7 @@
#include <vnet/ethernet/ethernet.h>
#include <vnet/ip/ip.h>
#include <vnet/ethernet/arp_packet.h>
-
-#define foreach_ethernet_arp_error \
- _ (replies_sent, "ARP replies sent") \
- _ (l2_type_not_ethernet, "L2 type not ethernet") \
- _ (l3_type_not_ip4, "L3 type not IP4") \
- _ (l3_src_address_not_local, "IP4 source address not local to subnet") \
- _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
- _ (l3_dst_address_unset, "IP4 destination address is unset") \
- _ (l3_src_address_is_local, "IP4 source address matches local interface") \
- _ (l3_src_address_learned, "ARP request IP4 source address learned") \
- _ (replies_received, "ARP replies received") \
- _ (opcode_not_request, "ARP opcode not request") \
- _ (proxy_arp_replies_sent, "Proxy ARP replies sent") \
- _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \
- _ (gratuitous_arp, "ARP probe or announcement dropped") \
- _ (interface_no_table, "Interface is not mapped to an IP table") \
- _ (interface_not_ip_enabled, "Interface is not IP enabled") \
- _ (unnumbered_mismatch, "RX interface is unnumbered to different subnet") \
-
-typedef enum
-{
-#define _(sym,string) ETHERNET_ARP_ERROR_##sym,
- foreach_ethernet_arp_error
-#undef _
- ETHERNET_ARP_N_ERROR,
-} ethernet_arp_reply_error_t;
+#include <vnet/arp/arp.api_enum.h>
extern int arp_proxy_add (u32 fib_index,
const ip4_address_t * lo_addr,
diff --git a/src/vnet/arp/arp_packet.h b/src/vnet/arp/arp_packet.h
index a860c258f75..66ab384a33e 100644
--- a/src/vnet/arp/arp_packet.h
+++ b/src/vnet/arp/arp_packet.h
@@ -68,6 +68,8 @@ arp_mk_reply (vnet_main_t * vnm,
clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) =
if_addr0->data_u32;
+ p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+
/* Hardware must be ethernet-like. */
ASSERT (vec_len (hw_if0->hw_address) == 6);
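
Marking the reply VNET_BUFFER_F_LOCALLY_ORIGINATED lets downstream nodes tell stack-sourced packets from transit traffic. A hedged sketch of the kind of consumer check this enables (the policy shown is illustrative, not part of this patch):

if (b0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)
  {
    /* generated by the stack itself (e.g. this ARP reply): skip
       checks that only apply to forwarded traffic */
  }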
diff --git a/src/vnet/arp/arp_proxy.c b/src/vnet/arp/arp_proxy.c
index e3f5b4ae67b..39f624d5a1d 100644
--- a/src/vnet/arp/arp_proxy.c
+++ b/src/vnet/arp/arp_proxy.c
@@ -223,7 +223,6 @@ set_arp_proxy (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
/*?
* Enable proxy-arp on an interface. The vpp stack will answer ARP
* requests for the indicated address range. Multiple proxy-arp
@@ -249,15 +248,12 @@ VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = {
"set interface proxy-arp <intfc> [enable|disable]",
.function = set_int_proxy_arp_command_fn,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_arp_proxy_command, static) = {
.path = "set arp proxy",
.short_help = "set arp proxy [del] table-ID <table-ID> start <start-address> end <end-addres>",
.function = set_arp_proxy,
};
-/* *INDENT-ON* */
typedef struct
{
@@ -326,14 +322,14 @@ arp_proxy (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
is_request0 = arp0->opcode
== clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);
- error0 = ETHERNET_ARP_ERROR_replies_sent;
+ error0 = ARP_ERROR_REPLIES_SENT;
sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
next0 = ARP_REPLY_NEXT_DROP;
fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
if (~0 == fib_index0)
{
- error0 = ETHERNET_ARP_ERROR_interface_no_table;
+ error0 = ARP_ERROR_INTERFACE_NO_TABLE;
}
if (0 == error0 && is_request0)
@@ -376,28 +372,28 @@ arp_proxy (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
- vlib_error_count (vm, node->node_index,
- ETHERNET_ARP_ERROR_replies_sent, n_arp_replies_sent);
+ vlib_error_count (vm, node->node_index, ARP_ERROR_REPLIES_SENT,
+ n_arp_replies_sent);
return frame->n_vectors;
}
-static char *ethernet_arp_error_strings[] = {
-#define _(sym,string) string,
- foreach_ethernet_arp_error
-#undef _
-};
-
VLIB_REGISTER_NODE (arp_proxy_node, static) =
{
- .function = arp_proxy,.name = "arp-proxy",.vector_size =
- sizeof (u32),.n_errors = ETHERNET_ARP_N_ERROR,.error_strings =
- ethernet_arp_error_strings,.n_next_nodes = ARP_REPLY_N_NEXT,.next_nodes =
+ .function = arp_proxy,
+ .name = "arp-proxy",
+ .vector_size = sizeof (u32),
+ .n_errors = ARP_N_ERROR,
+ .error_counters = arp_error_counters,
+ .n_next_nodes = ARP_REPLY_N_NEXT,
+ .next_nodes =
{
- [ARP_REPLY_NEXT_DROP] = "error-drop",
- [ARP_REPLY_NEXT_REPLY_TX] = "interface-output",}
-,.format_buffer = format_ethernet_arp_header,.format_trace =
- format_ethernet_arp_input_trace,};
+ [ARP_REPLY_NEXT_DROP] = "error-drop",
+ [ARP_REPLY_NEXT_REPLY_TX] = "interface-output",
+ },
+ .format_buffer = format_ethernet_arp_header,
+ .format_trace = format_ethernet_arp_input_trace,
+};
static clib_error_t *
show_ip4_arp (vlib_main_t * vm,
@@ -435,13 +431,11 @@ show_ip4_arp (vlib_main_t * vm,
* Fib_index 0 6.0.0.1 - 6.0.0.11
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip4_arp_command, static) = {
.path = "show arp proxy",
.function = show_ip4_arp,
.short_help = "show ip arp",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/bfd/bfd.api b/src/vnet/bfd/bfd.api
index f53cc7630fd..d3b3ed21a26 100644
--- a/src/vnet/bfd/bfd.api
+++ b/src/vnet/bfd/bfd.api
@@ -107,6 +107,26 @@ autoreply define bfd_udp_add
u8 bfd_key_id;
u32 conf_key_id;
};
+define bfd_udp_upd
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u32 desired_min_tx;
+ u32 required_min_rx;
+ vl_api_address_t local_addr;
+ vl_api_address_t peer_addr;
+ u8 detect_mult;
+ bool is_authenticated;
+ u8 bfd_key_id;
+ u32 conf_key_id;
+};
+define bfd_udp_upd_reply
+{
+ u32 context;
+ i32 retval;
+ u32 stats_index;
+};
/** \brief Modify UDP BFD session on interface
@param client_index - opaque cookie to identify the sender
@@ -339,6 +359,107 @@ autoreply define bfd_udp_auth_deactivate
bool is_delayed;
};
+/* must be compatible with bfd_error_t */
+counters bfd_udp {
+ none {
+ severity info;
+ type counter64;
+ units "packets";
+ description "OK";
+ };
+ bad {
+ severity error;
+ type counter64;
+ units "packets";
+ description "bad packet";
+ };
+ disabled {
+ severity error;
+ type counter64;
+ units "packets";
+ description "bfd packets received on disabled interfaces";
+ };
+ version {
+ severity error;
+ type counter64;
+ units "packets";
+ description "version";
+ };
+ length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "too short";
+ };
+ detect_multi {
+ severity error;
+ type counter64;
+ units "packets";
+ description "detect-multi";
+ };
+ multi_point {
+ severity error;
+ type counter64;
+ units "packets";
+ description "multi-point";
+ };
+ my_disc {
+ severity error;
+ type counter64;
+ units "packets";
+ description "my-disc";
+ };
+ your_disc {
+ severity error;
+ type counter64;
+ units "packets";
+ description "your-disc";
+ };
+ admin_down {
+ severity error;
+ type counter64;
+ units "packets";
+ description "session admin-down";
+ };
+ no_session {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no-session";
+ };
+ failed_verification {
+ severity error;
+ type counter64;
+ units "packets";
+ description "failed-verification";
+ };
+ src_mismatch {
+ severity error;
+ type counter64;
+ units "packets";
+ description "src-mismatch";
+ };
+ dst_mismatch {
+ severity error;
+ type counter64;
+ units "packets";
+ description "dst-mismatch";
+ };
+ ttl {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ttl";
+ };
+};
+
+paths {
+ "/err/bfd-udp4-input" "bfd";
+ "/err/bfd-udp6-input" "bfd";
+ "/err/bfd-udp4-echo-input" "bfd";
+ "/err/bfd-udp6-echo-input" "bfd";
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/vnet/bfd/bfd_api.c b/src/vnet/bfd/bfd_api.c
index 0ae8508f865..816e71081ff 100644
--- a/src/vnet/bfd/bfd_api.c
+++ b/src/vnet/bfd/bfd_api.c
@@ -71,6 +71,27 @@ vl_api_bfd_udp_add_t_handler (vl_api_bfd_udp_add_t * mp)
}
static void
+vl_api_bfd_udp_upd_t_handler (vl_api_bfd_udp_upd_t *mp)
+{
+ vl_api_bfd_udp_upd_reply_t *rmp;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ BFD_UDP_API_PARAM_COMMON_CODE;
+
+ rv = bfd_udp_upd_session (
+ BFD_UDP_API_PARAM_FROM_MP (mp), clib_net_to_host_u32 (mp->desired_min_tx),
+ clib_net_to_host_u32 (mp->required_min_rx), mp->detect_mult,
+ mp->is_authenticated, clib_net_to_host_u32 (mp->conf_key_id),
+ mp->bfd_key_id);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO2 (VL_API_BFD_UDP_UPD_REPLY,
+ ({ rmp->stats_index = clib_host_to_net_u32 (0); }));
+}
+
+static void
vl_api_bfd_udp_mod_t_handler (vl_api_bfd_udp_mod_t * mp)
{
vl_api_bfd_udp_mod_reply_t *rmp;
@@ -196,7 +217,6 @@ bfd_event (bfd_main_t * bm, bfd_session_t * bs)
vpe_api_main_t *vam = &vpe_api_main;
vpe_client_registration_t *reg;
vl_api_registration_t *vl_reg;
- /* *INDENT-OFF* */
pool_foreach (reg, vam->bfd_events_registrations) {
vl_reg = vl_api_client_index_to_registration (reg->client_index);
if (vl_reg)
@@ -210,7 +230,6 @@ bfd_event (bfd_main_t * bm, bfd_session_t * bs)
}
}
}
- /* *INDENT-ON* */
}
static void
@@ -223,13 +242,11 @@ vl_api_bfd_udp_session_dump_t_handler (vl_api_bfd_udp_session_dump_t * mp)
return;
bfd_session_t *bs = NULL;
- /* *INDENT-OFF* */
pool_foreach (bs, bfd_main.sessions) {
if (bs->transport == BFD_TRANSPORT_UDP4 ||
bs->transport == BFD_TRANSPORT_UDP6)
send_bfd_udp_session_details (reg, mp->context, bs);
}
- /* *INDENT-ON* */
}
static void
@@ -280,7 +297,6 @@ vl_api_bfd_auth_keys_dump_t_handler (vl_api_bfd_auth_keys_dump_t * mp)
bfd_auth_key_t *key = NULL;
vl_api_bfd_auth_keys_details_t *rmp = NULL;
- /* *INDENT-OFF* */
pool_foreach (key, bfd_main.auth_keys) {
rmp = vl_msg_api_alloc (sizeof (*rmp));
clib_memset (rmp, 0, sizeof (*rmp));
@@ -291,7 +307,6 @@ vl_api_bfd_auth_keys_dump_t_handler (vl_api_bfd_auth_keys_dump_t * mp)
rmp->use_count = clib_host_to_net_u32 (key->use_count);
vl_api_send_msg (reg, (u8 *)rmp);
}
- /* *INDENT-ON* */
}
static void
@@ -373,7 +388,6 @@ vl_api_bfd_udp_get_echo_source_t_handler (vl_api_bfd_udp_get_echo_source_t *
bfd_udp_get_echo_source (&is_set, &sw_if_index, &have_usable_ip4, &ip4,
&have_usable_ip6, &ip6);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_BFD_UDP_GET_ECHO_SOURCE_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
@@ -407,7 +421,6 @@ vl_api_bfd_udp_get_echo_source_t_handler (vl_api_bfd_udp_get_echo_source_t *
rmp->have_usable_ip6 = false;
}
}))
- /* *INDENT-ON* */
}
#include <vnet/bfd/bfd.api.c>
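
bfd_udp_upd_session (declared in bfd_api.h below) gives callers an idempotent add-or-modify, in contrast to the strict add/mod pair. A minimal sketch of a direct call, with hypothetical addresses and timers:

ip46_address_t local = { .ip4.as_u32 = clib_host_to_net_u32 (0x0a000001) };
ip46_address_t peer = { .ip4.as_u32 = clib_host_to_net_u32 (0x0a000002) };

vnet_api_error_t rv = bfd_udp_upd_session (
  sw_if_index, &local, &peer, 100000 /* desired min tx, usec */,
  100000 /* required min rx, usec */, 3 /* detect mult */,
  0 /* not authenticated */, 0 /* conf key id */, 0 /* bfd key id */);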
diff --git a/src/vnet/bfd/bfd_api.h b/src/vnet/bfd/bfd_api.h
index 2a6c69b78b6..f051e6b679c 100644
--- a/src/vnet/bfd/bfd_api.h
+++ b/src/vnet/bfd/bfd_api.h
@@ -45,6 +45,15 @@ bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr,
u8 bfd_key_id);
/**
+ * @brief create a new or modify an existing bfd session
+ */
+vnet_api_error_t
+bfd_udp_upd_session (u32 sw_if_index, const ip46_address_t *local_addr,
+ const ip46_address_t *peer_addr, u32 desired_min_tx_usec,
+ u32 required_min_rx_usec, u8 detect_mult,
+ u8 is_authenticated, u32 conf_key_id, u8 bfd_key_id);
+
+/**
* @brief modify existing session
*/
vnet_api_error_t
diff --git a/src/vnet/bfd/bfd_cli.c b/src/vnet/bfd/bfd_cli.c
index 1d100b077eb..33942bb89e6 100644
--- a/src/vnet/bfd/bfd_cli.c
+++ b/src/vnet/bfd/bfd_cli.c
@@ -134,12 +134,10 @@ show_bfd (vlib_main_t * vm, unformat_input_t * input,
bfd_auth_key_t *key = NULL;
u8 *s = format (NULL, "%=10s %=25s %=10s\n", "Configuration Key ID",
"Type", "Use Count");
- /* *INDENT-OFF* */
pool_foreach (key, bm->auth_keys) {
s = format (s, "%10u %-25s %10u\n", key->conf_key_id,
bfd_auth_type_str (key->auth_type), key->use_count);
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "%v\n", s);
vec_free (s);
vlib_cli_output (vm, "Number of configured BFD keys: %lu\n",
@@ -149,11 +147,9 @@ show_bfd (vlib_main_t * vm, unformat_input_t * input,
{
u8 *s = format (NULL, "%=10s %=32s %=20s %=20s\n", "Index", "Property",
"Local value", "Remote value");
- /* *INDENT-OFF* */
pool_foreach (bs, bm->sessions) {
s = format (s, "%U", format_bfd_session_cli, vm, bs);
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "%v", s);
vec_free (s);
vlib_cli_output (vm, "Number of configured BFD sessions: %lu\n",
@@ -212,13 +208,11 @@ show_bfd (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_bfd_command, static) = {
.path = "show bfd",
.short_help = "show bfd [keys|sessions|echo-source]",
.function = show_bfd,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_key_add (vlib_main_t * vm, unformat_input_t * input,
@@ -310,7 +304,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_key_add_command, static) = {
.path = "bfd key set",
.short_help = "bfd key set"
@@ -319,7 +312,6 @@ VLIB_CLI_COMMAND (bfd_cli_key_add_command, static) = {
" secret <secret>",
.function = bfd_cli_key_add,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_key_del (vlib_main_t * vm, unformat_input_t * input,
@@ -355,13 +347,11 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_key_del_command, static) = {
.path = "bfd key del",
.short_help = "bfd key del conf-key-id <id>",
.function = bfd_cli_key_del,
};
-/* *INDENT-ON* */
#define INTERFACE_STR "interface"
#define LOCAL_ADDR_STR "local-addr"
@@ -397,23 +387,30 @@ WARN_OFF(tautological-compare) \
goto out; \
}
+static uword
+bfd_cli_unformat_ip46_address (unformat_input_t *input, va_list *args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ return unformat_user (input, unformat_ip46_address, ip46, IP46_TYPE_ANY);
+}
+
static clib_error_t *
bfd_cli_udp_session_add (vlib_main_t * vm, unformat_input_t * input,
CLIB_UNUSED (vlib_cli_command_t * lmd))
{
clib_error_t *ret = NULL;
unformat_input_t _line_input, *line_input = &_line_input;
-#define foreach_bfd_cli_udp_session_add_cli_param(F) \
- F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
- unformat_vnet_sw_interface, &vnet_main) \
- F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \
- F (u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \
- F (u32, detect_mult, DETECT_MULT_STR, mandatory, "%u") \
- F (u32, conf_key_id, CONF_KEY_ID_STR, optional, "%u") \
+#define foreach_bfd_cli_udp_session_add_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \
+ F (u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \
+ F (u32, detect_mult, DETECT_MULT_STR, mandatory, "%u") \
+ F (u32, conf_key_id, CONF_KEY_ID_STR, optional, "%u") \
F (u32, bfd_key_id, BFD_KEY_ID_STR, optional, "%u")
foreach_bfd_cli_udp_session_add_cli_param (DECLARE);
@@ -477,7 +474,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_session_add_command, static) = {
.path = "bfd udp session add",
.short_help = "bfd udp session add"
@@ -493,7 +489,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_add_command, static) = {
"]",
.function = bfd_cli_udp_session_add,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_udp_session_mod (vlib_main_t * vm, unformat_input_t * input,
@@ -501,15 +496,15 @@ bfd_cli_udp_session_mod (vlib_main_t * vm, unformat_input_t * input,
{
clib_error_t *ret = NULL;
unformat_input_t _line_input, *line_input = &_line_input;
-#define foreach_bfd_cli_udp_session_mod_cli_param(F) \
- F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
- unformat_vnet_sw_interface, &vnet_main) \
- F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \
- F (u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \
+#define foreach_bfd_cli_udp_session_mod_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \
+ F (u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \
F (u32, detect_mult, DETECT_MULT_STR, mandatory, "%u")
foreach_bfd_cli_udp_session_mod_cli_param (DECLARE);
@@ -556,7 +551,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_session_mod_command, static) = {
.path = "bfd udp session mod",
.short_help = "bfd udp session mod interface"
@@ -568,7 +562,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_mod_command, static) = {
" <detect multiplier> ",
.function = bfd_cli_udp_session_mod,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_udp_session_del (vlib_main_t * vm, unformat_input_t * input,
@@ -576,13 +569,13 @@ bfd_cli_udp_session_del (vlib_main_t * vm, unformat_input_t * input,
{
clib_error_t *ret = NULL;
unformat_input_t _line_input, *line_input = &_line_input;
-#define foreach_bfd_cli_udp_session_del_cli_param(F) \
- F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
- unformat_vnet_sw_interface, &vnet_main) \
- F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address)
+#define foreach_bfd_cli_udp_session_del_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address)
foreach_bfd_cli_udp_session_del_cli_param (DECLARE);
@@ -620,7 +613,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_session_del_command, static) = {
.path = "bfd udp session del",
.short_help = "bfd udp session del interface"
@@ -629,7 +621,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_del_command, static) = {
"<peer-address> ",
.function = bfd_cli_udp_session_del,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_udp_session_set_flags (vlib_main_t * vm, unformat_input_t * input,
@@ -637,14 +628,14 @@ bfd_cli_udp_session_set_flags (vlib_main_t * vm, unformat_input_t * input,
{
clib_error_t *ret = NULL;
unformat_input_t _line_input, *line_input = &_line_input;
-#define foreach_bfd_cli_udp_session_set_flags_cli_param(F) \
- F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
- unformat_vnet_sw_interface, &vnet_main) \
- F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (u8 *, admin_up_down_token, ADMIN_STR, mandatory, "%v", \
+#define foreach_bfd_cli_udp_session_set_flags_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (u8 *, admin_up_down_token, ADMIN_STR, mandatory, "%v", \
&admin_up_down_token)
foreach_bfd_cli_udp_session_set_flags_cli_param (DECLARE);
@@ -702,7 +693,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_session_set_flags_command, static) = {
.path = "bfd udp session set-flags",
.short_help = "bfd udp session set-flags"
@@ -712,7 +702,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_set_flags_command, static) = {
" admin <up|down>",
.function = bfd_cli_udp_session_set_flags,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_udp_session_auth_activate (vlib_main_t * vm,
@@ -721,15 +710,15 @@ bfd_cli_udp_session_auth_activate (vlib_main_t * vm,
{
clib_error_t *ret = NULL;
unformat_input_t _line_input, *line_input = &_line_input;
-#define foreach_bfd_cli_udp_session_auth_activate_cli_param(F) \
- F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
- unformat_vnet_sw_interface, &vnet_main) \
- F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (u8 *, delayed_token, DELAYED_STR, optional, "%v") \
- F (u32, conf_key_id, CONF_KEY_ID_STR, mandatory, "%u") \
+#define foreach_bfd_cli_udp_session_auth_activate_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (u8 *, delayed_token, DELAYED_STR, optional, "%v") \
+ F (u32, conf_key_id, CONF_KEY_ID_STR, mandatory, "%u") \
F (u32, bfd_key_id, BFD_KEY_ID_STR, mandatory, "%u")
foreach_bfd_cli_udp_session_auth_activate_cli_param (DECLARE);
@@ -799,7 +788,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_activate_command, static) = {
.path = "bfd udp session auth activate",
.short_help = "bfd udp session auth activate"
@@ -818,13 +806,13 @@ bfd_cli_udp_session_auth_deactivate (vlib_main_t *vm, unformat_input_t *input,
{
clib_error_t *ret = NULL;
unformat_input_t _line_input, *line_input = &_line_input;
-#define foreach_bfd_cli_udp_session_auth_deactivate_cli_param(F) \
- F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
- unformat_vnet_sw_interface, &vnet_main) \
- F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
+#define foreach_bfd_cli_udp_session_auth_deactivate_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
F (u8 *, delayed_token, DELAYED_STR, optional, "%v")
foreach_bfd_cli_udp_session_auth_deactivate_cli_param (DECLARE);
@@ -884,7 +872,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_deactivate_command, static) = {
.path = "bfd udp session auth deactivate",
.short_help = "bfd udp session auth deactivate"
@@ -894,7 +881,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_deactivate_command, static) = {
"[ delayed <yes|no> ]",
.function = bfd_cli_udp_session_auth_deactivate,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_udp_set_echo_source (vlib_main_t * vm, unformat_input_t * input,
@@ -941,13 +927,11 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_set_echo_source_cmd, static) = {
.path = "bfd udp echo-source set",
.short_help = "bfd udp echo-source set interface <interface>",
.function = bfd_cli_udp_set_echo_source,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_udp_del_echo_source (vlib_main_t * vm, unformat_input_t * input,
@@ -964,13 +948,11 @@ bfd_cli_udp_del_echo_source (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_del_echo_source_cmd, static) = {
.path = "bfd udp echo-source del",
.short_help = "bfd udp echo-source del",
.function = bfd_cli_udp_del_echo_source,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
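
The bfd_cli_unformat_ip46_address wrapper exists because the F() parameter macros expand into a fixed two-argument "%U" unformat call and cannot pass the extra IP46_TYPE_ANY argument that unformat_ip46_address requires; the wrapper bakes that argument in. Usage then reduces to the standard form:

ip46_address_t addr;
if (unformat (line_input, "%U", bfd_cli_unformat_ip46_address, &addr))
  ; /* addr parsed as either an IPv4 or an IPv6 address */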
diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c
index 27616db3deb..1423da91158 100644
--- a/src/vnet/bfd/bfd_main.c
+++ b/src/vnet/bfd/bfd_main.c
@@ -30,14 +30,25 @@
#include <vlib/log.h>
#include <vnet/crypto/crypto.h>
+static void
+bfd_validate_counters (bfd_main_t *bm)
+{
+ vlib_validate_combined_counter (&bm->rx_counter, pool_elts (bm->sessions));
+ vlib_validate_combined_counter (&bm->rx_echo_counter,
+ pool_elts (bm->sessions));
+ vlib_validate_combined_counter (&bm->tx_counter, pool_elts (bm->sessions));
+ vlib_validate_combined_counter (&bm->tx_echo_counter,
+ pool_elts (bm->sessions));
+}
+
static u64
bfd_calc_echo_checksum (u32 discriminator, u64 expire_time, u32 secret)
{
u64 checksum = 0;
#if defined(clib_crc32c_uses_intrinsics) && !defined (__i386__)
- checksum = crc32_u64 (0, discriminator);
- checksum = crc32_u64 (checksum, expire_time);
- checksum = crc32_u64 (checksum, secret);
+ checksum = clib_crc32c_u64 (0, discriminator);
+ checksum = clib_crc32c_u64 (checksum, expire_time);
+ checksum = clib_crc32c_u64 (checksum, secret);
#else
checksum = clib_xxhash (discriminator ^ expire_time ^ secret);
#endif
@@ -172,7 +183,7 @@ bfd_set_poll_state (bfd_session_t * bs, bfd_poll_state_e state)
}
static void
-bfd_recalc_tx_interval (bfd_main_t * bm, bfd_session_t * bs)
+bfd_recalc_tx_interval (bfd_session_t *bs)
{
bs->transmit_interval_nsec =
clib_max (bs->effective_desired_min_tx_nsec, bs->remote_min_rx_nsec);
@@ -181,7 +192,7 @@ bfd_recalc_tx_interval (bfd_main_t * bm, bfd_session_t * bs)
}
static void
-bfd_recalc_echo_tx_interval (bfd_main_t * bm, bfd_session_t * bs)
+bfd_recalc_echo_tx_interval (bfd_session_t *bs)
{
bs->echo_transmit_interval_nsec =
clib_max (bs->effective_desired_min_tx_nsec, bs->remote_min_echo_rx_nsec);
@@ -240,7 +251,7 @@ bfd_calc_next_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now)
}
static void
-bfd_calc_next_echo_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now)
+bfd_calc_next_echo_tx (bfd_session_t *bs, u64 now)
{
bs->echo_tx_timeout_nsec =
bs->echo_last_tx_nsec + bs->echo_transmit_interval_nsec;
@@ -261,7 +272,7 @@ bfd_calc_next_echo_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now)
}
static void
-bfd_recalc_detection_time (bfd_main_t * bm, bfd_session_t * bs)
+bfd_recalc_detection_time (bfd_session_t *bs)
{
if (bs->local_state == BFD_STATE_init || bs->local_state == BFD_STATE_up)
{
@@ -385,26 +396,24 @@ bfd_set_effective_desired_min_tx (bfd_main_t * bm,
bs->effective_desired_min_tx_nsec = desired_min_tx_nsec;
BFD_DBG ("Set effective desired min tx to " BFD_CLK_FMT,
BFD_CLK_PRN (bs->effective_desired_min_tx_nsec));
- bfd_recalc_detection_time (bm, bs);
- bfd_recalc_tx_interval (bm, bs);
- bfd_recalc_echo_tx_interval (bm, bs);
+ bfd_recalc_detection_time (bs);
+ bfd_recalc_tx_interval (bs);
+ bfd_recalc_echo_tx_interval (bs);
bfd_calc_next_tx (bm, bs, now);
}
static void
-bfd_set_effective_required_min_rx (bfd_main_t * bm,
- bfd_session_t * bs,
- u64 required_min_rx_nsec)
+bfd_set_effective_required_min_rx (bfd_session_t *bs, u64 required_min_rx_nsec)
{
bs->effective_required_min_rx_nsec = required_min_rx_nsec;
BFD_DBG ("Set effective required min rx to " BFD_CLK_FMT,
BFD_CLK_PRN (bs->effective_required_min_rx_nsec));
- bfd_recalc_detection_time (bm, bs);
+ bfd_recalc_detection_time (bs);
}
static void
-bfd_set_remote_required_min_rx (bfd_main_t * bm, bfd_session_t * bs,
- u64 now, u32 remote_required_min_rx_usec)
+bfd_set_remote_required_min_rx (bfd_session_t *bs,
+ u32 remote_required_min_rx_usec)
{
if (bs->remote_min_rx_usec != remote_required_min_rx_usec)
{
@@ -412,14 +421,13 @@ bfd_set_remote_required_min_rx (bfd_main_t * bm, bfd_session_t * bs,
bs->remote_min_rx_nsec = bfd_usec_to_nsec (remote_required_min_rx_usec);
BFD_DBG ("Set remote min rx to " BFD_CLK_FMT,
BFD_CLK_PRN (bs->remote_min_rx_nsec));
- bfd_recalc_detection_time (bm, bs);
- bfd_recalc_tx_interval (bm, bs);
+ bfd_recalc_detection_time (bs);
+ bfd_recalc_tx_interval (bs);
}
}
static void
-bfd_set_remote_required_min_echo_rx (bfd_main_t * bm, bfd_session_t * bs,
- u64 now,
+bfd_set_remote_required_min_echo_rx (bfd_session_t *bs,
u32 remote_required_min_echo_rx_usec)
{
if (bs->remote_min_echo_rx_usec != remote_required_min_echo_rx_usec)
@@ -429,7 +437,7 @@ bfd_set_remote_required_min_echo_rx (bfd_main_t * bm, bfd_session_t * bs,
bfd_usec_to_nsec (bs->remote_min_echo_rx_usec);
BFD_DBG ("Set remote min echo rx to " BFD_CLK_FMT,
BFD_CLK_PRN (bs->remote_min_echo_rx_nsec));
- bfd_recalc_echo_tx_interval (bm, bs);
+ bfd_recalc_echo_tx_interval (bs);
}
}
@@ -450,14 +458,21 @@ bfd_session_start (bfd_main_t * bm, bfd_session_t * bs)
BFD_DBG ("\nStarting session: %U", format_bfd_session, bs);
vlib_log_info (bm->log_class, "start BFD session: %U",
format_bfd_session_brief, bs);
- bfd_set_effective_required_min_rx (bm, bs, bs->config_required_min_rx_nsec);
- bfd_recalc_tx_interval (bm, bs);
+ bfd_set_effective_required_min_rx (bs, bs->config_required_min_rx_nsec);
+ bfd_recalc_tx_interval (bs);
vlib_process_signal_event (bm->vlib_main, bm->bfd_process_node_index,
BFD_EVENT_NEW_SESSION, bs->bs_idx);
bfd_notify_listeners (bm, BFD_LISTEN_EVENT_CREATE, bs);
}
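+/* counterpart of bfd_session_start - notify listeners before the session is
+ * torn down */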
void
+bfd_session_stop (bfd_main_t *bm, bfd_session_t *bs)
+{
+ BFD_DBG ("\nStopping session: %U", format_bfd_session, bs);
+ bfd_notify_listeners (bm, BFD_LISTEN_EVENT_DELETE, bs);
+}
+
+void
bfd_session_set_flags (vlib_main_t * vm, bfd_session_t * bs, u8 admin_up_down)
{
bfd_main_t *bm = &bfd_main;
@@ -485,30 +500,29 @@ bfd_session_set_flags (vlib_main_t * vm, bfd_session_t * bs, u8 admin_up_down)
}
u8 *
-bfd_input_format_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- const bfd_input_trace_t *t = va_arg (*args, bfd_input_trace_t *);
- const bfd_pkt_t *pkt = (bfd_pkt_t *) t->data;
- if (t->len > STRUCT_SIZE_OF (bfd_pkt_t, head))
- {
- s = format (s, "BFD v%u, diag=%u(%s), state=%u(%s),\n"
- " flags=(P:%u, F:%u, C:%u, A:%u, D:%u, M:%u), "
- "detect_mult=%u, length=%u\n",
- bfd_pkt_get_version (pkt), bfd_pkt_get_diag_code (pkt),
- bfd_diag_code_string (bfd_pkt_get_diag_code (pkt)),
- bfd_pkt_get_state (pkt),
- bfd_state_string (bfd_pkt_get_state (pkt)),
- bfd_pkt_get_poll (pkt), bfd_pkt_get_final (pkt),
- bfd_pkt_get_control_plane_independent (pkt),
- bfd_pkt_get_auth_present (pkt), bfd_pkt_get_demand (pkt),
- bfd_pkt_get_multipoint (pkt), pkt->head.detect_mult,
- pkt->head.length);
- if (t->len >= sizeof (bfd_pkt_t) &&
- pkt->head.length >= sizeof (bfd_pkt_t))
+format_bfd_pkt (u8 *s, va_list *args)
+{
+ u32 len = va_arg (*args, u32);
+ u8 *data = va_arg (*args, u8 *);
+
+ const bfd_pkt_t *pkt = (bfd_pkt_t *) data;
+ if (len > STRUCT_SIZE_OF (bfd_pkt_t, head))
+ {
+ s = format (
+ s,
+ "BFD v%u, diag=%u(%s), state=%u(%s),\n"
+ " flags=(P:%u, F:%u, C:%u, A:%u, D:%u, M:%u), "
+ "detect_mult=%u, length=%u",
+ bfd_pkt_get_version (pkt), bfd_pkt_get_diag_code (pkt),
+ bfd_diag_code_string (bfd_pkt_get_diag_code (pkt)),
+ bfd_pkt_get_state (pkt), bfd_state_string (bfd_pkt_get_state (pkt)),
+ bfd_pkt_get_poll (pkt), bfd_pkt_get_final (pkt),
+ bfd_pkt_get_control_plane_independent (pkt),
+ bfd_pkt_get_auth_present (pkt), bfd_pkt_get_demand (pkt),
+ bfd_pkt_get_multipoint (pkt), pkt->head.detect_mult, pkt->head.length);
+ if (len >= sizeof (bfd_pkt_t) && pkt->head.length >= sizeof (bfd_pkt_t))
{
- s = format (s, " my discriminator: %u\n",
+ s = format (s, "\n my discriminator: %u\n",
clib_net_to_host_u32 (pkt->my_disc));
s = format (s, " your discriminator: %u\n",
clib_net_to_host_u32 (pkt->your_disc));
@@ -519,16 +533,16 @@ bfd_input_format_trace (u8 * s, va_list * args)
s = format (s, " required min echo rx interval: %u",
clib_net_to_host_u32 (pkt->req_min_echo_rx));
}
- if (t->len >= sizeof (bfd_pkt_with_common_auth_t) &&
+ if (len >= sizeof (bfd_pkt_with_common_auth_t) &&
pkt->head.length >= sizeof (bfd_pkt_with_common_auth_t) &&
bfd_pkt_get_auth_present (pkt))
{
const bfd_pkt_with_common_auth_t *with_auth = (void *) pkt;
const bfd_auth_common_t *common = &with_auth->common_auth;
s = format (s, "\n auth len: %u\n", common->len);
- s = format (s, " auth type: %u:%s\n", common->type,
+ s = format (s, " auth type: %u:%s", common->type,
bfd_auth_type_str (common->type));
- if (t->len >= sizeof (bfd_pkt_with_sha1_auth_t) &&
+ if (len >= sizeof (bfd_pkt_with_sha1_auth_t) &&
pkt->head.length >= sizeof (bfd_pkt_with_sha1_auth_t) &&
(BFD_AUTH_TYPE_keyed_sha1 == common->type ||
BFD_AUTH_TYPE_meticulous_keyed_sha1 == common->type))
@@ -542,15 +556,23 @@ bfd_input_format_trace (u8 * s, va_list * args)
sizeof (sha1->hash));
}
}
- else
- {
- s = format (s, "\n");
- }
}
return s;
}
+u8 *
+bfd_input_format_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ const bfd_input_trace_t *t = va_arg (*args, bfd_input_trace_t *);
+
+ s = format (s, "%U", format_bfd_pkt, t->len, t->data);
+
+ return s;
+}
+
typedef struct
{
u32 bs_idx;
@@ -651,8 +673,7 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now,
clib_max
(bs->config_desired_min_tx_nsec,
bm->default_desired_min_tx_nsec));
- bfd_set_effective_required_min_rx (bm, bs,
- bs->config_required_min_rx_nsec);
+ bfd_set_effective_required_min_rx (bs, bs->config_required_min_rx_nsec);
bfd_set_timer (bm, bs, now, handling_wakeup);
break;
case BFD_STATE_down:
@@ -661,8 +682,7 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now,
clib_max
(bs->config_desired_min_tx_nsec,
bm->default_desired_min_tx_nsec));
- bfd_set_effective_required_min_rx (bm, bs,
- bs->config_required_min_rx_nsec);
+ bfd_set_effective_required_min_rx (bs, bs->config_required_min_rx_nsec);
bfd_set_timer (bm, bs, now, handling_wakeup);
break;
case BFD_STATE_init:
@@ -676,7 +696,7 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now,
bs->config_desired_min_tx_nsec);
if (BFD_POLL_NOT_NEEDED == bs->poll_state)
{
- bfd_set_effective_required_min_rx (bm, bs,
+ bfd_set_effective_required_min_rx (bs,
bs->config_required_min_rx_nsec);
}
bfd_set_timer (bm, bs, now, handling_wakeup);
@@ -694,8 +714,7 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now,
}
static void
-bfd_on_config_change (vlib_main_t * vm, vlib_node_runtime_t * rt,
- bfd_main_t * bm, bfd_session_t * bs, u64 now)
+bfd_on_config_change (bfd_main_t *bm, bfd_session_t *bs, u64 now)
{
/*
* if remote demand mode is set and we need to do a poll, set the next
@@ -706,7 +725,7 @@ bfd_on_config_change (vlib_main_t * vm, vlib_node_runtime_t * rt,
{
bs->tx_timeout_nsec = now;
}
- bfd_recalc_detection_time (bm, bs);
+ bfd_recalc_detection_time (bs);
bfd_set_timer (bm, bs, now, 0);
}
@@ -727,17 +746,18 @@ bfd_add_transport_layer (vlib_main_t * vm, u32 bi, bfd_session_t * bs)
}
static int
-bfd_transport_control_frame (vlib_main_t * vm, u32 bi, bfd_session_t * bs)
+bfd_transport_control_frame (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi,
+ bfd_session_t *bs)
{
switch (bs->transport)
{
case BFD_TRANSPORT_UDP4:
BFD_DBG ("Transport bfd via udp4, bs_idx=%u", bs->bs_idx);
- return bfd_transport_udp4 (vm, bi, bs);
+ return bfd_transport_udp4 (vm, rt, bi, bs, 0 /* is_echo */);
break;
case BFD_TRANSPORT_UDP6:
BFD_DBG ("Transport bfd via udp6, bs_idx=%u", bs->bs_idx);
- return bfd_transport_udp6 (vm, bi, bs);
+ return bfd_transport_udp6 (vm, rt, bi, bs, 0 /* is_echo */);
break;
}
return 0;
@@ -761,17 +781,18 @@ bfd_echo_add_transport_layer (vlib_main_t * vm, u32 bi, bfd_session_t * bs)
}
static int
-bfd_transport_echo (vlib_main_t * vm, u32 bi, bfd_session_t * bs)
+bfd_transport_echo (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi,
+ bfd_session_t *bs)
{
switch (bs->transport)
{
case BFD_TRANSPORT_UDP4:
BFD_DBG ("Transport bfd echo via udp4, bs_idx=%u", bs->bs_idx);
- return bfd_transport_udp4 (vm, bi, bs);
+ return bfd_transport_udp4 (vm, rt, bi, bs, 1 /* is_echo */);
break;
case BFD_TRANSPORT_UDP6:
BFD_DBG ("Transport bfd echo via udp6, bs_idx=%u", bs->bs_idx);
- return bfd_transport_udp6 (vm, bi, bs);
+ return bfd_transport_udp6 (vm, rt, bi, bs, 1 /* is_echo */);
break;
}
return 0;
@@ -861,8 +882,7 @@ bfd_is_echo_possible (bfd_session_t * bs)
}
static void
-bfd_init_control_frame (bfd_main_t * bm, bfd_session_t * bs,
- vlib_buffer_t * b)
+bfd_init_control_frame (bfd_session_t *bs, vlib_buffer_t *b)
{
bfd_pkt_t *pkt = vlib_buffer_get_current (b);
u32 bfd_length = 0;
@@ -891,9 +911,39 @@ bfd_init_control_frame (bfd_main_t * bm, bfd_session_t * bs,
b->current_length = bfd_length;
}
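+/* snapshot of a packet sent from the bfd process node, kept for tracing */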
+typedef struct
+{
+ u32 bs_idx;
+ u32 len;
+ u8 data[400];
+} bfd_process_trace_t;
+
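+/* if tracing is active on the process node, capture this buffer's contents */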
static void
-bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt,
- bfd_main_t * bm, bfd_session_t * bs, u64 now)
+bfd_process_trace_buf (vlib_main_t *vm, vlib_node_runtime_t *rt,
+ vlib_buffer_t *b, bfd_session_t *bs)
+{
+ u32 n_trace = vlib_get_trace_count (vm, rt);
+ if (n_trace > 0)
+ {
+ bfd_process_trace_t *tr;
+ if (vlib_trace_buffer (vm, rt, 0, b, 0))
+ {
+ tr = vlib_add_trace (vm, rt, b, sizeof (*tr));
+ tr->bs_idx = bs->bs_idx;
+ u64 len = (b->current_length < sizeof (tr->data)) ?
+ b->current_length :
+ sizeof (tr->data);
+ tr->len = len;
+ clib_memcpy_fast (tr->data, vlib_buffer_get_current (b), len);
+ --n_trace;
+ vlib_set_trace_count (vm, rt, n_trace);
+ }
+ }
+}
+
+static void
+bfd_send_echo (vlib_main_t *vm, vlib_node_runtime_t *rt, bfd_main_t *bm,
+ bfd_session_t *bs, u64 now)
{
if (!bfd_is_echo_possible (bs))
{
@@ -921,6 +971,7 @@ bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt,
bfd_calc_echo_checksum (bs->local_discr, pkt->expire_time_nsec,
bs->echo_secret);
b->current_length = sizeof (*pkt);
+ bfd_process_trace_buf (vm, rt, b, bs);
if (!bfd_echo_add_transport_layer (vm, bi, bs))
{
BFD_ERR ("cannot send echo packet out, turning echo off");
@@ -928,7 +979,7 @@ bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt,
vlib_buffer_free_one (vm, bi);
return;
}
- if (!bfd_transport_echo (vm, bi, bs))
+ if (!bfd_transport_echo (vm, rt, bi, bs))
{
BFD_ERR ("cannot send echo packet out, turning echo off");
bs->echo = 0;
@@ -936,7 +987,7 @@ bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt,
return;
}
bs->echo_last_tx_nsec = now;
- bfd_calc_next_echo_tx (bm, bs, now);
+ bfd_calc_next_echo_tx (bs, now);
}
else
{
@@ -947,8 +998,8 @@ bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt,
}
static void
-bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt,
- bfd_main_t * bm, bfd_session_t * bs, u64 now)
+bfd_send_periodic (vlib_main_t *vm, vlib_node_runtime_t *rt, bfd_main_t *bm,
+ bfd_session_t *bs, u64 now)
{
if (!bs->remote_min_rx_usec && BFD_POLL_NOT_NEEDED == bs->poll_state)
{
@@ -980,7 +1031,7 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt,
}
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
ASSERT (b->current_data == 0);
- bfd_init_control_frame (bm, bs, b);
+ bfd_init_control_frame (bs, b);
switch (bs->poll_state)
{
case BFD_POLL_NEEDED:
@@ -1005,8 +1056,9 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt,
break;
}
bfd_add_auth_section (vm, b, bs);
+ bfd_process_trace_buf (vm, rt, b, bs);
bfd_add_transport_layer (vm, bi, bs);
- if (!bfd_transport_control_frame (vm, bi, bs))
+ if (!bfd_transport_control_frame (vm, rt, bi, bs))
{
vlib_buffer_free_one (vm, bi);
}
@@ -1022,12 +1074,11 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt,
}
void
-bfd_init_final_control_frame (vlib_main_t * vm, vlib_buffer_t * b,
- bfd_main_t * bm, bfd_session_t * bs,
- int is_local)
+bfd_init_final_control_frame (vlib_main_t *vm, vlib_buffer_t *b,
+ bfd_session_t *bs)
{
BFD_DBG ("Send final control frame for bs_idx=%lu", bs->bs_idx);
- bfd_init_control_frame (bm, bs, b);
+ bfd_init_control_frame (bs, b);
bfd_pkt_set_final (vlib_buffer_get_current (b));
bfd_add_auth_section (vm, b, bs);
u32 bi = vlib_get_buffer_index (vm, b);
@@ -1069,7 +1120,7 @@ bfd_check_rx_timeout (vlib_main_t * vm, bfd_main_t * bm, bfd_session_t * bs,
* since it is no longer required to maintain previous session state)
* and then can transmit at its own rate.
*/
- bfd_set_remote_required_min_rx (bm, bs, now, 1);
+ bfd_set_remote_required_min_rx (bs, 1);
}
else if (bs->echo
&& bs->echo_last_rx_nsec +
@@ -1082,15 +1133,14 @@ bfd_check_rx_timeout (vlib_main_t * vm, bfd_main_t * bm, bfd_session_t * bs,
}
void
-bfd_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_main_t * bm,
- bfd_session_t * bs, u64 now)
+bfd_on_timeout (vlib_main_t *vm, vlib_node_runtime_t *rt, bfd_main_t *bm,
+ bfd_session_t *bs, u64 now)
{
BFD_DBG ("Timeout for bs_idx=%lu", bs->bs_idx);
switch (bs->local_state)
{
case BFD_STATE_admin_down:
- bfd_send_periodic (vm, rt, bm, bs, now);
- break;
+ /* fallthrough */
case BFD_STATE_down:
bfd_send_periodic (vm, rt, bm, bs, now);
break;
@@ -1108,10 +1158,9 @@ bfd_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_main_t * bm,
bs->echo = 1;
bs->echo_last_rx_nsec = now;
bs->echo_tx_timeout_nsec = now;
- bfd_set_effective_required_min_rx (bm, bs,
- clib_max
- (bm->min_required_min_rx_while_echo_nsec,
- bs->config_required_min_rx_nsec));
+ bfd_set_effective_required_min_rx (
+ bs, clib_max (bm->min_required_min_rx_while_echo_nsec,
+ bs->config_required_min_rx_nsec));
bfd_set_poll_state (bs, BFD_POLL_NEEDED);
}
bfd_send_periodic (vm, rt, bm, bs, now);
@@ -1123,11 +1172,25 @@ bfd_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_main_t * bm,
}
}
+u8 *
+format_bfd_process_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ bfd_process_trace_t *t = va_arg (*args, bfd_process_trace_t *);
+
+ s =
+ format (s, "bs_idx=%u => %U", t->bs_idx, format_bfd_pkt, t->len, t->data);
+
+ return s;
+}
+
/*
* bfd process node function
*/
static uword
-bfd_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
+bfd_process (vlib_main_t *vm, vlib_node_runtime_t *rt,
+ CLIB_UNUSED (vlib_frame_t *f))
{
bfd_main_t *bm = &bfd_main;
u32 *expired = 0;
@@ -1168,7 +1231,9 @@ bfd_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
now + first_expires_in_ticks * bm->nsec_per_tw_tick;
bm->bfd_process_next_wakeup_nsec = next_expire_nsec;
bfd_unlock (bm);
- timeout = (next_expire_nsec - now) * SEC_PER_NSEC;
+ ASSERT (next_expire_nsec - now <= UINT32_MAX);
+ // cast to u32 (range checked by the ASSERT above) to avoid a conversion
+ // warning
+ timeout = (u32) (next_expire_nsec - now) * SEC_PER_NSEC;
}
BFD_DBG ("vlib_process_wait_for_event_or_clock(vm, %.09f)",
timeout);
@@ -1224,7 +1289,7 @@ bfd_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
{
bfd_session_t *bs =
pool_elt_at_index (bm->sessions, *session_index);
- bfd_on_config_change (vm, rt, bm, bs, now);
+ bfd_on_config_change (bm, bs, now);
}
else
{
@@ -1258,11 +1323,11 @@ bfd_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
bfd_unlock (bm);
if (expired)
{
- _vec_len (expired) = 0;
+ vec_set_len (expired, 0);
}
if (event_data)
{
- _vec_len (event_data) = 0;
+ vec_set_len (event_data, 0);
}
}
@@ -1272,18 +1337,29 @@ bfd_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
/*
* bfd process node declaration
*/
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (bfd_process_node, static) = {
+// clang-format off
+VLIB_REGISTER_NODE (bfd_process_node, static) =
+{
.function = bfd_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "bfd-process",
- .n_next_nodes = 0,
- .next_nodes = {},
+ .flags = (VLIB_NODE_FLAG_TRACE_SUPPORTED),
+ .format_trace = format_bfd_process_trace,
+ .n_next_nodes = BFD_TX_N_NEXT,
+ .next_nodes = {
+ [BFD_TX_IP4_ARP] = "ip4-arp",
+ [BFD_TX_IP6_NDP] = "ip6-discover-neighbor",
+ [BFD_TX_IP4_REWRITE] = "ip4-rewrite",
+ [BFD_TX_IP6_REWRITE] = "ip6-rewrite",
+ [BFD_TX_IP4_MIDCHAIN] = "ip4-midchain",
+ [BFD_TX_IP6_MIDCHAIN] = "ip6-midchain",
+ }
};
-/* *INDENT-ON* */
+// clang-format on
static clib_error_t *
-bfd_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
+bfd_sw_interface_up_down (CLIB_UNUSED (vnet_main_t *vnm),
+ CLIB_UNUSED (u32 sw_if_index), u32 flags)
{
// bfd_main_t *bm = &bfd_main;
// vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
@@ -1297,7 +1373,8 @@ bfd_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (bfd_sw_interface_up_down);
static clib_error_t *
-bfd_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+bfd_hw_interface_up_down (CLIB_UNUSED (vnet_main_t *vnm),
+ CLIB_UNUSED (u32 hw_if_index), u32 flags)
{
// bfd_main_t *bm = &bfd_main;
if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP)
@@ -1346,6 +1423,14 @@ bfd_main_init (vlib_main_t * vm)
bm->owner_thread_index = ~0;
if (n_vlib_mains > 1)
clib_spinlock_init (&bm->lock);
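+ /* per-session combined packet/byte counters exported via the stats
+ * segment */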
+ bm->rx_counter.name = "bfd rx session counters";
+ bm->rx_counter.stat_segment_name = "/bfd/rx-session-counters";
+ bm->rx_echo_counter.name = "bfd rx session echo counters";
+ bm->rx_echo_counter.stat_segment_name = "/bfd/rx-session-echo-counters";
+ bm->tx_counter.name = "bfd tx session counters";
+ bm->tx_counter.stat_segment_name = "/bfd/tx-session-counters";
+ bm->tx_echo_counter.name = "bfd tx session echo counters";
+ bm->tx_echo_counter.stat_segment_name = "/bfd/tx-session-echo-counters";
return 0;
}
@@ -1381,6 +1466,11 @@ bfd_get_session (bfd_main_t * bm, bfd_transport_e t)
while (hash_get (bm->session_by_disc, result->local_discr));
bfd_set_defaults (bm, result);
hash_set (bm->session_by_disc, result->local_discr, result->bs_idx);
+ bfd_validate_counters (bm);
+ vlib_zero_combined_counter (&bm->rx_counter, result->bs_idx);
+ vlib_zero_combined_counter (&bm->rx_echo_counter, result->bs_idx);
+ vlib_zero_combined_counter (&bm->tx_counter, result->bs_idx);
+ vlib_zero_combined_counter (&bm->tx_echo_counter, result->bs_idx);
bfd_unlock (bm);
return result;
}
@@ -1392,7 +1482,6 @@ bfd_put_session (bfd_main_t * bm, bfd_session_t * bs)
vlib_log_info (bm->log_class, "delete session: %U",
format_bfd_session_brief, bs);
- bfd_notify_listeners (bm, BFD_LISTEN_EVENT_DELETE, bs);
if (bs->auth.curr_key)
{
--bs->auth.curr_key->use_count;
@@ -1402,6 +1491,10 @@ bfd_put_session (bfd_main_t * bm, bfd_session_t * bs)
--bs->auth.next_key->use_count;
}
hash_unset (bm->session_by_disc, bs->local_discr);
+ vlib_zero_combined_counter (&bm->rx_counter, bs->bs_idx);
+ vlib_zero_combined_counter (&bm->rx_echo_counter, bs->bs_idx);
+ vlib_zero_combined_counter (&bm->tx_counter, bs->bs_idx);
+ vlib_zero_combined_counter (&bm->tx_echo_counter, bs->bs_idx);
pool_put (bm->sessions, bs);
bfd_unlock (bm);
}
@@ -1436,14 +1529,14 @@ bfd_find_session_by_disc (bfd_main_t * bm, u32 disc)
*
- * @return 1 if bfd packet is valid
+ * @return BFD_ERROR_NONE if bfd packet is valid
*/
-int
-bfd_verify_pkt_common (const bfd_pkt_t * pkt)
+bfd_error_t
+bfd_verify_pkt_common (const bfd_pkt_t *pkt)
{
if (1 != bfd_pkt_get_version (pkt))
{
BFD_ERR ("BFD verification failed - unexpected version: '%d'",
bfd_pkt_get_version (pkt));
- return 0;
+ return BFD_ERROR_VERSION;
}
if (pkt->head.length < sizeof (bfd_pkt_t) ||
(bfd_pkt_get_auth_present (pkt) &&
@@ -1452,25 +1545,25 @@ bfd_verify_pkt_common (const bfd_pkt_t * pkt)
BFD_ERR ("BFD verification failed - unexpected length: '%d' (auth "
"present: %d)",
pkt->head.length, bfd_pkt_get_auth_present (pkt));
- return 0;
+ return BFD_ERROR_LENGTH;
}
if (!pkt->head.detect_mult)
{
BFD_ERR ("BFD verification failed - unexpected detect-mult: '%d'",
pkt->head.detect_mult);
- return 0;
+ return BFD_ERROR_DETECT_MULTI;
}
if (bfd_pkt_get_multipoint (pkt))
{
BFD_ERR ("BFD verification failed - unexpected multipoint: '%d'",
bfd_pkt_get_multipoint (pkt));
- return 0;
+ return BFD_ERROR_MULTI_POINT;
}
if (!pkt->my_disc)
{
BFD_ERR ("BFD verification failed - unexpected my-disc: '%d'",
pkt->my_disc);
- return 0;
+ return BFD_ERROR_MY_DISC;
}
if (!pkt->your_disc)
{
@@ -1479,10 +1572,10 @@ bfd_verify_pkt_common (const bfd_pkt_t * pkt)
{
BFD_ERR ("BFD verification failed - unexpected state: '%s' "
"(your-disc is zero)", bfd_state_string (pkt_state));
- return 0;
+ return BFD_ERROR_YOUR_DISC;
}
}
- return 1;
+ return BFD_ERROR_NONE;
}
static void
@@ -1581,8 +1674,8 @@ bfd_verify_pkt_auth_seq_num (vlib_main_t * vm, bfd_session_t * bs,
static int
bfd_verify_pkt_auth_key_sha1 (vlib_main_t *vm, const bfd_pkt_t *pkt,
- u32 pkt_size, bfd_session_t *bs, u8 bfd_key_id,
- bfd_auth_key_t *auth_key)
+ u32 pkt_size, CLIB_UNUSED (bfd_session_t *bs),
+ u8 bfd_key_id, bfd_auth_key_t *auth_key)
{
ASSERT (auth_key->auth_type == BFD_AUTH_TYPE_keyed_sha1 ||
auth_key->auth_type == BFD_AUTH_TYPE_meticulous_keyed_sha1);
@@ -1634,6 +1727,11 @@ bfd_verify_pkt_auth_key_sha1 (vlib_main_t *vm, const bfd_pkt_t *pkt,
op.len = sizeof (*with_sha1);
op.digest = calculated_hash;
vnet_crypto_process_ops (vm, &op, 1);
+
+ /* Restore the modified data within the packet */
+ clib_memcpy (with_sha1->sha1_auth.hash, hash_from_packet,
+ sizeof (with_sha1->sha1_auth.hash));
+
if (0 ==
memcmp (calculated_hash, hash_from_packet, sizeof (calculated_hash)))
{
@@ -1662,18 +1760,14 @@ bfd_verify_pkt_auth_key (vlib_main_t * vm, const bfd_pkt_t * pkt,
bfd_auth_type_str (auth_key->auth_type));
return 0;
case BFD_AUTH_TYPE_simple_password:
- vlib_log_err (bm->log_class,
- "internal error, not implemented, unexpected auth_type=%d:%s",
- auth_key->auth_type,
- bfd_auth_type_str (auth_key->auth_type));
- return 0;
+ /* fallthrough */
case BFD_AUTH_TYPE_keyed_md5:
/* fallthrough */
case BFD_AUTH_TYPE_meticulous_keyed_md5:
- vlib_log_err
- (bm->log_class,
- "internal error, not implemented, unexpected auth_type=%d:%s",
- auth_key->auth_type, bfd_auth_type_str (auth_key->auth_type));
+ vlib_log_err (
+ bm->log_class,
+ "internal error, not implemented, unexpected auth_type=%d:%s",
+ auth_key->auth_type, bfd_auth_type_str (auth_key->auth_type));
return 0;
case BFD_AUTH_TYPE_keyed_sha1:
/* fallthrough */
@@ -1780,8 +1874,8 @@ bfd_verify_pkt_auth (vlib_main_t * vm, const bfd_pkt_t * pkt, u16 pkt_size,
return 0;
}
-void
-bfd_consume_pkt (vlib_main_t * vm, bfd_main_t * bm, const bfd_pkt_t * pkt,
+bfd_error_t
+bfd_consume_pkt (vlib_main_t *vm, bfd_main_t *bm, const bfd_pkt_t *pkt,
u32 bs_idx)
{
bfd_lock_check (bm);
@@ -1789,7 +1883,7 @@ bfd_consume_pkt (vlib_main_t * vm, bfd_main_t * bm, const bfd_pkt_t * pkt,
bfd_session_t *bs = bfd_find_session_by_idx (bm, bs_idx);
if (!bs || (pkt->your_disc && pkt->your_disc != bs->local_discr))
{
- return;
+ return BFD_ERROR_YOUR_DISC;
}
BFD_DBG ("Scanning bfd packet, bs_idx=%d", bs->bs_idx);
bs->remote_discr = pkt->my_disc;
@@ -1834,11 +1928,9 @@ bfd_consume_pkt (vlib_main_t * vm, bfd_main_t * bm, const bfd_pkt_t * pkt,
bs->remote_desired_min_tx_nsec =
bfd_usec_to_nsec (clib_net_to_host_u32 (pkt->des_min_tx));
bs->remote_detect_mult = pkt->head.detect_mult;
- bfd_set_remote_required_min_rx (bm, bs, now,
- clib_net_to_host_u32 (pkt->req_min_rx));
- bfd_set_remote_required_min_echo_rx (bm, bs, now,
- clib_net_to_host_u32
- (pkt->req_min_echo_rx));
+ bfd_set_remote_required_min_rx (bs, clib_net_to_host_u32 (pkt->req_min_rx));
+ bfd_set_remote_required_min_echo_rx (
+ bs, clib_net_to_host_u32 (pkt->req_min_echo_rx));
if (bfd_pkt_get_final (pkt))
{
if (BFD_POLL_IN_PROGRESS == bs->poll_state)
@@ -1849,10 +1941,10 @@ bfd_consume_pkt (vlib_main_t * vm, bfd_main_t * bm, const bfd_pkt_t * pkt,
{
bfd_set_effective_desired_min_tx (
bm, bs, now, bs->config_desired_min_tx_nsec);
- bfd_set_effective_required_min_rx (bm, bs,
- clib_max (bs->echo *
- bm->min_required_min_rx_while_echo_nsec,
- bs->config_required_min_rx_nsec));
+ bfd_set_effective_required_min_rx (
+ bs,
+ clib_max (bs->echo * bm->min_required_min_rx_while_echo_nsec,
+ bs->config_required_min_rx_nsec));
}
}
else if (BFD_POLL_IN_PROGRESS_AND_QUEUED == bs->poll_state)
@@ -1877,7 +1969,7 @@ bfd_consume_pkt (vlib_main_t * vm, bfd_main_t * bm, const bfd_pkt_t * pkt,
{
BFD_DBG ("Session is admin-down, ignoring packet, bs_idx=%u",
bs->bs_idx);
- return;
+ return BFD_ERROR_ADMIN_DOWN;
}
if (BFD_STATE_admin_down == bs->remote_state)
{
@@ -1914,10 +2006,11 @@ bfd_consume_pkt (vlib_main_t * vm, bfd_main_t * bm, const bfd_pkt_t * pkt,
bfd_set_state (vm, bm, bs, BFD_STATE_down, 0);
}
}
+ return BFD_ERROR_NONE;
}
-int
-bfd_consume_echo_pkt (vlib_main_t * vm, bfd_main_t * bm, vlib_buffer_t * b)
+bfd_session_t *
+bfd_consume_echo_pkt (vlib_main_t *vm, bfd_main_t *bm, vlib_buffer_t *b)
{
bfd_echo_pkt_t *pkt = NULL;
if (b->current_length != sizeof (*pkt))
@@ -1937,7 +2030,7 @@ bfd_consume_echo_pkt (vlib_main_t * vm, bfd_main_t * bm, vlib_buffer_t * b)
if (checksum != pkt->checksum)
{
BFD_DBG ("Invalid echo packet, checksum mismatch");
- return 1;
+ return 0;
}
u64 now = bfd_time_now_nsec (vm, NULL);
if (pkt->expire_time_nsec < now)
@@ -1949,7 +2042,7 @@ bfd_consume_echo_pkt (vlib_main_t * vm, bfd_main_t * bm, vlib_buffer_t * b)
{
bs->echo_last_rx_nsec = now;
}
- return 1;
+ return bs;
}
u8 *
diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h
index 0bdcfb87622..1d4617e1d7c 100644
--- a/src/vnet/bfd/bfd_main.h
+++ b/src/vnet/bfd/bfd_main.h
@@ -258,7 +258,7 @@ typedef enum
} bfd_listen_event_e;
/**
- * session nitification call back function type
+ * session notification call back function type
*/
typedef void (*bfd_notify_fn_t) (bfd_listen_event_e, const bfd_session_t *);
@@ -322,15 +322,27 @@ typedef struct
vlib_log_class_t log_class;
u16 msg_id_base;
+
+ vlib_combined_counter_main_t rx_counter;
+ vlib_combined_counter_main_t rx_echo_counter;
+ vlib_combined_counter_main_t tx_counter;
+ vlib_combined_counter_main_t tx_echo_counter;
} bfd_main_t;
extern bfd_main_t bfd_main;
/** Packet counters */
-#define foreach_bfd_error(F) \
- F (NONE, "good bfd packets (processed)") \
- F (BAD, "invalid bfd packets") \
- F (DISABLED, "bfd packets received on disabled interfaces")
+#define foreach_bfd_error(F) \
+ F (NONE, "good bfd packets (processed)") \
+ F (BAD, "invalid bfd packets") \
+ F (DISABLED, "bfd packets received on disabled interfaces") \
+ F (VERSION, "version") \
+ F (LENGTH, "length") \
+ F (DETECT_MULTI, "detect-multi") \
+ F (MULTI_POINT, "multi-point") \
+ F (MY_DISC, "my-disc") \
+ F (YOUR_DISC, "your-disc") \
+ F (ADMIN_DOWN, "session admin-down")
typedef enum
{
@@ -354,7 +366,6 @@ typedef enum
BFD_EVENT_CONFIG_CHANGED,
} bfd_process_event_e;
-/* *INDENT-OFF* */
/** echo packet structure */
typedef CLIB_PACKED (struct {
/** local discriminator */
@@ -364,7 +375,6 @@ typedef CLIB_PACKED (struct {
/** checksum - based on discriminator, local secret and expire time */
u64 checksum;
}) bfd_echo_pkt_t;
-/* *INDENT-ON* */
static inline void
bfd_lock (bfd_main_t * bm)
@@ -412,17 +422,17 @@ void bfd_put_session (bfd_main_t * bm, bfd_session_t * bs);
bfd_session_t *bfd_find_session_by_idx (bfd_main_t * bm, uword bs_idx);
bfd_session_t *bfd_find_session_by_disc (bfd_main_t * bm, u32 disc);
void bfd_session_start (bfd_main_t * bm, bfd_session_t * bs);
-void bfd_consume_pkt (vlib_main_t * vm, bfd_main_t * bm,
- const bfd_pkt_t * bfd, u32 bs_idx);
-int bfd_consume_echo_pkt (vlib_main_t * vm, bfd_main_t * bm,
- vlib_buffer_t * b);
-int bfd_verify_pkt_common (const bfd_pkt_t * pkt);
+void bfd_session_stop (bfd_main_t *bm, bfd_session_t *bs);
+bfd_error_t bfd_consume_pkt (vlib_main_t *vm, bfd_main_t *bm,
+ const bfd_pkt_t *bfd, u32 bs_idx);
+bfd_session_t *bfd_consume_echo_pkt (vlib_main_t *vm, bfd_main_t *bm,
+ vlib_buffer_t *b);
+bfd_error_t bfd_verify_pkt_common (const bfd_pkt_t *pkt);
int bfd_verify_pkt_auth (vlib_main_t * vm, const bfd_pkt_t * pkt,
u16 pkt_size, bfd_session_t * bs);
void bfd_event (bfd_main_t * bm, bfd_session_t * bs);
-void bfd_init_final_control_frame (vlib_main_t * vm, vlib_buffer_t * b,
- bfd_main_t * bm, bfd_session_t * bs,
- int is_local);
+void bfd_init_final_control_frame (vlib_main_t *vm, vlib_buffer_t *b,
+ bfd_session_t *bs);
u8 *format_bfd_session (u8 * s, va_list * args);
u8 *format_bfd_session_brief (u8 * s, va_list * args);
u8 *format_bfd_auth_key (u8 * s, va_list * args);
@@ -464,6 +474,17 @@ const char *bfd_poll_state_string (bfd_poll_state_e state);
*/
void bfd_register_listener (bfd_notify_fn_t fn);
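+
+/* next-node indices for frames sent by the bfd process node */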
+typedef enum
+{
+ BFD_TX_IP4_ARP,
+ BFD_TX_IP6_NDP,
+ BFD_TX_IP4_REWRITE,
+ BFD_TX_IP6_REWRITE,
+ BFD_TX_IP4_MIDCHAIN,
+ BFD_TX_IP6_MIDCHAIN,
+ BFD_TX_N_NEXT,
+} bfd_tx_next_t;
+
#endif /* __included_bfd_main_h__ */
/*
diff --git a/src/vnet/bfd/bfd_protocol.h b/src/vnet/bfd/bfd_protocol.h
index 210c561b430..16ee3231ef0 100644
--- a/src/vnet/bfd/bfd_protocol.h
+++ b/src/vnet/bfd/bfd_protocol.h
@@ -46,14 +46,11 @@ typedef enum
u32 bfd_max_key_len_for_auth_type (bfd_auth_type_e auth_type);
const char *bfd_auth_type_str (bfd_auth_type_e auth_type);
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 type;
u8 len;
}) bfd_auth_common_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
/*
* 4.4. Keyed SHA1 and Meticulous Keyed SHA1 Authentication Section Format
@@ -88,9 +85,7 @@ typedef CLIB_PACKED (struct {
*/
u8 hash[20];
}) bfd_auth_sha1_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
/*
* The Mandatory Section of a BFD Control packet has the following
@@ -125,21 +120,16 @@ typedef CLIB_PACKED (struct {
u32 req_min_rx;
u32 req_min_echo_rx;
}) bfd_pkt_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
bfd_pkt_t pkt;
bfd_auth_common_t common_auth;
}) bfd_pkt_with_common_auth_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
bfd_pkt_t pkt;
bfd_auth_sha1_t sha1_auth;
}) bfd_pkt_with_sha1_auth_t;
-/* *INDENT-ON* */
u8 bfd_pkt_get_version (const bfd_pkt_t * pkt);
void bfd_pkt_set_version (bfd_pkt_t * pkt, int version);
diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c
index 4ad5660fdf6..ec42cda1bc4 100644
--- a/src/vnet/bfd/bfd_udp.c
+++ b/src/vnet/bfd/bfd_udp.c
@@ -35,10 +35,20 @@
#include <vnet/dpo/receive_dpo.h>
#include <vnet/fib/fib_entry.h>
#include <vnet/fib/fib_table.h>
+#include <vlib/stats/stats.h>
#include <vnet/bfd/bfd_debug.h>
#include <vnet/bfd/bfd_udp.h>
#include <vnet/bfd/bfd_main.h>
#include <vnet/bfd/bfd_api.h>
+#include <vnet/bfd/bfd.api_enum.h>
+
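+/* bfd_error_t values double as bfd-udp node counter indices, so the shared
+ * entries of the two enums must stay in sync */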
+#define F(sym, str) \
+ STATIC_ASSERT ((int) BFD_ERROR_##sym == (int) BFD_UDP_ERROR_##sym, \
+ "BFD error enums mismatch");
+foreach_bfd_error (F)
+#undef F
+ STATIC_ASSERT ((int) BFD_N_ERROR <= (int) BFD_UDP_N_ERROR,
+ "BFD error enum sizes mismatch");
typedef struct
{
@@ -52,24 +62,14 @@ typedef struct
int echo_source_is_set;
/* loopback interface used to get echo source ip */
u32 echo_source_sw_if_index;
- /* node index of "ip4-arp" node */
- u32 ip4_arp_idx;
- /* node index of "ip6-discover-neighbor" node */
- u32 ip6_ndp_idx;
- /* node index of "ip4-rewrite" node */
- u32 ip4_rewrite_idx;
- /* node index of "ip6-rewrite" node */
- u32 ip6_rewrite_idx;
- /* node index of "ip4-midchain" node */
- u32 ip4_midchain_idx;
- /* node index of "ip6-midchain" node */
- u32 ip6_midchain_idx;
/* log class */
vlib_log_class_t log_class;
/* number of active udp4 sessions */
u32 udp4_sessions_count;
+ u32 udp4_sessions_count_stat_seg_entry;
/* number of active udp6 sessions */
u32 udp6_sessions_count;
+ u32 udp6_sessions_count_stat_seg_entry;
} bfd_udp_main_t;
static vlib_node_registration_t bfd_udp4_input_node;
@@ -79,6 +79,14 @@ static vlib_node_registration_t bfd_udp_echo6_input_node;
bfd_udp_main_t bfd_udp_main;
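+/* helper to set a stats segment gauge under the stats segment lock */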
+void
+bfd_udp_update_stat_segment_entry (u32 entry, u64 value)
+{
+ vlib_stats_segment_lock ();
+ vlib_stats_set_gauge (entry, value);
+ vlib_stats_segment_unlock ();
+}
+
vnet_api_error_t
bfd_udp_set_echo_source (u32 sw_if_index)
{
@@ -94,7 +102,7 @@ bfd_udp_set_echo_source (u32 sw_if_index)
}
vnet_api_error_t
-bfd_udp_del_echo_source (u32 sw_if_index)
+bfd_udp_del_echo_source ()
{
bfd_udp_main.echo_source_sw_if_index = ~0;
bfd_udp_main.echo_source_is_set = 0;
@@ -123,7 +131,6 @@ bfd_udp_is_echo_available (bfd_transport_e transport)
{
ip4_main_t *im = &ip4_main;
ip_interface_address_t *ia = NULL;
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im->lookup_main, ia,
bfd_udp_main.echo_source_sw_if_index,
0 /* honor unnumbered */, ({
@@ -132,13 +139,11 @@ bfd_udp_is_echo_available (bfd_transport_e transport)
return 1;
}
}));
- /* *INDENT-ON* */
}
else if (BFD_TRANSPORT_UDP6 == transport)
{
ip6_main_t *im = &ip6_main;
ip_interface_address_t *ia = NULL;
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im->lookup_main, ia,
bfd_udp_main.echo_source_sw_if_index,
0 /* honor unnumbered */, ({
@@ -147,7 +152,6 @@ bfd_udp_is_echo_available (bfd_transport_e transport)
return 1;
}
}));
- /* *INDENT-ON* */
}
}
BFD_DBG ("No usable IP address for UDP echo - echo not available");
@@ -179,7 +183,6 @@ bfd_udp_get_echo_src_ip4 (ip4_address_t * addr)
ip_interface_address_t *ia = NULL;
ip4_main_t *im = &ip4_main;
- /* *INDENT-OFF* */
foreach_ip_interface_address (
&im->lookup_main, ia, bfd_udp_main.echo_source_sw_if_index,
0 /* honor unnumbered */, ({
@@ -197,7 +200,6 @@ bfd_udp_get_echo_src_ip4 (ip4_address_t * addr)
return 1;
}
}));
- /* *INDENT-ON* */
BFD_ERR ("cannot find ip4 address, no usable address found");
return 0;
}
@@ -213,7 +215,6 @@ bfd_udp_get_echo_src_ip6 (ip6_address_t * addr)
ip_interface_address_t *ia = NULL;
ip6_main_t *im = &ip6_main;
- /* *INDENT-OFF* */
foreach_ip_interface_address (
&im->lookup_main, ia, bfd_udp_main.echo_source_sw_if_index,
0 /* honor unnumbered */, ({
@@ -226,7 +227,6 @@ bfd_udp_get_echo_src_ip6 (ip6_address_t * addr)
return 1;
}
}));
- /* *INDENT-ON* */
BFD_ERR ("cannot find ip6 address, no usable address found");
return 0;
}
@@ -372,13 +372,25 @@ bfd_add_udp6_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs,
}
static void
-bfd_create_frame_to_next_node (vlib_main_t * vm, u32 bi, u32 next_node)
+bfd_create_frame_to_next_node (vlib_main_t *vm, vlib_node_runtime_t *rt,
+ u32 bi, const bfd_session_t *bs, u32 next,
+ vlib_combined_counter_main_t *tx_counter)
{
- vlib_frame_t *f = vlib_get_frame_to_node (vm, next_node);
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ vlib_node_t *from_node = vlib_get_node (vm, rt->node_index);
+ ASSERT (next < vec_len (from_node->next_nodes));
+ u32 to_node_index = from_node->next_nodes[next];
+ vlib_frame_t *f = vlib_get_frame_to_node (vm, to_node_index);
u32 *to_next = vlib_frame_vector_args (f);
to_next[0] = bi;
f->n_vectors = 1;
- vlib_put_frame_to_node (vm, next_node, f);
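+ /* carry the trace flag over so the target node traces this buffer too */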
+ if (b->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ f->frame_flags |= VLIB_NODE_FLAG_TRACE;
+ }
+ vlib_put_frame_to_node (vm, to_node_index, f);
+ vlib_increment_combined_counter (tx_counter, vm->thread_index, bs->bs_idx, 1,
+ vlib_buffer_length_in_chain (vm, b));
}
int
@@ -398,10 +410,10 @@ bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node)
switch (bs->transport)
{
case BFD_TRANSPORT_UDP4:
- *next_node = bfd_udp_main.ip4_arp_idx;
+ *next_node = BFD_TX_IP4_ARP;
return 1;
case BFD_TRANSPORT_UDP6:
- *next_node = bfd_udp_main.ip6_ndp_idx;
+ *next_node = BFD_TX_IP6_NDP;
return 1;
}
break;
@@ -409,10 +421,10 @@ bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node)
switch (bs->transport)
{
case BFD_TRANSPORT_UDP4:
- *next_node = bfd_udp_main.ip4_rewrite_idx;
+ *next_node = BFD_TX_IP4_REWRITE;
return 1;
case BFD_TRANSPORT_UDP6:
- *next_node = bfd_udp_main.ip6_rewrite_idx;
+ *next_node = BFD_TX_IP6_REWRITE;
return 1;
}
break;
@@ -420,10 +432,10 @@ bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node)
switch (bs->transport)
{
case BFD_TRANSPORT_UDP4:
- *next_node = bfd_udp_main.ip4_midchain_idx;
+ *next_node = BFD_TX_IP4_MIDCHAIN;
return 1;
case BFD_TRANSPORT_UDP6:
- *next_node = bfd_udp_main.ip6_midchain_idx;
+ *next_node = BFD_TX_IP6_MIDCHAIN;
return 1;
}
break;
@@ -435,27 +447,35 @@ bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node)
}
int
-bfd_transport_udp4 (vlib_main_t * vm, u32 bi, const struct bfd_session_s *bs)
+bfd_transport_udp4 (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi,
+ const struct bfd_session_s *bs, int is_echo)
{
u32 next_node;
int rv = bfd_udp_calc_next_node (bs, &next_node);
+ bfd_main_t *bm = bfd_udp_main.bfd_main;
if (rv)
{
- bfd_create_frame_to_next_node (vm, bi, next_node);
+ bfd_create_frame_to_next_node (vm, rt, bi, bs, next_node,
+ is_echo ? &bm->tx_echo_counter :
+ &bm->tx_counter);
}
return rv;
}
int
-bfd_transport_udp6 (vlib_main_t * vm, u32 bi, const struct bfd_session_s *bs)
+bfd_transport_udp6 (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi,
+ const struct bfd_session_s *bs, int is_echo)
{
u32 next_node;
int rv = bfd_udp_calc_next_node (bs, &next_node);
+ bfd_main_t *bm = bfd_udp_main.bfd_main;
if (rv)
{
- bfd_create_frame_to_next_node (vm, bi, next_node);
+ bfd_create_frame_to_next_node (vm, rt, bi, bs, next_node,
+ is_echo ? &bm->tx_echo_counter :
+ &bm->tx_counter);
}
- return 1;
+ return rv;
}
static bfd_session_t *
@@ -503,6 +523,7 @@ bfd_udp_add_session_internal (vlib_main_t * vm, bfd_udp_main_t * bum,
}
bfd_udp_session_t *bus = &bs->udp;
clib_memset (bus, 0, sizeof (*bus));
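+ /* no adjacency yet - a neighbour adjacency is locked further below */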
+ bus->adj_index = ADJ_INDEX_INVALID;
bfd_udp_key_t *key = &bus->key;
bfd_udp_key_init (key, sw_if_index, local_addr, peer_addr);
const bfd_session_t *tmp = bfd_lookup_session (bum, key);
@@ -521,15 +542,21 @@ bfd_udp_add_session_internal (vlib_main_t * vm, bfd_udp_main_t * bum,
&key->peer_addr, IP46_TYPE_ANY);
vlib_log_info (bum->log_class, "create BFD session: %U",
format_bfd_session, bs);
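+ /* on point-to-point interfaces key the adjacency on the zero address */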
+ const ip46_address_t *peer =
+ (vnet_sw_interface_is_p2p (vnet_get_main (), key->sw_if_index) ?
+ &zero_addr :
+ &key->peer_addr);
if (BFD_TRANSPORT_UDP4 == t)
{
bus->adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, VNET_LINK_IP4,
- &key->peer_addr,
- key->sw_if_index);
+ peer, key->sw_if_index);
BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, VNET_LINK_IP4, %U, %d) "
- "returns %d", format_ip46_address, &key->peer_addr,
- IP46_TYPE_ANY, key->sw_if_index, bus->adj_index);
+ "returns %d",
+ format_ip46_address, peer, IP46_TYPE_ANY, key->sw_if_index,
+ bus->adj_index);
++bum->udp4_sessions_count;
+ bfd_udp_update_stat_segment_entry (
+ bum->udp4_sessions_count_stat_seg_entry, bum->udp4_sessions_count);
if (1 == bum->udp4_sessions_count)
{
udp_register_dst_port (vm, UDP_DST_PORT_bfd4,
@@ -541,12 +568,14 @@ bfd_udp_add_session_internal (vlib_main_t * vm, bfd_udp_main_t * bum,
else
{
bus->adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6,
- &key->peer_addr,
- key->sw_if_index);
+ peer, key->sw_if_index);
BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP6, VNET_LINK_IP6, %U, %d) "
- "returns %d", format_ip46_address, &key->peer_addr,
- IP46_TYPE_ANY, key->sw_if_index, bus->adj_index);
+ "returns %d",
+ format_ip46_address, peer, IP46_TYPE_ANY, key->sw_if_index,
+ bus->adj_index);
++bum->udp6_sessions_count;
+ bfd_udp_update_stat_segment_entry (
+ bum->udp6_sessions_count_stat_seg_entry, bum->udp6_sessions_count);
if (1 == bum->udp6_sessions_count)
{
udp_register_dst_port (vm, UDP_DST_PORT_bfd6,
@@ -568,8 +597,6 @@ bfd_udp_validate_api_input (u32 sw_if_index,
bfd_udp_main_t *bum = &bfd_udp_main;
vnet_sw_interface_t *sw_if =
vnet_get_sw_interface_or_null (bfd_udp_main.vnet_main, sw_if_index);
- u8 local_ip_valid = 0;
- ip_interface_address_t *ia = NULL;
if (!sw_if)
{
vlib_log_err (bum->log_class,
@@ -585,21 +612,6 @@ bfd_udp_validate_api_input (u32 sw_if_index,
"IP family mismatch (local is ipv4, peer is ipv6)");
return VNET_API_ERROR_INVALID_ARGUMENT;
}
- ip4_main_t *im = &ip4_main;
-
- /* *INDENT-OFF* */
- foreach_ip_interface_address (
- &im->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, ({
- ip4_address_t *x =
- ip_interface_address_get_address (&im->lookup_main, ia);
- if (x->as_u32 == local_addr->ip4.as_u32)
- {
- /* valid address for this interface */
- local_ip_valid = 1;
- break;
- }
- }));
- /* *INDENT-ON* */
}
else
{
@@ -609,44 +621,6 @@ bfd_udp_validate_api_input (u32 sw_if_index,
"IP family mismatch (local is ipv6, peer is ipv4)");
return VNET_API_ERROR_INVALID_ARGUMENT;
}
-
- if (ip6_address_is_link_local_unicast (&local_addr->ip6))
- {
- const ip6_address_t *ll_addr;
- ll_addr = ip6_get_link_local_address (sw_if_index);
- if (ip6_address_is_equal (ll_addr, &local_addr->ip6))
- {
- /* valid address for this interface */
- local_ip_valid = 1;
- }
- }
- else
- {
- ip6_main_t *im = &ip6_main;
- /* *INDENT-OFF* */
- foreach_ip_interface_address (
- &im->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, ({
- ip6_address_t *x =
- ip_interface_address_get_address (&im->lookup_main, ia);
- if (local_addr->ip6.as_u64[0] == x->as_u64[0] &&
- local_addr->ip6.as_u64[1] == x->as_u64[1])
- {
- /* valid address for this interface */
- local_ip_valid = 1;
- break;
- }
- }));
- /* *INDENT-ON* */
- }
- }
-
- if (!local_ip_valid)
- {
- vlib_log_err (bum->log_class,
- "local address %U not found on interface with index %u",
- format_ip46_address, local_addr, IP46_TYPE_ANY,
- sw_if_index);
- return VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
}
return 0;
@@ -685,9 +659,8 @@ bfd_udp_find_session_by_api_input (u32 sw_if_index,
static vnet_api_error_t
bfd_api_verify_common (u32 sw_if_index, u32 desired_min_tx_usec,
- u32 required_min_rx_usec, u8 detect_mult,
- const ip46_address_t * local_addr,
- const ip46_address_t * peer_addr)
+ u8 detect_mult, const ip46_address_t *local_addr,
+ const ip46_address_t *peer_addr)
{
bfd_udp_main_t *bum = &bfd_udp_main;
vnet_api_error_t rv =
@@ -714,12 +687,15 @@ bfd_udp_del_session_internal (vlib_main_t * vm, bfd_session_t * bs)
{
bfd_udp_main_t *bum = &bfd_udp_main;
BFD_DBG ("free bfd-udp session, bs_idx=%d", bs->bs_idx);
+ bfd_session_stop (bum->bfd_main, bs);
mhash_unset (&bum->bfd_session_idx_by_bfd_key, &bs->udp.key, NULL);
adj_unlock (bs->udp.adj_index);
switch (bs->transport)
{
case BFD_TRANSPORT_UDP4:
--bum->udp4_sessions_count;
+ bfd_udp_update_stat_segment_entry (
+ bum->udp4_sessions_count_stat_seg_entry, bum->udp4_sessions_count);
if (!bum->udp4_sessions_count)
{
udp_unregister_dst_port (vm, UDP_DST_PORT_bfd4, 1);
@@ -728,6 +704,8 @@ bfd_udp_del_session_internal (vlib_main_t * vm, bfd_session_t * bs)
break;
case BFD_TRANSPORT_UDP6:
--bum->udp6_sessions_count;
+ bfd_udp_update_stat_segment_entry (
+ bum->udp6_sessions_count_stat_seg_entry, bum->udp6_sessions_count);
if (!bum->udp6_sessions_count)
{
udp_unregister_dst_port (vm, UDP_DST_PORT_bfd6, 0);
@@ -738,33 +716,26 @@ bfd_udp_del_session_internal (vlib_main_t * vm, bfd_session_t * bs)
bfd_put_session (bum->bfd_main, bs);
}
-vnet_api_error_t
-bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr,
- const ip46_address_t * peer_addr,
- u32 desired_min_tx_usec, u32 required_min_rx_usec,
- u8 detect_mult, u8 is_authenticated, u32 conf_key_id,
- u8 bfd_key_id)
+static vnet_api_error_t
+bfd_udp_add_and_start_session (u32 sw_if_index,
+ const ip46_address_t *local_addr,
+ const ip46_address_t *peer_addr,
+ u32 desired_min_tx_usec,
+ u32 required_min_rx_usec, u8 detect_mult,
+ u8 is_authenticated, u32 conf_key_id,
+ u8 bfd_key_id)
{
- bfd_main_t *bm = &bfd_main;
- bfd_lock (bm);
-
- vnet_api_error_t rv =
- bfd_api_verify_common (sw_if_index, desired_min_tx_usec,
- required_min_rx_usec, detect_mult,
- local_addr, peer_addr);
bfd_session_t *bs = NULL;
- if (!rv)
- {
- rv =
- bfd_udp_add_session_internal (vlib_get_main (), &bfd_udp_main,
- sw_if_index, desired_min_tx_usec,
- required_min_rx_usec, detect_mult,
- local_addr, peer_addr, &bs);
- }
+ vnet_api_error_t rv;
+
+ rv = bfd_udp_add_session_internal (
+ vlib_get_main (), &bfd_udp_main, sw_if_index, desired_min_tx_usec,
+ required_min_rx_usec, detect_mult, local_addr, peer_addr, &bs);
+
if (!rv && is_authenticated)
{
rv = bfd_auth_activate (bs, conf_key_id, bfd_key_id,
- 0 /* is not delayed */ );
+ 0 /* is not delayed */);
if (rv)
{
bfd_udp_del_session_internal (vlib_get_main (), bs);
@@ -775,15 +746,67 @@ bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr,
bfd_session_start (bfd_udp_main.bfd_main, bs);
}
- bfd_unlock (bm);
return rv;
}
vnet_api_error_t
-bfd_udp_mod_session (u32 sw_if_index,
- const ip46_address_t * local_addr,
+bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr,
const ip46_address_t * peer_addr,
- u32 desired_min_tx_usec,
+ u32 desired_min_tx_usec, u32 required_min_rx_usec,
+ u8 detect_mult, u8 is_authenticated, u32 conf_key_id,
+ u8 bfd_key_id)
+{
+ bfd_main_t *bm = &bfd_main;
+ bfd_lock (bm);
+
+ vnet_api_error_t rv = bfd_api_verify_common (
+ sw_if_index, desired_min_tx_usec, detect_mult, local_addr, peer_addr);
+
+ if (!rv)
+ rv = bfd_udp_add_and_start_session (
+ sw_if_index, local_addr, peer_addr, desired_min_tx_usec,
+ required_min_rx_usec, detect_mult, is_authenticated, conf_key_id,
+ bfd_key_id);
+
+ bfd_unlock (bm);
+ return rv;
+}
+
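+/* upsert semantics: create and start the session if it does not exist yet,
+ * otherwise just update its parameters */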
+vnet_api_error_t
+bfd_udp_upd_session (u32 sw_if_index, const ip46_address_t *local_addr,
+ const ip46_address_t *peer_addr, u32 desired_min_tx_usec,
+ u32 required_min_rx_usec, u8 detect_mult,
+ u8 is_authenticated, u32 conf_key_id, u8 bfd_key_id)
+{
+ bfd_main_t *bm = &bfd_main;
+ bfd_lock (bm);
+
+ vnet_api_error_t rv = bfd_api_verify_common (
+ sw_if_index, desired_min_tx_usec, detect_mult, local_addr, peer_addr);
+ if (!rv)
+ {
+ bfd_session_t *bs = NULL;
+
+ rv = bfd_udp_find_session_by_api_input (sw_if_index, local_addr,
+ peer_addr, &bs);
+ if (VNET_API_ERROR_BFD_ENOENT == rv)
+ rv = bfd_udp_add_and_start_session (
+ sw_if_index, local_addr, peer_addr, desired_min_tx_usec,
+ required_min_rx_usec, detect_mult, is_authenticated, conf_key_id,
+ bfd_key_id);
+ else
+ rv = bfd_session_set_params (bfd_udp_main.bfd_main, bs,
+ desired_min_tx_usec, required_min_rx_usec,
+ detect_mult);
+ }
+
+ bfd_unlock (bm);
+ return rv;
+}
+
+vnet_api_error_t
+bfd_udp_mod_session (u32 sw_if_index, const ip46_address_t *local_addr,
+ const ip46_address_t *peer_addr, u32 desired_min_tx_usec,
u32 required_min_rx_usec, u8 detect_mult)
{
bfd_session_t *bs = NULL;
@@ -903,29 +926,6 @@ typedef enum
BFD_UDP_INPUT_N_NEXT,
} bfd_udp_input_next_t;
-/* Packet counters - BFD control frames */
-#define foreach_bfd_udp_error(F) \
- F (NONE, "good bfd packets (processed)") \
- F (BAD, "invalid bfd packets")
-
-#define F(sym, string) static char BFD_UDP_ERR_##sym##_STR[] = string;
-foreach_bfd_udp_error (F);
-#undef F
-
-static char *bfd_udp_error_strings[] = {
-#define F(sym, string) BFD_UDP_ERR_##sym##_STR,
- foreach_bfd_udp_error (F)
-#undef F
-};
-
-typedef enum
-{
-#define F(sym, str) BFD_UDP_ERROR_##sym,
- foreach_bfd_udp_error (F)
-#undef F
- BFD_UDP_N_ERROR,
-} bfd_udp_error_t;
-
typedef enum
{
BFD_UDP_ECHO_INPUT_NEXT_NORMAL,
@@ -934,28 +934,12 @@ typedef enum
BFD_UDP_ECHO_INPUT_N_NEXT,
} bfd_udp_echo_input_next_t;
-/* Packet counters - BFD ECHO packets */
-#define foreach_bfd_udp_echo_error(F) \
- F (NONE, "good bfd echo packets (processed)") \
- F (BAD, "invalid bfd echo packets")
-
-#define F(sym, string) static char BFD_UDP_ECHO_ERR_##sym##_STR[] = string;
-foreach_bfd_udp_echo_error (F);
-#undef F
-
-static char *bfd_udp_echo_error_strings[] = {
-#define F(sym, string) BFD_UDP_ECHO_ERR_##sym##_STR,
- foreach_bfd_udp_echo_error (F)
-#undef F
-};
-
-typedef enum
+static_always_inline vl_counter_bfd_udp_enum_t
+bfd_error_to_udp (bfd_error_t e)
{
-#define F(sym, str) BFD_UDP_ECHO_ERROR_##sym,
- foreach_bfd_udp_echo_error (F)
-#undef F
- BFD_UDP_ECHO_N_ERROR,
-} bfd_udp_echo_error_t;
+ /* The UDP error is a superset of the protocol-independent errors */
+ return ((vl_counter_bfd_udp_enum_t) e);
+}
static void
bfd_udp4_find_headers (vlib_buffer_t * b, ip4_header_t ** ip4,
@@ -963,7 +947,7 @@ bfd_udp4_find_headers (vlib_buffer_t * b, ip4_header_t ** ip4,
{
/* sanity check first */
const i32 start = vnet_buffer (b)->l3_hdr_offset;
- if (start < 0 && start < sizeof (b->pre_data))
+ if (start < -(signed) sizeof (b->pre_data))
{
BFD_ERR ("Start of ip header is before pre_data, ignoring");
*ip4 = NULL;
@@ -981,9 +965,9 @@ bfd_udp4_find_headers (vlib_buffer_t * b, ip4_header_t ** ip4,
*udp = (udp_header_t *) ((*ip4) + 1);
}
-static bfd_udp_error_t
-bfd_udp4_verify_transport (const ip4_header_t * ip4,
- const udp_header_t * udp, const bfd_session_t * bs)
+static vl_counter_bfd_udp_enum_t
+bfd_udp4_verify_transport (const ip4_header_t *ip4, const udp_header_t *udp,
+ const bfd_session_t *bs)
{
const bfd_udp_session_t *bus = &bs->udp;
const bfd_udp_key_t *key = &bus->key;
@@ -992,21 +976,21 @@ bfd_udp4_verify_transport (const ip4_header_t * ip4,
BFD_ERR ("IPv4 src addr mismatch, got %U, expected %U",
format_ip4_address, ip4->src_address.as_u8, format_ip4_address,
key->peer_addr.ip4.as_u8);
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_SRC_MISMATCH;
}
if (ip4->dst_address.as_u32 != key->local_addr.ip4.as_u32)
{
BFD_ERR ("IPv4 dst addr mismatch, got %U, expected %U",
format_ip4_address, ip4->dst_address.as_u8, format_ip4_address,
key->local_addr.ip4.as_u8);
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_DST_MISMATCH;
}
const u8 expected_ttl = 255;
if (ip4->ttl != expected_ttl)
{
BFD_ERR ("IPv4 unexpected TTL value %u, expected %u", ip4->ttl,
expected_ttl);
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_TTL;
}
if (clib_net_to_host_u16 (udp->src_port) < 49152)
{
@@ -1022,18 +1006,20 @@ typedef struct
bfd_pkt_t pkt;
} bfd_rpc_update_t;
-static void
-bfd_rpc_update_session (vlib_main_t * vm, u32 bs_idx, const bfd_pkt_t * pkt)
+static bfd_error_t
+bfd_rpc_update_session (vlib_main_t *vm, u32 bs_idx, const bfd_pkt_t *pkt)
{
bfd_main_t *bm = &bfd_main;
+ bfd_error_t err;
bfd_lock (bm);
- bfd_consume_pkt (vm, bm, pkt, bs_idx);
+ err = bfd_consume_pkt (vm, bm, pkt, bs_idx);
bfd_unlock (bm);
+
+ return err;
}
-static bfd_udp_error_t
-bfd_udp4_scan (vlib_main_t * vm, vlib_node_runtime_t * rt,
- vlib_buffer_t * b, bfd_session_t ** bs_out)
+static vl_counter_bfd_udp_enum_t
+bfd_udp4_scan (vlib_main_t *vm, vlib_buffer_t *b, bfd_session_t **bs_out)
{
const bfd_pkt_t *pkt = vlib_buffer_get_current (b);
if (sizeof (*pkt) > b->current_length)
@@ -1057,11 +1043,13 @@ bfd_udp4_scan (vlib_main_t * vm, vlib_node_runtime_t * rt,
BFD_ERR
("BFD packet length is larger than udp payload length (%u > %u)",
pkt->head.length, udp_payload_length);
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_LENGTH;
}
- if (!bfd_verify_pkt_common (pkt))
+ vl_counter_bfd_udp_enum_t err;
+ if (BFD_UDP_ERROR_NONE !=
+ (err = bfd_error_to_udp (bfd_verify_pkt_common (pkt))))
{
- return BFD_UDP_ERROR_BAD;
+ return err;
}
bfd_session_t *bs = NULL;
if (pkt->your_disc)
@@ -1086,22 +1074,21 @@ bfd_udp4_scan (vlib_main_t * vm, vlib_node_runtime_t * rt,
if (!bs)
{
BFD_ERR ("BFD session lookup failed - no session matches BFD pkt");
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_NO_SESSION;
}
BFD_DBG ("BFD session found, bs_idx=%u", bs->bs_idx);
if (!bfd_verify_pkt_auth (vm, pkt, b->current_length, bs))
{
BFD_ERR ("Packet verification failed, dropping packet");
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_FAILED_VERIFICATION;
}
- bfd_udp_error_t err;
if (BFD_UDP_ERROR_NONE != (err = bfd_udp4_verify_transport (ip4, udp, bs)))
{
return err;
}
- bfd_rpc_update_session (vm, bs->bs_idx, pkt);
+ err = bfd_error_to_udp (bfd_rpc_update_session (vm, bs->bs_idx, pkt));
*bs_out = bs;
- return BFD_UDP_ERROR_NONE;
+ return err;
}
static void
@@ -1110,7 +1097,7 @@ bfd_udp6_find_headers (vlib_buffer_t * b, ip6_header_t ** ip6,
{
/* sanity check first */
const i32 start = vnet_buffer (b)->l3_hdr_offset;
- if (start < 0 && start < sizeof (b->pre_data))
+ if (start < -(signed) sizeof (b->pre_data))
{
BFD_ERR ("Start of ip header is before pre_data, ignoring");
*ip6 = NULL;
@@ -1136,9 +1123,9 @@ bfd_udp6_find_headers (vlib_buffer_t * b, ip6_header_t ** ip6,
*udp = (udp_header_t *) ((*ip6) + 1);
}
-static bfd_udp_error_t
-bfd_udp6_verify_transport (const ip6_header_t * ip6,
- const udp_header_t * udp, const bfd_session_t * bs)
+static vl_counter_bfd_udp_enum_t
+bfd_udp6_verify_transport (const ip6_header_t *ip6, const udp_header_t *udp,
+ const bfd_session_t *bs)
{
const bfd_udp_session_t *bus = &bs->udp;
const bfd_udp_key_t *key = &bus->key;
@@ -1148,7 +1135,7 @@ bfd_udp6_verify_transport (const ip6_header_t * ip6,
BFD_ERR ("IP src addr mismatch, got %U, expected %U",
format_ip6_address, ip6, format_ip6_address,
&key->peer_addr.ip6);
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_SRC_MISMATCH;
}
if (ip6->dst_address.as_u64[0] != key->local_addr.ip6.as_u64[0] &&
ip6->dst_address.as_u64[1] != key->local_addr.ip6.as_u64[1])
@@ -1156,14 +1143,14 @@ bfd_udp6_verify_transport (const ip6_header_t * ip6,
BFD_ERR ("IP dst addr mismatch, got %U, expected %U",
format_ip6_address, ip6, format_ip6_address,
&key->local_addr.ip6);
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_DST_MISMATCH;
}
const u8 expected_hop_limit = 255;
if (ip6->hop_limit != expected_hop_limit)
{
BFD_ERR ("IPv6 unexpected hop-limit value %u, expected %u",
ip6->hop_limit, expected_hop_limit);
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_TTL;
}
if (clib_net_to_host_u16 (udp->src_port) < 49152)
{
@@ -1173,9 +1160,8 @@ bfd_udp6_verify_transport (const ip6_header_t * ip6,
return BFD_UDP_ERROR_NONE;
}
-static bfd_udp_error_t
-bfd_udp6_scan (vlib_main_t * vm, vlib_node_runtime_t * rt,
- vlib_buffer_t * b, bfd_session_t ** bs_out)
+static vl_counter_bfd_udp_enum_t
+bfd_udp6_scan (vlib_main_t *vm, vlib_buffer_t *b, bfd_session_t **bs_out)
{
const bfd_pkt_t *pkt = vlib_buffer_get_current (b);
if (sizeof (*pkt) > b->current_length)
@@ -1201,9 +1187,11 @@ bfd_udp6_scan (vlib_main_t * vm, vlib_node_runtime_t * rt,
pkt->head.length, udp_payload_length);
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_LENGTH;
}
- if (!bfd_verify_pkt_common (pkt))
+ vl_counter_bfd_udp_enum_t err;
+ if (BFD_UDP_ERROR_NONE !=
+ (err = bfd_error_to_udp (bfd_verify_pkt_common (pkt))))
{
- return BFD_UDP_ERROR_BAD;
+ return err;
}
bfd_session_t *bs = NULL;
if (pkt->your_disc)
@@ -1230,22 +1218,21 @@ bfd_udp6_scan (vlib_main_t * vm, vlib_node_runtime_t * rt,
if (!bs)
{
BFD_ERR ("BFD session lookup failed - no session matches BFD pkt");
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_NO_SESSION;
}
BFD_DBG ("BFD session found, bs_idx=%u", bs->bs_idx);
if (!bfd_verify_pkt_auth (vm, pkt, b->current_length, bs))
{
BFD_ERR ("Packet verification failed, dropping packet");
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_FAILED_VERIFICATION;
}
- bfd_udp_error_t err;
if (BFD_UDP_ERROR_NONE != (err = bfd_udp6_verify_transport (ip6, udp, bs)))
{
return err;
}
- bfd_rpc_update_session (vm, bs->bs_idx, pkt);
+ err = bfd_error_to_udp (bfd_rpc_update_session (vm, bs->bs_idx, pkt));
*bs_out = bs;
- return BFD_UDP_ERROR_NONE;
+ return err;
}
/*
@@ -1277,7 +1264,7 @@ bfd_udp_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
/* If this pkt is traced, snapshot the data */
if (b0->flags & VLIB_BUFFER_IS_TRACED)
{
- int len;
+ u64 len;
t0 = vlib_add_trace (vm, rt, b0, sizeof (*t0));
len = (b0->current_length < sizeof (t0->data)) ? b0->current_length
: sizeof (t0->data);
@@ -1289,17 +1276,20 @@ bfd_udp_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
bfd_lock (bm);
if (is_ipv6)
{
- error0 = bfd_udp6_scan (vm, rt, b0, &bs);
+ error0 = bfd_udp6_scan (vm, b0, &bs);
}
else
{
- error0 = bfd_udp4_scan (vm, rt, b0, &bs);
+ error0 = bfd_udp4_scan (vm, b0, &bs);
}
b0->error = rt->errors[error0];
next0 = BFD_UDP_INPUT_NEXT_NORMAL;
if (BFD_UDP_ERROR_NONE == error0)
{
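+ /* good packet - account it on the session's rx counter */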
+ vlib_increment_combined_counter (
+ &bm->rx_counter, vm->thread_index, bs->bs_idx, 1,
+ vlib_buffer_length_in_chain (vm, b0));
/*
* if everything went fine, check for poll bit, if present, re-use
* the buffer and based on (now updated) session parameters, send
@@ -1310,17 +1300,16 @@ bfd_udp_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
{
b0->current_data = 0;
b0->current_length = 0;
- bfd_init_final_control_frame (vm, b0, bfd_udp_main.bfd_main, bs,
- 0);
+ bfd_init_final_control_frame (vm, b0, bs);
if (is_ipv6)
{
vlib_node_increment_counter (vm, bfd_udp6_input_node.index,
- b0->error, 1);
+ error0, 1);
}
else
{
vlib_node_increment_counter (vm, bfd_udp4_input_node.index,
- b0->error, 1);
+ error0, 1);
}
const bfd_udp_session_t *bus = &bs->udp;
ip_adjacency_t *adj = adj_get (bus->adj_index);
@@ -1360,7 +1349,6 @@ bfd_udp4_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
/*
* bfd input graph node declaration
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bfd_udp4_input_node, static) = {
.function = bfd_udp4_input,
.name = "bfd-udp4-input",
@@ -1368,7 +1356,7 @@ VLIB_REGISTER_NODE (bfd_udp4_input_node, static) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.n_errors = BFD_UDP_N_ERROR,
- .error_strings = bfd_udp_error_strings,
+ .error_counters = bfd_udp_error_counters,
.format_trace = bfd_input_format_trace,
@@ -1381,7 +1369,6 @@ VLIB_REGISTER_NODE (bfd_udp4_input_node, static) = {
[BFD_UDP_INPUT_NEXT_REPLY_MIDCHAIN] = "ip4-midchain",
},
};
-/* *INDENT-ON* */
static uword
bfd_udp6_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
@@ -1389,7 +1376,6 @@ bfd_udp6_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
return bfd_udp_input (vm, rt, f, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bfd_udp6_input_node, static) = {
.function = bfd_udp6_input,
.name = "bfd-udp6-input",
@@ -1397,7 +1383,7 @@ VLIB_REGISTER_NODE (bfd_udp6_input_node, static) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.n_errors = BFD_UDP_N_ERROR,
- .error_strings = bfd_udp_error_strings,
+ .error_counters = bfd_udp_error_counters,
.format_trace = bfd_input_format_trace,
@@ -1410,7 +1396,6 @@ VLIB_REGISTER_NODE (bfd_udp6_input_node, static) = {
[BFD_UDP_INPUT_NEXT_REPLY_MIDCHAIN] = "ip6-midchain",
},
};
-/* *INDENT-ON* */
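
Both input nodes now register structured error counters (vlib_error_desc_t)
in place of bare strings, so each error carries a name, description, and
severity. A minimal sketch of the declaration pattern, assuming the
conventional VPP counter macro layout; the "_example" names are illustrative:

    #define foreach_bfd_udp_error_example                                 \
      _ (NONE, rx, INFO, "good packets")                                  \
      _ (NO_SESSION, no_session, ERROR, "no session found")

    typedef enum
    {
    #define _(sym, str, sev, desc) BFD_UDP_ERROR_EXAMPLE_##sym,
      foreach_bfd_udp_error_example
    #undef _
	BFD_UDP_EXAMPLE_N_ERROR,
    } bfd_udp_error_example_t;

    static vlib_error_desc_t bfd_udp_error_counters_example[] = {
    #define _(sym, str, sev, desc) { #str, desc, VL_COUNTER_SEVERITY_##sev },
      foreach_bfd_udp_error_example
    #undef _
    };
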
/*
* Process a frame of bfd echo packets
@@ -1439,7 +1424,7 @@ bfd_udp_echo_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
/* If this pkt is traced, snapshot the data */
if (b0->flags & VLIB_BUFFER_IS_TRACED)
{
- int len;
+ u64 len;
t0 = vlib_add_trace (vm, rt, b0, sizeof (*t0));
len = (b0->current_length < sizeof (t0->data)) ? b0->current_length
: sizeof (t0->data);
@@ -1447,8 +1432,9 @@ bfd_udp_echo_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
clib_memcpy_fast (t0->data, vlib_buffer_get_current (b0), len);
}
+ bfd_session_t *bs = NULL;
bfd_lock (bm);
- if (bfd_consume_echo_pkt (vm, bfd_udp_main.bfd_main, b0))
+ if ((bs = bfd_consume_echo_pkt (vm, bfd_udp_main.bfd_main, b0)))
{
b0->error = rt->errors[BFD_UDP_ERROR_NONE];
next0 = BFD_UDP_ECHO_INPUT_NEXT_NORMAL;
@@ -1460,17 +1446,25 @@ bfd_udp_echo_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
if (is_ipv6)
{
vlib_node_increment_counter (vm, bfd_udp_echo6_input_node.index,
- b0->error, 1);
+ BFD_UDP_ERROR_NONE, 1);
}
else
{
vlib_node_increment_counter (vm, bfd_udp_echo4_input_node.index,
- b0->error, 1);
+ BFD_UDP_ERROR_NONE, 1);
}
next0 = BFD_UDP_ECHO_INPUT_NEXT_REPLY_REWRITE;
}
bfd_unlock (bm);
+
+ if (bs)
+ {
+ vlib_increment_combined_counter (
+ &bm->rx_echo_counter, vm->thread_index, bs->bs_idx, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+ }
+
vlib_set_next_frame_buffer (vm, rt, next0, bi0);
from += 1;
@@ -1506,15 +1500,14 @@ bfd_echo_input_format_trace (u8 * s, va_list * args)
/*
* bfd input graph node declaration
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bfd_udp_echo4_input_node, static) = {
.function = bfd_udp_echo4_input,
.name = "bfd-udp-echo4-input",
.vector_size = sizeof (u32),
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = BFD_UDP_ECHO_N_ERROR,
- .error_strings = bfd_udp_error_strings,
+ .n_errors = BFD_UDP_N_ERROR,
+ .error_counters = bfd_udp_error_counters,
.format_trace = bfd_echo_input_format_trace,
@@ -1526,7 +1519,6 @@ VLIB_REGISTER_NODE (bfd_udp_echo4_input_node, static) = {
[BFD_UDP_ECHO_INPUT_NEXT_REPLY_REWRITE] = "ip4-lookup",
},
};
-/* *INDENT-ON* */
static uword
bfd_udp_echo6_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
@@ -1535,15 +1527,14 @@ bfd_udp_echo6_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
return bfd_udp_echo_input (vm, rt, f, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bfd_udp_echo6_input_node, static) = {
.function = bfd_udp_echo6_input,
.name = "bfd-udp-echo6-input",
.vector_size = sizeof (u32),
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = BFD_UDP_ECHO_N_ERROR,
- .error_strings = bfd_udp_echo_error_strings,
+ .n_errors = BFD_UDP_N_ERROR,
+ .error_counters = bfd_udp_error_counters,
.format_trace = bfd_echo_input_format_trace,
@@ -1556,46 +1547,73 @@ VLIB_REGISTER_NODE (bfd_udp_echo6_input_node, static) = {
},
};
-/* *INDENT-ON* */
static clib_error_t *
-bfd_udp_sw_if_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_create)
+bfd_udp_sw_if_add_del (CLIB_UNUSED (vnet_main_t *vnm), u32 sw_if_index,
+ u32 is_create)
{
- bfd_session_t **to_be_freed = NULL;
+ u32 *to_be_freed = NULL;
bfd_udp_main_t *bum = &bfd_udp_main;
BFD_DBG ("sw_if_add_del called, sw_if_index=%u, is_create=%u", sw_if_index,
is_create);
if (!is_create)
{
bfd_session_t *bs;
- pool_foreach (bs, bfd_udp_main.bfd_main->sessions)
- {
- if (bs->transport != BFD_TRANSPORT_UDP4 &&
- bs->transport != BFD_TRANSPORT_UDP6)
- {
- continue;
- }
- if (bs->udp.key.sw_if_index != sw_if_index)
- {
- continue;
- }
- vec_add1 (to_be_freed, bs);
- }
- }
- bfd_session_t **bs;
- vec_foreach (bs, to_be_freed)
- {
- vlib_log_notice (bum->log_class,
- "removal of sw_if_index=%u forces removal of bfd session "
- "with bs_idx=%u", sw_if_index, (*bs)->bs_idx);
- bfd_session_set_flags (vlib_get_main (), *bs, 0);
- bfd_udp_del_session_internal (vlib_get_main (), *bs);
- }
+ pool_foreach (bs, bum->bfd_main->sessions)
+ {
+ if (bs->transport != BFD_TRANSPORT_UDP4 &&
+ bs->transport != BFD_TRANSPORT_UDP6)
+ {
+ continue;
+ }
+ if (bs->udp.key.sw_if_index != sw_if_index)
+ {
+ continue;
+ }
+ vec_add1 (to_be_freed, bs->bs_idx);
+ }
+ }
+ u32 *bs_idx;
+ vec_foreach (bs_idx, to_be_freed)
+ {
+ bfd_session_t *bs = pool_elt_at_index (bum->bfd_main->sessions, *bs_idx);
+ vlib_log_notice (bum->log_class,
+ "removal of sw_if_index=%u forces removal of bfd "
+ "session with bs_idx=%u",
+ sw_if_index, bs->bs_idx);
+ bfd_session_set_flags (vlib_get_main (), bs, 0);
+ bfd_udp_del_session_internal (vlib_get_main (), bs);
+ }
return 0;
}
VNET_SW_INTERFACE_ADD_DEL_FUNCTION (bfd_udp_sw_if_add_del);
+clib_error_t *
+bfd_udp_stats_init (bfd_udp_main_t *bum)
+{
+ const char *name4 = "/bfd/udp4/sessions";
+ bum->udp4_sessions_count_stat_seg_entry = vlib_stats_add_gauge ("%s", name4);
+
+ if (~0 == bum->udp4_sessions_count_stat_seg_entry)
+ {
+ return clib_error_return (
+ 0, "Could not create stat segment entry for %s", name4);
+ }
+ vlib_stats_set_gauge (bum->udp4_sessions_count_stat_seg_entry, 0);
+ const char *name6 = "/bfd/udp6/sessions";
+ bum->udp6_sessions_count_stat_seg_entry = vlib_stats_add_gauge ("%s", name6);
+
+ if (~0 == bum->udp6_sessions_count_stat_seg_entry)
+ {
+ return clib_error_return (
+ 0, "Could not create stat segment entry for %s", name6);
+ }
+ vlib_stats_set_gauge (bum->udp6_sessions_count_stat_seg_entry, 0);
+
+ return 0;
+}
+
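
Once the gauges are registered, the session add/delete paths only need to
publish the current counts. A minimal usage sketch, assuming a hypothetical
udp4_sessions_count bookkeeping field on bfd_udp_main_t:

    /* Hypothetical helper: push the current UDP4 session count into the
     * stats segment; udp4_sessions_count is an assumed bookkeeping field
     * shown only to illustrate vlib_stats_set_gauge usage. */
    static void
    bfd_udp4_sessions_count_publish (bfd_udp_main_t *bum)
    {
      vlib_stats_set_gauge (bum->udp4_sessions_count_stat_seg_entry,
			    bum->udp4_sessions_count);
    }
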
/*
* setup function
*/
@@ -1608,24 +1626,7 @@ bfd_udp_init (vlib_main_t * vm)
sizeof (bfd_udp_key_t));
bfd_udp_main.bfd_main = &bfd_main;
bfd_udp_main.vnet_main = vnet_get_main ();
- vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "ip4-arp");
- ASSERT (node);
- bfd_udp_main.ip4_arp_idx = node->index;
- node = vlib_get_node_by_name (vm, (u8 *) "ip6-discover-neighbor");
- ASSERT (node);
- bfd_udp_main.ip6_ndp_idx = node->index;
- node = vlib_get_node_by_name (vm, (u8 *) "ip4-rewrite");
- ASSERT (node);
- bfd_udp_main.ip4_rewrite_idx = node->index;
- node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite");
- ASSERT (node);
- bfd_udp_main.ip6_rewrite_idx = node->index;
- node = vlib_get_node_by_name (vm, (u8 *) "ip4-midchain");
- ASSERT (node);
- bfd_udp_main.ip4_midchain_idx = node->index;
- node = vlib_get_node_by_name (vm, (u8 *) "ip6-midchain");
- ASSERT (node);
- bfd_udp_main.ip6_midchain_idx = node->index;
+ bfd_udp_stats_init (&bfd_udp_main);
bfd_udp_main.log_class = vlib_log_register_class ("bfd", "udp");
vlib_log_debug (bfd_udp_main.log_class, "initialized");
diff --git a/src/vnet/bfd/bfd_udp.h b/src/vnet/bfd/bfd_udp.h
index 87868104f98..8f4bfee2bd7 100644
--- a/src/vnet/bfd/bfd_udp.h
+++ b/src/vnet/bfd/bfd_udp.h
@@ -24,7 +24,6 @@
#include <vnet/ip/ip6_packet.h>
#include <vnet/bfd/bfd_api.h>
-/* *INDENT-OFF* */
/** identifier of BFD session based on UDP transport only */
typedef CLIB_PACKED (struct {
union {
@@ -38,7 +37,6 @@ typedef CLIB_PACKED (struct {
/** peer address */
ip46_address_t peer_addr;
}) bfd_udp_key_t;
-/* *INDENT-ON* */
/** UDP transport specific data embedded in bfd_session's union */
typedef struct
@@ -82,22 +80,18 @@ int bfd_add_udp6_transport (vlib_main_t * vm, u32 bi,
/**
* @brief transport packet over udpv4
*
* @param is_echo 1 if this is echo packet, 0 if control frame
*
* @return 1 on success, 0 on failure
*/
-int bfd_transport_udp4 (vlib_main_t * vm, u32 bi,
- const struct bfd_session_s *bs);
+int bfd_transport_udp4 (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi,
+ const struct bfd_session_s *bs, int is_echo);
/**
* @brief transport packet over udpv6
*
* @param is_echo 1 if this is echo packet, 0 if control frame
*
* @return 1 on success, 0 on failure
*/
-int bfd_transport_udp6 (vlib_main_t * vm, u32 bi,
- const struct bfd_session_s *bs);
+int bfd_transport_udp6 (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi,
+ const struct bfd_session_s *bs, int is_echo);
/**
* @brief check if the bfd udp layer is echo-capable at this time
diff --git a/src/vnet/bier/bier_update.c b/src/vnet/bier/bier_update.c
index 4108d09f51e..fdb7c5c0865 100644
--- a/src/vnet/bier/bier_update.c
+++ b/src/vnet/bier/bier_update.c
@@ -129,7 +129,14 @@ done:
VLIB_CLI_COMMAND (bier_route_command) = {
.path = "bier route",
- .short_help = "bier route [add|del] sd <sud-domain> set <set> bsl <bit-string-length> bp <bit-position> via [next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4 <interface>] [out-labels <value value value>]",
+ .short_help =
+ "bier route [add|del] sd <sud-domain> set <set> bsl <bit-string-length> "
+ "bp <bit-position> via [next-hop-address] [next-hop-interface] "
+ "[next-hop-table <value>] [weight <value>] [preference <value>] "
+ "[udp-encap-id <value>] [ip4-lookup-in-table <value>] "
+ "[ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] "
+ "[resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 <interface>] "
+ "[out-labels <value value value>]",
.function = vnet_bier_route_cmd,
};
diff --git a/src/vnet/bonding/bond_api.c b/src/vnet/bonding/bond_api.c
index 3fd73d7995f..d9287a8e23d 100644
--- a/src/vnet/bonding/bond_api.c
+++ b/src/vnet/bonding/bond_api.c
@@ -43,8 +43,11 @@ vl_api_bond_delete_t_handler (vl_api_bond_delete_t * mp)
vl_api_bond_delete_reply_t *rmp;
u32 sw_if_index = ntohl (mp->sw_if_index);
+ VALIDATE_SW_IF_INDEX (mp);
+
rv = bond_delete_if (vm, sw_if_index);
+ BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_BOND_DELETE_REPLY);
}
@@ -72,12 +75,10 @@ vl_api_bond_create_t_handler (vl_api_bond_create_t * mp)
int rv = ap->rv;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_BOND_CREATE_REPLY,
({
rmp->sw_if_index = ntohl (ap->sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -105,12 +106,10 @@ vl_api_bond_create2_t_handler (vl_api_bond_create2_t * mp)
int rv = ap->rv;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_BOND_CREATE2_REPLY,
({
rmp->sw_if_index = ntohl (ap->sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -168,6 +167,8 @@ static void
vl_api_sw_interface_set_bond_weight_reply_t *rmp;
int rv = 0;
+ VALIDATE_SW_IF_INDEX (mp);
+
clib_memset (ap, 0, sizeof (*ap));
ap->sw_if_index = ntohl (mp->sw_if_index);
@@ -176,6 +177,7 @@ static void
bond_set_intf_weight (vm, ap);
rv = ap->rv;
+ BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_SW_INTERFACE_SET_BOND_WEIGHT_REPLY);
}
@@ -187,12 +189,15 @@ vl_api_bond_detach_slave_t_handler (vl_api_bond_detach_slave_t * mp)
bond_detach_member_args_t _a, *ap = &_a;
int rv = 0;
+ VALIDATE_SW_IF_INDEX (mp);
+
clib_memset (ap, 0, sizeof (*ap));
ap->member = ntohl (mp->sw_if_index);
bond_detach_member (vm, ap);
rv = ap->rv;
+ BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_BOND_DETACH_SLAVE_REPLY);
}
@@ -204,12 +209,15 @@ vl_api_bond_detach_member_t_handler (vl_api_bond_detach_member_t * mp)
bond_detach_member_args_t _a, *ap = &_a;
int rv = 0;
+ VALIDATE_SW_IF_INDEX (mp);
+
clib_memset (ap, 0, sizeof (*ap));
ap->member = ntohl (mp->sw_if_index);
bond_detach_member (vm, ap);
rv = ap->rv;
+ BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_BOND_DETACH_MEMBER_REPLY);
}
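
The handlers above gain the standard guard against stale or out-of-range
interface indices. A rough sketch of what the macro pair expands to, for
orientation only (the authoritative definitions live in
src/vlibapi/api_helper_macros.h):

    #define VALIDATE_SW_IF_INDEX(mp)                                      \
      do                                                                   \
	{                                                                  \
	  u32 __sw_if_index = ntohl ((mp)->sw_if_index);                   \
	  if (!vnet_sw_if_index_is_api_valid (__sw_if_index))              \
	    {                                                              \
	      rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;                     \
	      goto bad_sw_if_index;                                        \
	    }                                                              \
	}                                                                  \
      while (0);

    #define BAD_SW_IF_INDEX_LABEL                                         \
    bad_sw_if_index:;
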
diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c
index a24d1104486..cdc935ff10f 100644
--- a/src/vnet/bonding/cli.c
+++ b/src/vnet/bonding/cli.c
@@ -20,7 +20,7 @@
#include <vlib/unix/unix.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/bonding/node.h>
-#include <vpp/stats/stat_segment.h>
+#include <vlib/stats/stats.h>
void
bond_disable_collecting_distributing (vlib_main_t * vm, member_if_t * mif)
@@ -183,7 +183,6 @@ bond_dump_ifs (bond_interface_details_t ** out_bondifs)
bond_interface_details_t *r_bondifs = NULL;
bond_interface_details_t *bondif = NULL;
- /* *INDENT-OFF* */
pool_foreach (bif, bm->interfaces) {
vec_add2(r_bondifs, bondif, 1);
clib_memset (bondif, 0, sizeof (*bondif));
@@ -201,7 +200,6 @@ bond_dump_ifs (bond_interface_details_t ** out_bondifs)
bondif->active_members = vec_len (bif->active_members);
bondif->members = vec_len (bif->members);
}
- /* *INDENT-ON* */
*out_bondifs = r_bondifs;
@@ -323,10 +321,10 @@ bond_delete_neighbor (vlib_main_t * vm, bond_if_t * bif, member_if_t * mif)
if (bif->mode == BOND_MODE_LACP)
{
- stat_segment_deregister_state_counter
- (bm->stats[bif->sw_if_index][mif->sw_if_index].actor_state);
- stat_segment_deregister_state_counter
- (bm->stats[bif->sw_if_index][mif->sw_if_index].partner_state);
+ vlib_stats_remove_entry (
+ bm->stats[bif->sw_if_index][mif->sw_if_index].actor_state);
+ vlib_stats_remove_entry (
+ bm->stats[bif->sw_if_index][mif->sw_if_index].partner_state);
}
pool_put (bm->neighbors, mif);
@@ -376,11 +374,11 @@ bond_delete_if (vlib_main_t * vm, u32 sw_if_index)
void
bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args)
{
+ vnet_eth_interface_registration_t eir = {};
bond_main_t *bm = &bond_main;
vnet_main_t *vnm = vnet_get_main ();
vnet_sw_interface_t *sw;
bond_if_t *bif;
- vnet_hw_interface_t *hw;
if ((args->mode == BOND_MODE_LACP) && bm->lacp_plugin_loaded == 0)
{
@@ -408,6 +406,16 @@ bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args)
bif->mode = args->mode;
bif->gso = args->gso;
+ if (bif->lb == BOND_LB_L2)
+ bif->hash_func =
+ vnet_hash_function_from_name ("hash-eth-l2", VNET_HASH_FN_TYPE_ETHERNET);
+ else if (bif->lb == BOND_LB_L34)
+ bif->hash_func = vnet_hash_function_from_name ("hash-eth-l34",
+ VNET_HASH_FN_TYPE_ETHERNET);
+ else if (bif->lb == BOND_LB_L23)
+ bif->hash_func = vnet_hash_function_from_name ("hash-eth-l23",
+ VNET_HASH_FN_TYPE_ETHERNET);
+
// Adjust requested interface id
if (bif->id == ~0)
bif->id = bif->dev_instance;
@@ -440,33 +448,26 @@ bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args)
args->hw_addr[1] = 0xfe;
}
memcpy (bif->hw_address, args->hw_addr, 6);
- args->error = ethernet_register_interface
- (vnm, bond_dev_class.index, bif->dev_instance /* device instance */ ,
- bif->hw_address /* ethernet address */ ,
- &bif->hw_if_index, 0 /* flag change */ );
- if (args->error)
- {
- args->rv = VNET_API_ERROR_INVALID_REGISTRATION;
- hash_unset (bm->id_used, bif->id);
- pool_put (bm->interfaces, bif);
- return;
- }
+ eir.dev_class_index = bond_dev_class.index;
+ eir.dev_instance = bif->dev_instance;
+ eir.address = bif->hw_address;
+ bif->hw_if_index = vnet_eth_register_interface (vnm, &eir);
sw = vnet_get_hw_sw_interface (vnm, bif->hw_if_index);
bif->sw_if_index = sw->sw_if_index;
bif->group = bif->sw_if_index;
bif->numa_only = args->numa_only;
- hw = vnet_get_hw_interface (vnm, bif->hw_if_index);
/*
* Add GSO and Checksum offload flags if GSO is enabled on Bond
*/
if (args->gso)
{
- hw->caps |= (VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM);
+ vnet_hw_if_set_caps (vnm, bif->hw_if_index,
+ VNET_HW_IF_CAP_TCP_GSO |
+ VNET_HW_IF_CAP_TX_TCP_CKSUM |
+ VNET_HW_IF_CAP_TX_UDP_CKSUM);
}
if (vlib_get_thread_main ()->n_vlib_mains > 1)
clib_spinlock_init (&bif->lockp);
@@ -517,12 +518,18 @@ bond_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (args.mode == BOND_MODE_LACP)
args.numa_only = 1;
else
- return clib_error_return (0,
- "Only lacp mode supports numa-only so far!");
+ {
+ unformat_free (line_input);
+ return clib_error_return (
+ 0, "Only lacp mode supports numa-only so far!");
+ }
}
else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ {
+ unformat_free (line_input);
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
}
unformat_free (line_input);
@@ -538,7 +545,6 @@ bond_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bond_create_command, static) = {
.path = "create bond",
.short_help = "create bond mode {round-robin | active-backup | broadcast | "
@@ -546,7 +552,6 @@ VLIB_CLI_COMMAND (bond_create_command, static) = {
"[hw-addr <mac-address>] [id <if-id>] [gso]",
.function = bond_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
bond_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -587,14 +592,12 @@ bond_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bond_delete__command, static) =
{
.path = "delete bond",
.short_help = "delete bond {<interface> | sw_if_index <sw_idx>}",
.function = bond_delete_command_fn,
};
-/* *INDENT-ON* */
void
bond_add_member (vlib_main_t * vm, bond_add_member_args_t * args)
@@ -632,7 +635,7 @@ bond_add_member (vlib_main_t * vm, bond_add_member_args_t * args)
clib_error_return (0, "bond interface cannot be added as member");
return;
}
- if (bif->gso && !(mif_hw->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO))
+ if (bif->gso && !(mif_hw->caps & VNET_HW_IF_CAP_TCP_GSO))
{
args->rv = VNET_API_ERROR_INVALID_INTERFACE;
args->error =
@@ -641,32 +644,29 @@ bond_add_member (vlib_main_t * vm, bond_add_member_args_t * args)
}
if (bif->mode == BOND_MODE_LACP)
{
- u8 *name = format (0, "/if/lacp/%u/%u/state%c", bif->sw_if_index,
- args->member, 0);
+ u32 actor_idx, partner_idx;
- vec_validate (bm->stats, bif->sw_if_index);
- vec_validate (bm->stats[bif->sw_if_index], args->member);
-
- args->error = stat_segment_register_state_counter
- (name, &bm->stats[bif->sw_if_index][args->member].actor_state);
- if (args->error != 0)
+ actor_idx = vlib_stats_add_gauge ("/if/lacp/%u/%u/state",
+ bif->sw_if_index, args->member);
+ if (actor_idx == ~0)
{
args->rv = VNET_API_ERROR_INVALID_INTERFACE;
- vec_free (name);
return;
}
- vec_reset_length (name);
- name = format (0, "/if/lacp/%u/%u/partner-state%c", bif->sw_if_index,
- args->member, 0);
- args->error = stat_segment_register_state_counter
- (name, &bm->stats[bif->sw_if_index][args->member].partner_state);
- vec_free (name);
- if (args->error != 0)
+ partner_idx = vlib_stats_add_gauge ("/if/lacp/%u/%u/partner-state",
+ bif->sw_if_index, args->member);
+ if (partner_idx == ~0)
{
+ vlib_stats_remove_entry (actor_idx);
args->rv = VNET_API_ERROR_INVALID_INTERFACE;
return;
}
+
+ vec_validate (bm->stats, bif->sw_if_index);
+ vec_validate (bm->stats[bif->sw_if_index], args->member);
+ bm->stats[bif->sw_if_index][args->member].actor_state = actor_idx;
+ bm->stats[bif->sw_if_index][args->member].partner_state = partner_idx;
}
pool_get (bm->neighbors, mif);
@@ -817,14 +817,12 @@ add_member_interface_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (add_member_interface_command, static) = {
.path = "bond add",
.short_help = "bond add <BondEthernetx> <member-interface> "
"[passive] [long-timeout]",
.function = add_member_interface_command_fn,
};
-/* *INDENT-ON* */
void
bond_detach_member (vlib_main_t * vm, bond_detach_member_args_t * args)
@@ -881,13 +879,11 @@ detach_interface_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (detach_interface_command, static) = {
.path = "bond del",
.short_help = "bond del <member-interface>",
.function = detach_interface_command_fn,
};
-/* *INDENT-ON* */
static void
show_bond (vlib_main_t * vm)
@@ -899,7 +895,6 @@ show_bond (vlib_main_t * vm)
"interface name", "sw_if_index", "mode",
"load balance", "active members", "members");
- /* *INDENT-OFF* */
pool_foreach (bif, bm->interfaces)
{
vlib_cli_output (vm, "%-16U %-12d %-13U %-13U %-14u %u",
@@ -908,7 +903,6 @@ show_bond (vlib_main_t * vm)
format_bond_load_balance, bif->lb,
vec_len (bif->active_members), vec_len (bif->members));
}
- /* *INDENT-ON* */
}
static void
@@ -918,7 +912,6 @@ show_bond_details (vlib_main_t * vm)
bond_if_t *bif;
u32 *sw_if_index;
- /* *INDENT-OFF* */
pool_foreach (bif, bm->interfaces)
{
vlib_cli_output (vm, "%U", format_bond_interface_name, bif->dev_instance);
@@ -957,7 +950,6 @@ show_bond_details (vlib_main_t * vm)
vlib_cli_output (vm, " sw_if_index: %d", bif->sw_if_index);
vlib_cli_output (vm, " hw_if_index: %d", bif->hw_if_index);
}
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -985,13 +977,11 @@ show_bond_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_bond_command, static) = {
.path = "show bond",
.short_help = "show bond [details]",
.function = show_bond_fn,
};
-/* *INDENT-ON* */
void
bond_set_intf_weight (vlib_main_t * vm, bond_set_intf_weight_args_t * args)
@@ -1091,14 +1081,12 @@ bond_set_intf_cmd (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(set_interface_bond_cmd, static) = {
.path = "set interface bond",
.short_help = "set interface bond <interface> | sw_if_index <idx>"
" weight <value>",
.function = bond_set_intf_cmd,
};
-/* *INDENT-ON* */
clib_error_t *
bond_cli_init (vlib_main_t * vm)
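
With the actor/partner gauges registered per member in bond_add_member
above, the LACP state machine can publish state transitions directly. A
minimal sketch, assuming bm->stats has been populated as shown; the call
site and new_state value are illustrative:

    /* Minimal sketch: publish a member's actor state to the stats
     * segment. */
    static inline void
    bond_lacp_publish_actor_state (bond_main_t *bm, u32 bond_sw_if_index,
				   u32 member_sw_if_index, u8 new_state)
    {
      vlib_stats_set_gauge (
	bm->stats[bond_sw_if_index][member_sw_if_index].actor_state,
	new_state);
    }
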
diff --git a/src/vnet/bonding/device.c b/src/vnet/bonding/device.c
index 9e949b87214..a0b93fccde1 100644
--- a/src/vnet/bonding/device.c
+++ b/src/vnet/bonding/device.c
@@ -17,16 +17,9 @@
#define _GNU_SOURCE
#include <stdint.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/ip/ip4_packet.h>
-#include <vnet/ip/ip6_packet.h>
-#include <vnet/ip/ip6_hop_by_hop_packet.h>
-#include <vnet/bonding/node.h>
-#include <vppinfra/lb_hash_hash.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ip-neighbor/ip_neighbor.h>
#include <vnet/ip-neighbor/ip4_neighbor.h>
#include <vnet/ip-neighbor/ip6_neighbor.h>
+#include <vnet/bonding/node.h>
#define foreach_bond_tx_error \
_ (NONE, "no error") \
@@ -118,14 +111,6 @@ bond_set_l2_mode_function (vnet_main_t * vnm,
return 0;
}
-static __clib_unused clib_error_t *
-bond_subif_add_del_function (vnet_main_t * vnm, u32 hw_if_index,
- struct vnet_sw_interface_t *st, int is_add)
-{
- /* Nothing for now */
- return 0;
-}
-
static clib_error_t *
bond_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
{
@@ -194,8 +179,8 @@ bond_tx_add_to_queue (bond_per_thread_data_t * ptd, u32 port, u32 bi)
}
static_always_inline u32
-bond_lb_broadcast (vlib_main_t * vm,
- bond_if_t * bif, vlib_buffer_t * b0, uword n_members)
+bond_lb_broadcast (vlib_main_t *vm, bond_if_t *bif, vlib_buffer_t *b0,
+ uword n_members)
{
bond_main_t *bm = &bond_main;
vlib_buffer_t *c0;
@@ -220,230 +205,75 @@ bond_lb_broadcast (vlib_main_t * vm,
}
static_always_inline u32
-bond_lb_l2 (vlib_buffer_t * b0)
-{
- ethernet_header_t *eth = vlib_buffer_get_current (b0);
- u64 *dst = (u64 *) & eth->dst_address[0];
- u64 a = clib_mem_unaligned (dst, u64);
- u32 *src = (u32 *) & eth->src_address[2];
- u32 b = clib_mem_unaligned (src, u32);
-
- return lb_hash_hash_2_tuples (a, b);
-}
-
-static_always_inline u16 *
-bond_locate_ethertype (ethernet_header_t * eth)
+bond_lb_round_robin (bond_if_t *bif, vlib_buffer_t *b0, uword n_members)
{
- u16 *ethertype_p;
- ethernet_vlan_header_t *vlan;
+ bif->lb_rr_last_index++;
+ if (bif->lb_rr_last_index >= n_members)
+ bif->lb_rr_last_index = 0;
- if (!ethernet_frame_is_tagged (clib_net_to_host_u16 (eth->type)))
- {
- ethertype_p = &eth->type;
- }
- else
- {
- vlan = (void *) (eth + 1);
- ethertype_p = &vlan->type;
- if (*ethertype_p == ntohs (ETHERNET_TYPE_VLAN))
- {
- vlan++;
- ethertype_p = &vlan->type;
- }
- }
- return ethertype_p;
+ return bif->lb_rr_last_index;
}
-static_always_inline u32
-bond_lb_l23 (vlib_buffer_t * b0)
+static_always_inline void
+bond_tx_hash (vlib_main_t *vm, bond_per_thread_data_t *ptd, bond_if_t *bif,
+ vlib_buffer_t **b, u32 *h, u32 n_left)
{
- ethernet_header_t *eth = vlib_buffer_get_current (b0);
- u8 ip_version;
- ip4_header_t *ip4;
- u16 ethertype, *ethertype_p;
- u32 *mac1, *mac2, *mac3;
+ u32 n_left_from = n_left;
+ void **data;
- ethertype_p = bond_locate_ethertype (eth);
- ethertype = clib_mem_unaligned (ethertype_p, u16);
+ ASSERT (bif->hash_func != 0);
- if ((ethertype != htons (ETHERNET_TYPE_IP4)) &&
- (ethertype != htons (ETHERNET_TYPE_IP6)))
- return bond_lb_l2 (b0);
-
- ip4 = (ip4_header_t *) (ethertype_p + 1);
- ip_version = (ip4->ip_version_and_header_length >> 4);
-
- if (ip_version == 0x4)
- {
- u32 a, c;
-
- mac1 = (u32 *) & eth->dst_address[0];
- mac2 = (u32 *) & eth->dst_address[4];
- mac3 = (u32 *) & eth->src_address[2];
-
- a = clib_mem_unaligned (mac1, u32) ^ clib_mem_unaligned (mac2, u32) ^
- clib_mem_unaligned (mac3, u32);
- c =
- lb_hash_hash_2_tuples (clib_mem_unaligned (&ip4->address_pair, u64),
- a);
- return c;
- }
- else if (ip_version == 0x6)
+ vec_validate_aligned (ptd->data, n_left - 1, CLIB_CACHE_LINE_BYTES);
+ data = ptd->data;
+ while (n_left >= 8)
{
- u64 a;
- u32 c;
- ip6_header_t *ip6 = (ip6_header_t *) (eth + 1);
-
- mac1 = (u32 *) & eth->dst_address[0];
- mac2 = (u32 *) & eth->dst_address[4];
- mac3 = (u32 *) & eth->src_address[2];
-
- a = clib_mem_unaligned (mac1, u32) ^ clib_mem_unaligned (mac2, u32) ^
- clib_mem_unaligned (mac3, u32);
- c =
- lb_hash_hash (clib_mem_unaligned
- (&ip6->src_address.as_uword[0], uword),
- clib_mem_unaligned (&ip6->src_address.as_uword[1],
- uword),
- clib_mem_unaligned (&ip6->dst_address.as_uword[0],
- uword),
- clib_mem_unaligned (&ip6->dst_address.as_uword[1],
- uword), a);
- return c;
- }
- return bond_lb_l2 (b0);
-}
-
-static_always_inline u32
-bond_lb_l34 (vlib_buffer_t * b0)
-{
- ethernet_header_t *eth = vlib_buffer_get_current (b0);
- u8 ip_version;
- uword is_tcp_udp;
- ip4_header_t *ip4;
- u16 ethertype, *ethertype_p;
-
- ethertype_p = bond_locate_ethertype (eth);
- ethertype = clib_mem_unaligned (ethertype_p, u16);
-
- if ((ethertype != htons (ETHERNET_TYPE_IP4)) &&
- (ethertype != htons (ETHERNET_TYPE_IP6)))
- return (bond_lb_l2 (b0));
+ // Prefetch next iteration
+ vlib_prefetch_buffer_header (b[4], LOAD);
+ vlib_prefetch_buffer_header (b[5], LOAD);
+ vlib_prefetch_buffer_header (b[6], LOAD);
+ vlib_prefetch_buffer_header (b[7], LOAD);
- ip4 = (ip4_header_t *) (ethertype_p + 1);
- ip_version = (ip4->ip_version_and_header_length >> 4);
+ data[0] = vlib_buffer_get_current (b[0]);
+ data[1] = vlib_buffer_get_current (b[1]);
+ data[2] = vlib_buffer_get_current (b[2]);
+ data[3] = vlib_buffer_get_current (b[3]);
- if (ip_version == 0x4)
- {
- u32 a, t1, t2;
- tcp_header_t *tcp = (void *) (ip4 + 1);
-
- is_tcp_udp = (ip4->protocol == IP_PROTOCOL_TCP) ||
- (ip4->protocol == IP_PROTOCOL_UDP);
- t1 = is_tcp_udp ? clib_mem_unaligned (&tcp->src, u16) : 0;
- t2 = is_tcp_udp ? clib_mem_unaligned (&tcp->dst, u16) : 0;
- a = t1 ^ t2;
- return
- lb_hash_hash_2_tuples (clib_mem_unaligned (&ip4->address_pair, u64),
- a);
- }
- else if (ip_version == 0x6)
- {
- u64 a;
- u32 c, t1, t2;
- ip6_header_t *ip6 = (ip6_header_t *) (eth + 1);
- tcp_header_t *tcp = (void *) (ip6 + 1);
-
- is_tcp_udp = 0;
- if (PREDICT_TRUE ((ip6->protocol == IP_PROTOCOL_TCP) ||
- (ip6->protocol == IP_PROTOCOL_UDP)))
- {
- is_tcp_udp = 1;
- tcp = (void *) (ip6 + 1);
- }
- else if (ip6->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
- {
- ip6_hop_by_hop_header_t *hbh =
- (ip6_hop_by_hop_header_t *) (ip6 + 1);
- if ((hbh->protocol == IP_PROTOCOL_TCP)
- || (hbh->protocol == IP_PROTOCOL_UDP))
- {
- is_tcp_udp = 1;
- tcp = (tcp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3));
- }
- }
- t1 = is_tcp_udp ? clib_mem_unaligned (&tcp->src, u16) : 0;
- t2 = is_tcp_udp ? clib_mem_unaligned (&tcp->dst, u16) : 0;
- a = t1 ^ t2;
- c =
- lb_hash_hash (clib_mem_unaligned
- (&ip6->src_address.as_uword[0], uword),
- clib_mem_unaligned (&ip6->src_address.as_uword[1],
- uword),
- clib_mem_unaligned (&ip6->dst_address.as_uword[0],
- uword),
- clib_mem_unaligned (&ip6->dst_address.as_uword[1],
- uword), a);
- return c;
+ n_left -= 4;
+ b += 4;
+ data += 4;
}
- return bond_lb_l2 (b0);
-}
+ while (n_left > 0)
+ {
+ data[0] = vlib_buffer_get_current (b[0]);
-static_always_inline u32
-bond_lb_round_robin (bond_if_t * bif, vlib_buffer_t * b0, uword n_members)
-{
- bif->lb_rr_last_index++;
- if (bif->lb_rr_last_index >= n_members)
- bif->lb_rr_last_index = 0;
+ n_left -= 1;
+ b += 1;
+ data += 1;
+ }
- return bif->lb_rr_last_index;
+ bif->hash_func (ptd->data, h, n_left_from);
+ vec_reset_length (ptd->data);
}
static_always_inline void
-bond_tx_inline (vlib_main_t * vm, bond_if_t * bif, vlib_buffer_t ** b,
- u32 * h, u32 n_left, uword n_members, u32 lb_alg)
+bond_tx_no_hash (vlib_main_t *vm, bond_if_t *bif, vlib_buffer_t **b, u32 *h,
+ u32 n_left, uword n_members, u32 lb_alg)
{
- while (n_left >= 4)
+ while (n_left >= 8)
{
// Prefetch next iteration
- if (n_left >= 8)
- {
- vlib_buffer_t **pb = b + 4;
-
- vlib_prefetch_buffer_header (pb[0], LOAD);
- vlib_prefetch_buffer_header (pb[1], LOAD);
- vlib_prefetch_buffer_header (pb[2], LOAD);
- vlib_prefetch_buffer_header (pb[3], LOAD);
+ vlib_prefetch_buffer_header (b[4], LOAD);
+ vlib_prefetch_buffer_header (b[5], LOAD);
+ vlib_prefetch_buffer_header (b[6], LOAD);
+ vlib_prefetch_buffer_header (b[7], LOAD);
- clib_prefetch_load (pb[0]->data);
- clib_prefetch_load (pb[1]->data);
- clib_prefetch_load (pb[2]->data);
- clib_prefetch_load (pb[3]->data);
- }
+ clib_prefetch_load (b[4]->data);
+ clib_prefetch_load (b[5]->data);
+ clib_prefetch_load (b[6]->data);
+ clib_prefetch_load (b[7]->data);
- if (lb_alg == BOND_LB_L2)
- {
- h[0] = bond_lb_l2 (b[0]);
- h[1] = bond_lb_l2 (b[1]);
- h[2] = bond_lb_l2 (b[2]);
- h[3] = bond_lb_l2 (b[3]);
- }
- else if (lb_alg == BOND_LB_L34)
- {
- h[0] = bond_lb_l34 (b[0]);
- h[1] = bond_lb_l34 (b[1]);
- h[2] = bond_lb_l34 (b[2]);
- h[3] = bond_lb_l34 (b[3]);
- }
- else if (lb_alg == BOND_LB_L23)
- {
- h[0] = bond_lb_l23 (b[0]);
- h[1] = bond_lb_l23 (b[1]);
- h[2] = bond_lb_l23 (b[2]);
- h[3] = bond_lb_l23 (b[3]);
- }
- else if (lb_alg == BOND_LB_RR)
+ if (lb_alg == BOND_LB_RR)
{
h[0] = bond_lb_round_robin (bif, b[0], n_members);
h[1] = bond_lb_round_robin (bif, b[1], n_members);
@@ -469,13 +299,7 @@ bond_tx_inline (vlib_main_t * vm, bond_if_t * bif, vlib_buffer_t ** b,
while (n_left > 0)
{
- if (bif->lb == BOND_LB_L2)
- h[0] = bond_lb_l2 (b[0]);
- else if (bif->lb == BOND_LB_L34)
- h[0] = bond_lb_l34 (b[0]);
- else if (bif->lb == BOND_LB_L23)
- h[0] = bond_lb_l23 (b[0]);
- else if (bif->lb == BOND_LB_RR)
+ if (bif->lb == BOND_LB_RR)
h[0] = bond_lb_round_robin (bif, b[0], n_members);
else if (bif->lb == BOND_LB_BC)
h[0] = bond_lb_broadcast (vm, bif, b[0], n_members);
@@ -496,40 +320,6 @@ bond_hash_to_port (u32 * h, u32 n_left, u32 n_members,
{
u32 mask = n_members - 1;
-#ifdef CLIB_HAVE_VEC256
- /* only lower 16 bits of hash due to single precision fp arithmetic */
- u32x8 mask8, sc8u, h8a, h8b;
- f32x8 sc8f;
-
- if (use_modulo_shortcut)
- {
- mask8 = u32x8_splat (mask);
- }
- else
- {
- mask8 = u32x8_splat (0xffff);
- sc8u = u32x8_splat (n_members);
- sc8f = f32x8_from_u32x8 (sc8u);
- }
-
- while (n_left > 16)
- {
- h8a = u32x8_load_unaligned (h) & mask8;
- h8b = u32x8_load_unaligned (h + 8) & mask8;
-
- if (use_modulo_shortcut == 0)
- {
- h8a -= sc8u * u32x8_from_f32x8 (f32x8_from_u32x8 (h8a) / sc8f);
- h8b -= sc8u * u32x8_from_f32x8 (f32x8_from_u32x8 (h8b) / sc8f);
- }
-
- u32x8_store_unaligned (h8a, h);
- u32x8_store_unaligned (h8b, h + 8);
- n_left -= 16;
- h += 16;
- }
-#endif
-
while (n_left > 4)
{
if (use_modulo_shortcut)
@@ -568,17 +358,13 @@ bond_update_sw_if_index (bond_per_thread_data_t * ptd, bond_if_t * bif,
u32 sw_if_index = data[0];
u32 *h = data;
- while (n_left >= 4)
+ while (n_left >= 8)
{
// Prefetch next iteration
- if (n_left >= 8)
- {
- vlib_buffer_t **pb = b + 4;
- vlib_prefetch_buffer_header (pb[0], LOAD);
- vlib_prefetch_buffer_header (pb[1], LOAD);
- vlib_prefetch_buffer_header (pb[2], LOAD);
- vlib_prefetch_buffer_header (pb[3], LOAD);
- }
+ vlib_prefetch_buffer_header (b[4], LOAD);
+ vlib_prefetch_buffer_header (b[5], LOAD);
+ vlib_prefetch_buffer_header (b[6], LOAD);
+ vlib_prefetch_buffer_header (b[7], LOAD);
if (PREDICT_FALSE (single_sw_if_index))
{
@@ -594,17 +380,14 @@ bond_update_sw_if_index (bond_per_thread_data_t * ptd, bond_if_t * bif,
}
else
{
- u32 sw_if_index[4];
-
- sw_if_index[0] = *vec_elt_at_index (bif->active_members, h[0]);
- sw_if_index[1] = *vec_elt_at_index (bif->active_members, h[1]);
- sw_if_index[2] = *vec_elt_at_index (bif->active_members, h[2]);
- sw_if_index[3] = *vec_elt_at_index (bif->active_members, h[3]);
-
- vnet_buffer (b[0])->sw_if_index[VLIB_TX] = sw_if_index[0];
- vnet_buffer (b[1])->sw_if_index[VLIB_TX] = sw_if_index[1];
- vnet_buffer (b[2])->sw_if_index[VLIB_TX] = sw_if_index[2];
- vnet_buffer (b[3])->sw_if_index[VLIB_TX] = sw_if_index[3];
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
+ *vec_elt_at_index (bif->active_members, h[0]);
+ vnet_buffer (b[1])->sw_if_index[VLIB_TX] =
+ *vec_elt_at_index (bif->active_members, h[1]);
+ vnet_buffer (b[2])->sw_if_index[VLIB_TX] =
+ *vec_elt_at_index (bif->active_members, h[2]);
+ vnet_buffer (b[3])->sw_if_index[VLIB_TX] =
+ *vec_elt_at_index (bif->active_members, h[3]);
bond_tx_add_to_queue (ptd, h[0], bi[0]);
bond_tx_add_to_queue (ptd, h[1], bi[1]);
@@ -626,9 +409,8 @@ bond_update_sw_if_index (bond_per_thread_data_t * ptd, bond_if_t * bif,
}
else
{
- u32 sw_if_index0 = *vec_elt_at_index (bif->active_members, h[0]);
-
- vnet_buffer (b[0])->sw_if_index[VLIB_TX] = sw_if_index0;
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
+ *vec_elt_at_index (bif->active_members, h[0]);
bond_tx_add_to_queue (ptd, h[0], bi[0]);
}
@@ -735,7 +517,7 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
{
sw_if_index = *vec_elt_at_index (bif->active_members, 0);
- bond_tx_inline (vm, bif, bufs, hashes, n_left, n_members, BOND_LB_BC);
+ bond_tx_no_hash (vm, bif, bufs, hashes, n_left, n_members, BOND_LB_BC);
bond_tx_trace (vm, node, bif, bufs, frame->n_vectors, 0);
bond_update_sw_if_index (ptd, bif, from, bufs, &sw_if_index, n_left,
/* single_sw_if_index */ 1);
@@ -747,24 +529,10 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
if (bif->n_numa_members >= 1)
n_members = bif->n_numa_members;
- if (bif->lb == BOND_LB_L2)
- bond_tx_inline (vm, bif, bufs, hashes, n_left, n_members, BOND_LB_L2);
- else if (bif->lb == BOND_LB_L34)
- bond_tx_inline (vm, bif, bufs, hashes, n_left, n_members, BOND_LB_L34);
- else if (bif->lb == BOND_LB_L23)
- bond_tx_inline (vm, bif, bufs, hashes, n_left, n_members, BOND_LB_L23);
- else if (bif->lb == BOND_LB_RR)
- bond_tx_inline (vm, bif, bufs, hashes, n_left, n_members, BOND_LB_RR);
+ if (bif->lb == BOND_LB_RR)
+ bond_tx_no_hash (vm, bif, bufs, hashes, n_left, n_members, BOND_LB_RR);
else
- {
- vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
- vlib_increment_simple_counter (
- vnet_main.interface_main.sw_if_counters + VNET_INTERFACE_COUNTER_DROP,
- thread_index, bif->sw_if_index, frame->n_vectors);
- vlib_error_count (vm, node->node_index, BOND_TX_ERROR_BAD_LB_MODE,
- frame->n_vectors);
- return frame->n_vectors;
- }
+ bond_tx_hash (vm, ptd, bif, bufs, hashes, n_left);
/* calculate port out of hash */
h = hashes;
@@ -805,8 +573,10 @@ bond_active_interface_switch_cb (vnet_main_t * vnm, u32 sw_if_index,
{
bond_main_t *bm = &bond_main;
- ip4_neighbor_advertise (bm->vlib_main, bm->vnet_main, sw_if_index, NULL);
- ip6_neighbor_advertise (bm->vlib_main, bm->vnet_main, sw_if_index, NULL);
+ ip4_neighbor_advertise (bm->vlib_main, bm->vnet_main, sw_if_index,
+ vlib_get_thread_index (), NULL);
+ ip6_neighbor_advertise (bm->vlib_main, bm->vnet_main, sw_if_index,
+ vlib_get_thread_index (), NULL);
return (WALK_CONTINUE);
}
@@ -838,16 +608,13 @@ bond_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bond_process_node) = {
.function = bond_process,
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "bond-process",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (bond_dev_class) = {
.name = "bond",
.tx_function_n_errors = BOND_TX_N_ERROR,
@@ -855,12 +622,10 @@ VNET_DEVICE_CLASS (bond_dev_class) = {
.format_device_name = format_bond_interface_name,
.set_l2_mode_function = bond_set_l2_mode_function,
.admin_up_down_function = bond_interface_admin_up_down,
- .subif_add_del_function = bond_subif_add_del_function,
.format_tx_trace = format_bond_tx_trace,
.mac_addr_add_del_function = bond_add_del_mac_address,
};
-/* *INDENT-ON* */
static clib_error_t *
bond_member_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
diff --git a/src/vnet/bonding/node.c b/src/vnet/bonding/node.c
index 21a968177fe..66de1e4dd80 100644
--- a/src/vnet/bonding/node.c
+++ b/src/vnet/bonding/node.c
@@ -397,7 +397,6 @@ bond_input_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bond_input_node) = {
.name = "bond-input",
.vector_size = sizeof (u32),
@@ -421,7 +420,6 @@ VNET_FEATURE_INIT (bond_input, static) =
.node_name = "bond-input",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
-/* *INDENT-ON* */
static clib_error_t *
bond_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
diff --git a/src/vnet/bonding/node.h b/src/vnet/bonding/node.h
index 843c236f123..c6602ef01b9 100644
--- a/src/vnet/bonding/node.h
+++ b/src/vnet/bonding/node.h
@@ -21,6 +21,7 @@
#include <vppinfra/hash.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/interface.h>
+#include <vnet/hash/hash.h>
#define LACP_FAST_PERIODIC_TIMER 1.0
#define LACP_SHORT_TIMOUT_TIME (LACP_FAST_PERIODIC_TIMER * 3)
@@ -163,6 +164,7 @@ typedef struct
typedef struct
{
bond_per_port_queue_t *per_port_queue;
+ void **data;
} bond_per_thread_data_t;
typedef struct
@@ -208,6 +210,7 @@ typedef struct
u8 hw_address[6];
clib_spinlock_t lockp;
+ vnet_hash_fn_t hash_func;
} bond_if_t;
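
The hash_func member added here is the generic vnet hash callback resolved
once at bond-create time (see vnet_hash_function_from_name in cli.c above).
Its expected shape, per the vnet/hash/hash.h convention:

    /* Callback shape stored in bond_if_t.hash_func: given n_packets
     * pointers to packet data, write one 32-bit hash per packet. This is
     * how bond_tx_hash invokes it: bif->hash_func (ptd->data, h, n). */
    typedef void (*vnet_hash_fn_t) (void **p, u32 *hash, u32 n_packets);
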
typedef struct
diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h
index 2b3152fbcd6..2f34aa4b5fc 100644
--- a/src/vnet/buffer.h
+++ b/src/vnet/buffer.h
@@ -244,7 +244,8 @@ typedef struct
u8 save_rewrite_length;
u8 ip_proto; /* protocol in ip header */
u8 icmp_type_or_tcp_flags;
- u8 is_non_first_fragment;
+ u8 is_non_first_fragment : 1;
+ u8 l4_layer_truncated : 7;
u32 tcp_seq_number;
};
/* full reassembly output variables */
@@ -319,13 +320,13 @@ typedef struct
/* L2 classify */
struct
{
- struct opaque_l2 pad;
+ u32 pad[4]; /* do not overlay w/ ip.fib_index nor l2 */
union
{
u32 table_index;
u32 opaque_index;
};
- u64 hash;
+ u32 hash;
} l2_classify;
/* vnet policer */
@@ -417,7 +418,9 @@ typedef struct
};
} vnet_buffer_opaque_t;
-#define VNET_REWRITE_TOTAL_BYTES (VLIB_BUFFER_PRE_DATA_SIZE)
+#define VNET_REWRITE_TOTAL_BYTES 128
+STATIC_ASSERT (VNET_REWRITE_TOTAL_BYTES <= VLIB_BUFFER_PRE_DATA_SIZE,
+ "VNET_REWRITE_TOTAL_BYTES too big");
STATIC_ASSERT (STRUCT_SIZE_OF (vnet_buffer_opaque_t, ip.save_rewrite_length)
== STRUCT_SIZE_OF (vnet_buffer_opaque_t,
@@ -464,15 +467,7 @@ typedef struct
} qos;
u8 loop_counter;
- u8 __unused[1];
-
- /* Group Based Policy */
- struct
- {
- u8 __unused;
- u8 flags;
- u16 sclass;
- } gbp;
+ u8 pad[5]; /* unused */
/**
* The L4 payload size set on input on GSO enabled interfaces
@@ -500,15 +495,7 @@ typedef struct
};
} nat;
- union
- {
- struct
- {
- u64 pad[1];
- u64 pg_replay_timestamp;
- };
- u32 unused[8];
- };
+ u32 unused[8];
} vnet_buffer_opaque2_t;
#define vnet_buffer2(b) ((vnet_buffer_opaque2_t *) (b)->opaque2)
@@ -517,8 +504,8 @@ typedef struct
* The opaque2 field of the vlib_buffer_t is interpreted as a
* vnet_buffer_opaque2_t. Hence it should be big enough to accommodate one.
*/
-STATIC_ASSERT (sizeof (vnet_buffer_opaque2_t) <=
- STRUCT_SIZE_OF (vlib_buffer_t, opaque2),
+STATIC_ASSERT (sizeof (vnet_buffer_opaque2_t) ==
+ STRUCT_SIZE_OF (vlib_buffer_t, opaque2),
"VNET buffer opaque2 meta-data too large for vlib_buffer");
#define gso_mtu_sz(b) (vnet_buffer2(b)->gso_size + \
diff --git a/src/vnet/classify/classify.api b/src/vnet/classify/classify.api
index d1d7340302f..00963f6fb6a 100644
--- a/src/vnet/classify/classify.api
+++ b/src/vnet/classify/classify.api
@@ -436,6 +436,30 @@ autoreply define punt_acl_add_del
bool is_add [default=true];
};
+/** \brief Get classify table ids configured for punt ACL
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define punt_acl_get
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for punt_acl_get
+ @param context - sender context which was passed in the request
+ @param retval - return value (0 for success)
+ @param ip4_table_index - ip4 punt classify table index (~0 for none)
+ @param ip6_table_index - ip6 punt classify table index (~0 for none)
+*/
+define punt_acl_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 ip4_table_index;
+ u32 ip6_table_index;
+};
+
/** \brief Set/unset output ACL interface
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/vnet/classify/classify_api.c b/src/vnet/classify/classify_api.c
index 3e8dc511479..fc57b006d37 100644
--- a/src/vnet/classify/classify_api.c
+++ b/src/vnet/classify/classify_api.c
@@ -115,9 +115,8 @@ static void vl_api_classify_pcap_set_table_t_handler
u32 table_index = ntohl (mp->table_index);
u32 sw_if_index = ntohl (mp->sw_if_index);
- if (sw_if_index == ~0
- || sw_if_index >= vec_len (cm->classify_table_index_by_sw_if_index)
- || (table_index != ~0 && pool_is_free_index (cm->tables, table_index)))
+ if (sw_if_index == ~0 ||
+ (table_index != ~0 && pool_is_free_index (cm->tables, table_index)))
{
rv = VNET_API_ERROR_INVALID_VALUE;
goto out;
@@ -380,7 +379,6 @@ static void vl_api_classify_add_del_table_t_handler
current_data_flag, current_data_offset, mp->is_add, mp->del_chain);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_CLASSIFY_ADD_DEL_TABLE_REPLY,
({
if (rv == 0 && mp->is_add)
@@ -397,7 +395,6 @@ out:
rmp->new_table_index = ~0;
}
}));
- /* *INDENT-ON* */
}
static void vl_api_classify_add_del_session_t_handler
@@ -534,12 +531,10 @@ vl_api_classify_table_ids_t_handler (vl_api_classify_table_ids_t * mp)
u32 *table_ids = 0;
u32 count;
- /* *INDENT-OFF* */
pool_foreach (t, cm->tables)
{
vec_add1 (table_ids, ntohl(t - cm->tables));
}
- /* *INDENT-ON* */
count = vec_len (table_ids);
vl_api_classify_table_ids_reply_t *rmp;
@@ -596,7 +591,6 @@ static void
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_CLASSIFY_TABLE_BY_INTERFACE_REPLY,
({
rmp->sw_if_index = ntohl(sw_if_index);
@@ -604,7 +598,6 @@ static void
rmp->ip4_table_id = ntohl(acl[IN_OUT_ACL_TABLE_IP4]);
rmp->ip6_table_id = ntohl(acl[IN_OUT_ACL_TABLE_IP6]);
}));
- /* *INDENT-ON* */
vec_free (acl);
}
@@ -667,7 +660,7 @@ send_classify_session_details (vl_api_registration_t * reg,
{
vl_api_classify_session_details_t *rmp;
- rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp = vl_msg_api_alloc (sizeof (*rmp) + match_length);
clib_memset (rmp, 0, sizeof (*rmp));
rmp->_vl_msg_id =
ntohs (REPLY_MSG_ID_BASE + VL_API_CLASSIFY_SESSION_DETAILS);
@@ -695,7 +688,6 @@ vl_api_classify_session_dump_t_handler (vl_api_classify_session_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (t, cm->tables)
{
if (table_id == t - cm->tables)
@@ -729,7 +721,6 @@ vl_api_classify_session_dump_t_handler (vl_api_classify_session_dump_t * mp)
break;
}
}
- /* *INDENT-ON* */
}
static void
@@ -912,6 +903,27 @@ vl_api_punt_acl_add_del_t_handler (vl_api_punt_acl_add_del_t *mp)
REPLY_MACRO (VL_API_PUNT_ACL_ADD_DEL_REPLY);
}
+static void
+vl_api_punt_acl_get_t_handler (vl_api_punt_acl_get_t *mp)
+{
+ vl_api_punt_acl_get_reply_t *rmp;
+ int rv = 0;
+
+ const in_out_acl_main_t *am = &in_out_acl_main;
+
+ u32 *const *tables =
+ am->classify_table_index_by_sw_if_index[IN_OUT_ACL_INPUT_TABLE_GROUP];
+ const u32 *ip4_table = tables[IN_OUT_ACL_TABLE_IP4_PUNT];
+ const u32 *ip6_table = tables[IN_OUT_ACL_TABLE_IP6_PUNT];
+ const u32 ip4_table_index = vec_len (ip4_table) ? ip4_table[0] : ~0;
+ const u32 ip6_table_index = vec_len (ip6_table) ? ip6_table[0] : ~0;
+
+ REPLY_MACRO2 (VL_API_PUNT_ACL_GET_REPLY, ({
+ rmp->ip4_table_index = ntohl (ip4_table_index);
+ rmp->ip6_table_index = ntohl (ip6_table_index);
+ }));
+}
+
static void vl_api_output_acl_set_interface_t_handler
(vl_api_output_acl_set_interface_t * mp)
{
@@ -945,9 +957,10 @@ classify_api_hookup (vlib_main_t * vm)
/*
* Trace space for classifier mask+match
*/
- am->api_trace_cfg[VL_API_CLASSIFY_ADD_DEL_TABLE].size += 5 * sizeof (u32x4);
- am->api_trace_cfg[VL_API_CLASSIFY_ADD_DEL_SESSION].size +=
- 5 * sizeof (u32x4);
+ vl_api_increase_msg_trace_size (am, VL_API_CLASSIFY_ADD_DEL_TABLE,
+ 5 * sizeof (u32x4));
+ vl_api_increase_msg_trace_size (am, VL_API_CLASSIFY_ADD_DEL_SESSION,
+ 5 * sizeof (u32x4));
/*
* Set up the (msg_name, crc, message-id) table
diff --git a/src/vnet/classify/flow_classify.c b/src/vnet/classify/flow_classify.c
index afdadc66235..7197558a77a 100644
--- a/src/vnet/classify/flow_classify.c
+++ b/src/vnet/classify/flow_classify.c
@@ -150,7 +150,6 @@ set_flow_classify_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_input_acl_command, static) = {
.path = "set flow classify",
.short_help =
@@ -158,7 +157,6 @@ VLIB_CLI_COMMAND (set_input_acl_command, static) = {
" [ip6-table <index>] [del]",
.function = set_flow_classify_command_fn,
};
-/* *INDENT-ON* */
static uword
unformat_table_type (unformat_input_t * input, va_list * va)
@@ -215,13 +213,11 @@ show_flow_classify_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_flow_classify_command, static) = {
.path = "show classify flow",
.short_help = "show classify flow type [ip4|ip6]",
.function = show_flow_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/classify/flow_classify_node.c b/src/vnet/classify/flow_classify_node.c
index 4989bf0a012..a34bab6190b 100644
--- a/src/vnet/classify/flow_classify_node.c
+++ b/src/vnet/classify/flow_classify_node.c
@@ -184,7 +184,7 @@ flow_classify_inline (vlib_main_t * vm,
u32 table_index0;
vnet_classify_table_t *t0;
vnet_classify_entry_t *e0;
- u64 hash0;
+ u32 hash0;
u8 *h0;
/* Stride 3 seems to work best */
@@ -193,7 +193,7 @@ flow_classify_inline (vlib_main_t * vm,
vlib_buffer_t *p1 = vlib_get_buffer (vm, from[3]);
vnet_classify_table_t *tp1;
u32 table_index1;
- u64 phash1;
+ u32 phash1;
table_index1 = vnet_buffer (p1)->l2_classify.table_index;
@@ -279,7 +279,6 @@ VLIB_NODE_FN (ip4_flow_classify_node) (vlib_main_t * vm,
return flow_classify_inline (vm, node, frame, FLOW_CLASSIFY_TABLE_IP4);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_flow_classify_node) = {
.name = "ip4-flow-classify",
.vector_size = sizeof (u32),
@@ -291,7 +290,6 @@ VLIB_REGISTER_NODE (ip4_flow_classify_node) = {
[FLOW_CLASSIFY_NEXT_INDEX_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_flow_classify_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -300,7 +298,6 @@ VLIB_NODE_FN (ip6_flow_classify_node) (vlib_main_t * vm,
return flow_classify_inline (vm, node, frame, FLOW_CLASSIFY_TABLE_IP6);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_flow_classify_node) = {
.name = "ip6-flow-classify",
.vector_size = sizeof (u32),
@@ -313,7 +310,6 @@ VLIB_REGISTER_NODE (ip6_flow_classify_node) = {
},
};
-/* *INDENT-ON* */
static clib_error_t *
diff --git a/src/vnet/classify/in_out_acl.c b/src/vnet/classify/in_out_acl.c
index 752305e1cc2..af765139332 100644
--- a/src/vnet/classify/in_out_acl.c
+++ b/src/vnet/classify/in_out_acl.c
@@ -255,7 +255,6 @@ set_output_acl_command_fn (vlib_main_t * vm,
* Note: Only one table index per API call is allowed.
*
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_input_acl_command, static) = {
.path = "set interface input acl",
.short_help =
@@ -271,7 +270,6 @@ VLIB_CLI_COMMAND (set_output_acl_command, static) = {
" [ip6-table <index>] [l2-table <index>] [del]",
.function = set_output_acl_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
in_out_acl_init (vlib_main_t * vm)
@@ -284,12 +282,10 @@ in_out_acl_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (in_out_acl_init) =
{
.runs_after = VLIB_INITS("ip_in_out_acl_init"),
};
-/* *INDENT-ON* */
uword
unformat_acl_type (unformat_input_t * input, va_list * args)
@@ -392,7 +388,6 @@ show_outacl_command_fn (vlib_main_t * vm,
IN_OUT_ACL_OUTPUT_TABLE_GROUP);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_inacl_command, static) = {
.path = "show inacl",
.short_help = "show inacl type [ip4|ip6|l2]",
@@ -403,7 +398,6 @@ VLIB_CLI_COMMAND (show_outacl_command, static) = {
.short_help = "show outacl type [ip4|ip6|l2]",
.function = show_outacl_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/classify/ip_classify.c b/src/vnet/classify/ip_classify.c
index a5c044521bf..e8562c6912c 100644
--- a/src/vnet/classify/ip_classify.c
+++ b/src/vnet/classify/ip_classify.c
@@ -190,7 +190,7 @@ ip_classify_inline (vlib_main_t * vm,
u32 table_index0;
vnet_classify_table_t *t0;
vnet_classify_entry_t *e0;
- u64 hash0;
+ u32 hash0;
u8 *h0;
/* Stride 3 seems to work best */
@@ -199,7 +199,7 @@ ip_classify_inline (vlib_main_t * vm,
vlib_buffer_t *p1 = vlib_get_buffer (vm, from[3]);
vnet_classify_table_t *tp1;
u32 table_index1;
- u64 phash1;
+ u32 phash1;
table_index1 = vnet_buffer (p1)->l2_classify.table_index;
@@ -309,7 +309,6 @@ VLIB_NODE_FN (ip4_classify_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_classify_node) = {
.name = "ip4-classify",
.vector_size = sizeof (u32),
@@ -320,7 +319,6 @@ VLIB_REGISTER_NODE (ip4_classify_node) = {
.n_next_nodes = 0,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_classify_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -330,7 +328,6 @@ VLIB_NODE_FN (ip6_classify_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_classify_node) = {
.name = "ip6-classify",
.vector_size = sizeof (u32),
@@ -341,7 +338,6 @@ VLIB_REGISTER_NODE (ip6_classify_node) = {
.n_next_nodes = 0,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
static clib_error_t *
diff --git a/src/vnet/classify/pcap_classify.h b/src/vnet/classify/pcap_classify.h
index e079816f62c..a4ebcd1241c 100644
--- a/src/vnet/classify/pcap_classify.h
+++ b/src/vnet/classify/pcap_classify.h
@@ -47,11 +47,11 @@ vnet_is_packet_pcaped (vnet_pcap_t *pp, vlib_buffer_t *b, u32 sw_if_index)
return 0; /* wrong error */
if (filter_classify_table_index != ~0 &&
- vnet_is_packet_traced_inline (b, filter_classify_table_index,
- 0 /* full classify */) != 1)
+ pp->current_filter_function (b, filter_classify_table_index,
+ 0 /* full classify */) != 1)
return 0; /* not matching the filter, skip */
- return 1; /* success */
+ return 1;
}
/*
diff --git a/src/vnet/classify/policer_classify.c b/src/vnet/classify/policer_classify.c
index 4cf12a24e9e..814adefc987 100644
--- a/src/vnet/classify/policer_classify.c
+++ b/src/vnet/classify/policer_classify.c
@@ -164,7 +164,6 @@ set_policer_classify_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_policer_classify_command, static) = {
.path = "set policer classify",
.short_help =
@@ -172,7 +171,6 @@ VLIB_CLI_COMMAND (set_policer_classify_command, static) = {
" [ip6-table <index>] [l2-table <index>] [del]",
.function = set_policer_classify_command_fn,
};
-/* *INDENT-ON* */
static uword
unformat_table_type (unformat_input_t * input, va_list * va)
@@ -231,13 +229,11 @@ show_policer_classify_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_policer_classify_command, static) = {
.path = "show classify policer",
.short_help = "show classify policer type [ip4|ip6|l2]",
.function = show_policer_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/classify/trace_classify.h b/src/vnet/classify/trace_classify.h
index bc25ecd0ff7..03421210d03 100644
--- a/src/vnet/classify/trace_classify.h
+++ b/src/vnet/classify/trace_classify.h
@@ -29,6 +29,8 @@
* @param u32 classify_table_index - classifier table index
* @return 0 => no trace, 1 => trace, -1 => error
*/
+int vnet_is_packet_traced (vlib_buffer_t *b, u32 classify_table_index,
+ int func);
static inline int
vnet_is_packet_traced_inline (vlib_buffer_t * b,
@@ -43,6 +45,9 @@ vnet_is_packet_traced_inline (vlib_buffer_t * b,
if (func != 0)
return -1;
+ if (classify_table_index == ~0)
+ return -1;
+
/* This will happen... */
if (pool_is_free_index (vcm->tables, classify_table_index))
return -1;
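
The early ~0 check added above makes the tri-state contract explicit:
0 means no trace, 1 means trace, -1 means error or no usable table. A
minimal caller sketch that treats both "no trace" and "error" as
"do not trace":

    /* Illustrative caller honoring the 0 / 1 / -1 contract. */
    static inline int
    should_trace_packet (vlib_buffer_t *b, u32 classify_table_index)
    {
      return vnet_is_packet_traced_inline (b, classify_table_index,
					   0 /* full classify */) == 1;
    }
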
diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c
index 4fb4f336582..77c1c81f9c4 100644
--- a/src/vnet/classify/vnet_classify.c
+++ b/src/vnet/classify/vnet_classify.c
@@ -293,7 +293,7 @@ split_and_rehash (vnet_classify_table_t * t,
for (i = 0; i < length_in_entries; i++)
{
- u64 new_hash;
+ u32 new_hash;
v = vnet_classify_entry_at_index (t, old_values, i);
@@ -424,7 +424,7 @@ vnet_classify_add_del (vnet_classify_table_t *t, vnet_classify_entry_t *add_v,
u32 value_index;
int rv = 0;
int i;
- u64 hash, new_hash;
+ u32 hash, new_hash;
u32 limit;
u32 old_log2_pages, new_log2_pages;
u32 thread_index = vlib_get_thread_index ();
@@ -640,28 +640,26 @@ unlock:
return rv;
}
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct {
ethernet_header_t eh;
ip4_header_t ip;
}) classify_data_or_mask_t;
-/* *INDENT-ON* */
-u64
-vnet_classify_hash_packet (vnet_classify_table_t * t, u8 * h)
+u32
+vnet_classify_hash_packet (const vnet_classify_table_t *t, u8 *h)
{
return vnet_classify_hash_packet_inline (t, h);
}
vnet_classify_entry_t *
-vnet_classify_find_entry (vnet_classify_table_t * t,
- u8 * h, u64 hash, f64 now)
+vnet_classify_find_entry (const vnet_classify_table_t *t, u8 *h, u32 hash,
+ f64 now)
{
return vnet_classify_find_entry_inline (t, h, hash, now);
}
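
With the hash width narrowed to u32, call sites pair the two wrappers
above. A minimal lookup sketch (classify_lookup_next_example is a
hypothetical helper, not an upstream function):

    /* Hypothetical helper: hash the classified header with the table's
     * mask, probe the table, and return the hit's next_index or ~0. */
    static u32
    classify_lookup_next_example (const vnet_classify_table_t *t, u8 *h,
				  f64 now)
    {
      u32 hash = vnet_classify_hash_packet (t, h);
      vnet_classify_entry_t *e = vnet_classify_find_entry (t, h, hash, now);
      return e ? e->next_index : ~0;
    }
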
-static u8 *
-format_classify_entry (u8 * s, va_list * args)
+u8 *
+format_classify_entry (u8 *s, va_list *args)
{
vnet_classify_table_t *t = va_arg (*args, vnet_classify_table_t *);
vnet_classify_entry_t *e = va_arg (*args, vnet_classify_entry_t *);
@@ -777,8 +775,10 @@ vnet_classify_add_del_table (vnet_classify_main_t *cm, const u8 *mask,
else /* update */
{
vnet_classify_main_t *cm = &vnet_classify_main;
- t = pool_elt_at_index (cm->tables, *table_index);
+ if (pool_is_free_index (cm->tables, *table_index))
+ return VNET_API_ERROR_CLASSIFY_TABLE_NOT_FOUND;
+ t = pool_elt_at_index (cm->tables, *table_index);
t->next_table_index = next_table_index;
}
return 0;
@@ -1233,12 +1233,16 @@ unformat_classify_mask (unformat_input_t * input, va_list * args)
u8 *l2 = 0;
u8 *l3 = 0;
u8 *l4 = 0;
+ u8 add_l2 = 1;
int i;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
if (unformat (input, "hex %U", unformat_hex_string, &mask))
;
+ else if (unformat (input, "l2 none"))
+ /* Don't add the l2 header in the mask */
+ add_l2 = 0;
else if (unformat (input, "l2 %U", unformat_l2_mask, &l2))
;
else if (unformat (input, "l3 %U", unformat_l3_mask, &l3))
@@ -1249,6 +1253,15 @@ unformat_classify_mask (unformat_input_t * input, va_list * args)
break;
}
+ if (l2 && !add_l2)
+ {
+ vec_free (mask);
+ vec_free (l2);
+ vec_free (l3);
+ vec_free (l4);
+ return 0;
+ }
+
if (l4 && !l3)
{
vec_free (mask);
@@ -1261,15 +1274,20 @@ unformat_classify_mask (unformat_input_t * input, va_list * args)
{
if (l2 || l3 || l4)
{
- /* "With a free Ethernet header in every package" */
- if (l2 == 0)
- vec_validate (l2, 13);
- mask = l2;
- if (l3)
+ if (add_l2)
{
- vec_append (mask, l3);
- vec_free (l3);
+ /* "With a free Ethernet header in every package" */
+ if (l2 == 0)
+ vec_validate (l2, 13);
+ mask = l2;
+ if (l3)
+ {
+ vec_append (mask, l3);
+ vec_free (l3);
+ }
}
+ else
+ mask = l3;
if (l4)
{
vec_append (mask, l4);
@@ -1302,7 +1320,7 @@ unformat_classify_mask (unformat_input_t * input, va_list * args)
if (match == 0)
clib_warning ("BUG: match 0");
- _vec_len (mask) = match * sizeof (u32x4);
+ vec_set_len (mask, match * sizeof (u32x4));
*matchp = match;
*maskp = mask;
@@ -1313,12 +1331,11 @@ unformat_classify_mask (unformat_input_t * input, va_list * args)
return 0;
}
-#define foreach_l2_input_next \
-_(drop, DROP) \
-_(ethernet, ETHERNET_INPUT) \
-_(ip4, IP4_INPUT) \
-_(ip6, IP6_INPUT) \
-_(li, LI)
+#define foreach_l2_input_next \
+ _ (drop, DROP) \
+ _ (ethernet, ETHERNET_INPUT) \
+ _ (ip4, IP4_INPUT) \
+ _ (ip6, IP6_INPUT)
uword
unformat_l2_input_next_index (unformat_input_t * input, va_list * args)
@@ -1618,7 +1635,6 @@ classify_table_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (classify_table, static) =
{
.path = "classify table",
@@ -1630,7 +1646,6 @@ VLIB_CLI_COMMAND (classify_table, static) =
"\n [del] [del-chain]",
.function = classify_table_command_fn,
};
-/* *INDENT-ON* */
static int
filter_table_mask_compare (void *a1, void *a2)
@@ -2034,7 +2049,7 @@ vlib_enable_disable_pkt_trace_filter (int enable)
/*?
* Construct an arbitrary set of packet classifier tables for use with
- * "pcap rx | tx trace," and with the vpp packet tracer
+ * "pcap trace rx | tx," and with the vpp packet tracer
*
* Packets which match a rule in the classifier table chain
* will be traced. The tables are automatically ordered so that
@@ -2077,10 +2092,10 @@ vlib_enable_disable_pkt_trace_filter (int enable)
* @cliexpar
* Configuring the classify filter
*
- * Configure a simple classify filter, and configure pcap rx trace to use it:
+ * Configure a simple classify filter, and configure pcap trace rx to use it:
*
* @cliexcmd{classify filter rx mask l3 ip4 src match l3 ip4 src 192.168.1.11}
- * <b><em>pcap rx trace on max 100 filter</em></b>
+ * <b><em>pcap trace rx max 100 filter</em></b>
*
* Configure another fairly simple filter
*
@@ -2106,7 +2121,6 @@ vlib_enable_disable_pkt_trace_filter (int enable)
* The verbose form displays all of the match rules, with hit-counters
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (classify_filter, static) =
{
.path = "classify filter",
@@ -2116,7 +2130,6 @@ VLIB_CLI_COMMAND (classify_filter, static) =
" [buckets <nn>] [memory-size <n>]",
.function = classify_filter_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_classify_filter_command_fn (vlib_main_t * vm,
@@ -2196,14 +2209,12 @@ show_classify_filter_command_fn (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_classify_filter, static) =
{
.path = "show classify filter",
.short_help = "show classify filter [verbose [nn]]",
.function = show_classify_filter_command_fn,
};
-/* *INDENT-ON* */
u8 *
format_vnet_classify_table (u8 *s, va_list *args)
@@ -2266,13 +2277,11 @@ show_classify_tables_command_fn (vlib_main_t * vm,
break;
}
- /* *INDENT-OFF* */
pool_foreach (t, cm->tables)
{
if (match_index == ~0 || (match_index == t - cm->tables))
vec_add1 (indices, t - cm->tables);
}
- /* *INDENT-ON* */
if (vec_len (indices))
{
@@ -2292,13 +2301,11 @@ show_classify_tables_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_classify_table_command, static) = {
.path = "show classify tables",
.short_help = "show classify tables [index <nn>]",
.function = show_classify_tables_command_fn,
};
-/* *INDENT-ON* */
uword
unformat_l4_match (unformat_input_t * input, va_list * args)
@@ -2679,6 +2686,7 @@ unformat_classify_match (unformat_input_t * input, va_list * args)
u8 *l2 = 0;
u8 *l3 = 0;
u8 *l4 = 0;
+ u8 add_l2 = 1;
if (pool_is_free_index (cm->tables, table_index))
return 0;
@@ -2689,6 +2697,9 @@ unformat_classify_match (unformat_input_t * input, va_list * args)
{
if (unformat (input, "hex %U", unformat_hex_string, &match))
;
+ else if (unformat (input, "l2 none"))
+	/* Don't add the l2 header to the match */
+ add_l2 = 0;
else if (unformat (input, "l2 %U", unformat_l2_match, &l2))
;
else if (unformat (input, "l3 %U", unformat_l3_match, &l3))
@@ -2699,6 +2710,15 @@ unformat_classify_match (unformat_input_t * input, va_list * args)
break;
}
+ if (l2 && !add_l2)
+ {
+ vec_free (match);
+ vec_free (l2);
+ vec_free (l3);
+ vec_free (l4);
+ return 0;
+ }
+
if (l4 && !l3)
{
vec_free (match);
@@ -2711,15 +2731,20 @@ unformat_classify_match (unformat_input_t * input, va_list * args)
{
if (l2 || l3 || l4)
{
- /* "Win a free Ethernet header in every packet" */
- if (l2 == 0)
- vec_validate_aligned (l2, 13, sizeof (u32x4));
- match = l2;
- if (l3)
+ if (add_l2)
{
- vec_append_aligned (match, l3, sizeof (u32x4));
- vec_free (l3);
+ /* "Win a free Ethernet header in every packet" */
+ if (l2 == 0)
+ vec_validate_aligned (l2, 13, sizeof (u32x4));
+ match = l2;
+ if (l3)
+ {
+ vec_append_aligned (match, l3, sizeof (u32x4));
+ vec_free (l3);
+ }
}
+ else
+ match = l3;
if (l4)
{
vec_append_aligned (match, l4, sizeof (u32x4));
@@ -2734,8 +2759,8 @@ unformat_classify_match (unformat_input_t * input, va_list * args)
sizeof (u32x4));
/* Set size, include skipped vectors */
- _vec_len (match) =
- (t->match_n_vectors + t->skip_n_vectors) * sizeof (u32x4);
+ vec_set_len (match,
+ (t->match_n_vectors + t->skip_n_vectors) * sizeof (u32x4));
*matchp = match;
@@ -2747,9 +2772,9 @@ unformat_classify_match (unformat_input_t * input, va_list * args)
int
vnet_classify_add_del_session (vnet_classify_main_t *cm, u32 table_index,
- const u8 *match, u32 hit_next_index,
+ const u8 *match, u16 hit_next_index,
u32 opaque_index, i32 advance, u8 action,
- u16 metadata, int is_add)
+ u32 metadata, int is_add)
{
vnet_classify_table_t *t;
vnet_classify_entry_5_t _max_e __attribute__ ((aligned (16)));
@@ -2893,7 +2918,6 @@ classify_session_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (classify_session_command, static) = {
.path = "classify session",
.short_help =
@@ -2903,7 +2927,6 @@ VLIB_CLI_COMMAND (classify_session_command, static) = {
"\n [action set-ip4-fib-id|set-ip6-fib-id|set-sr-policy-index <n>] [del]",
.function = classify_session_command_fn,
};
-/* *INDENT-ON* */
static uword
unformat_opaque_sw_if_index (unformat_input_t * input, va_list * args)
@@ -3047,7 +3070,12 @@ vnet_is_packet_traced (vlib_buffer_t * b, u32 classify_table_index, int func)
{
return vnet_is_packet_traced_inline (b, classify_table_index, func);
}
-
+VLIB_REGISTER_TRACE_FILTER_FUNCTION (vnet_is_packet_traced_fn, static) = {
+ .name = "vnet_is_packet_traced",
+ .description = "classifier based filter",
+ .priority = 50,
+ .function = vnet_is_packet_traced
+};
#define TEST_CODE 0
@@ -3199,7 +3227,7 @@ test_classify_churn (test_classify_main_t * tm)
for (i = 0; i < tm->sessions; i++)
{
u8 *key_minus_skip;
- u64 hash;
+ u32 hash;
vnet_classify_entry_t *e;
ep = tm->entries + i;
@@ -3316,7 +3344,6 @@ test_classify_command_fn (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_classify_command, static) = {
.path = "test classify",
.short_help =
@@ -3325,7 +3352,6 @@ VLIB_CLI_COMMAND (test_classify_command, static) = {
" [churn-test]",
.function = test_classify_command_fn,
};
-/* *INDENT-ON* */
#endif /* TEST_CODE */
/*
diff --git a/src/vnet/classify/vnet_classify.h b/src/vnet/classify/vnet_classify.h
index ff79c26c45a..768593c45af 100644
--- a/src/vnet/classify/vnet_classify.h
+++ b/src/vnet/classify/vnet_classify.h
@@ -89,15 +89,17 @@ typedef struct _vnet_classify_entry
/* last heard time */
f64 last_heard;
+ u32 metadata;
+
+ /* Graph node next index */
+ u16 next_index;
+
+ vnet_classify_action_t action;
+
/* Really only need 1 bit */
u8 flags;
#define VNET_CLASSIFY_ENTRY_FREE (1<<0)
- vnet_classify_action_t action;
- u16 metadata;
- /* Graph node next index */
- u32 next_index;
-
/* Must be aligned to a 16-octet boundary */
u32x4 key[0];
} vnet_classify_entry_t;
@@ -196,7 +198,11 @@ typedef struct
CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
/* Mask to apply after skipping N vectors */
- u32x4 mask[8];
+ union
+ {
+ u32x4 mask[8];
+ u32 mask_u32[32];
+ };
} vnet_classify_table_t;
@@ -235,10 +241,11 @@ struct _vnet_classify_main
extern vnet_classify_main_t vnet_classify_main;
+u8 *format_classify_entry (u8 *s, va_list *args);
u8 *format_classify_table (u8 * s, va_list * args);
u8 *format_vnet_classify_table (u8 *s, va_list *args);
-u64 vnet_classify_hash_packet (vnet_classify_table_t * t, u8 * h);
+u32 vnet_classify_hash_packet (const vnet_classify_table_t *t, u8 *h);
static_always_inline vnet_classify_table_t *
vnet_classify_table_get (u32 table_index)
@@ -248,8 +255,8 @@ vnet_classify_table_get (u32 table_index)
return (pool_elt_at_index (vcm->tables, table_index));
}
-static inline u64
-vnet_classify_hash_packet_inline (vnet_classify_table_t *t, const u8 *h)
+static inline u32
+vnet_classify_hash_packet_inline (const vnet_classify_table_t *t, const u8 *h)
{
u64 xor_sum;
ASSERT (t);
@@ -339,7 +346,7 @@ vnet_classify_hash_packet_inline (vnet_classify_table_t *t, const u8 *h)
#ifdef clib_crc32c_uses_intrinsics
return clib_crc32c ((u8 *) & xor_sum, sizeof (xor_sum));
#else
- return clib_xxhash (xor_sum.as_u64[0] ^ xor_sum.as_u64[1]);
+ return clib_xxhash (xor_sum);
#endif
}
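
A data-path sketch (not part of the patch) of the narrowed 32-bit hash in
use, with t, h and now assumed to be in scope; the same value feeds the
bucket prefetch and the lookup:

  u32 hash = vnet_classify_hash_packet_inline (t, h);
  vnet_classify_prefetch_bucket (t, hash);
  /* ... other per-packet work ... */
  vnet_classify_entry_t *e =
    vnet_classify_find_entry_inline (t, h, hash, now);
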
@@ -356,7 +363,7 @@ vnet_classify_prefetch_bucket (vnet_classify_table_t * t, u64 hash)
}
static inline vnet_classify_entry_t *
-vnet_classify_get_entry (vnet_classify_table_t * t, uword offset)
+vnet_classify_get_entry (const vnet_classify_table_t *t, uword offset)
{
u8 *hp = clib_mem_get_heap_base (t->mheap);
u8 *vp = hp + offset;
@@ -378,8 +385,8 @@ vnet_classify_get_offset (vnet_classify_table_t * t,
}
static inline vnet_classify_entry_t *
-vnet_classify_entry_at_index (vnet_classify_table_t * t,
- vnet_classify_entry_t * e, u32 index)
+vnet_classify_entry_at_index (const vnet_classify_table_t *t,
+ vnet_classify_entry_t *e, u32 index)
{
u8 *eu8;
@@ -416,8 +423,9 @@ vnet_classify_prefetch_entry (vnet_classify_table_t * t, u64 hash)
clib_prefetch_load (e);
}
-vnet_classify_entry_t *vnet_classify_find_entry (vnet_classify_table_t * t,
- u8 * h, u64 hash, f64 now);
+vnet_classify_entry_t *
+vnet_classify_find_entry (const vnet_classify_table_t *t, u8 *h, u32 hash,
+ f64 now);
static_always_inline int
vnet_classify_entry_is_equal (vnet_classify_entry_t *v, const u8 *d, u8 *m,
@@ -426,7 +434,7 @@ vnet_classify_entry_is_equal (vnet_classify_entry_t *v, const u8 *d, u8 *m,
#if defined(CLIB_HAVE_VEC512) && defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
u64x8 r, *mask = (u64x8 *) m;
u64x8u *data = (u64x8u *) d;
- u64x4 *key = (u64x4 *) v->key;
+ u64x8 *key = (u64x8 *) v->key;
r = (u64x8_mask_load_zero (data, load_mask) & mask[0]) ^
u64x8_mask_load_zero (key, load_mask);
@@ -524,8 +532,8 @@ vnet_classify_entry_is_equal (vnet_classify_entry_t *v, const u8 *d, u8 *m,
}
static inline vnet_classify_entry_t *
-vnet_classify_find_entry_inline (vnet_classify_table_t *t, const u8 *h,
- u64 hash, f64 now)
+vnet_classify_find_entry_inline (const vnet_classify_table_t *t, const u8 *h,
+ u32 hash, f64 now)
{
vnet_classify_entry_t *v;
vnet_classify_bucket_t *b;
@@ -580,9 +588,9 @@ vnet_classify_table_t *vnet_classify_new_table (vnet_classify_main_t *cm,
u32 match_n_vectors);
int vnet_classify_add_del_session (vnet_classify_main_t *cm, u32 table_index,
- const u8 *match, u32 hit_next_index,
+ const u8 *match, u16 hit_next_index,
u32 opaque_index, i32 advance, u8 action,
- u16 metadata, int is_add);
+ u32 metadata, int is_add);
int vnet_classify_add_del_table (vnet_classify_main_t *cm, const u8 *mask,
u32 nbuckets, u32 memory_size, u32 skip,
diff --git a/src/vnet/config.c b/src/vnet/config.c
index c9d4909cdeb..c05da663fb7 100644
--- a/src/vnet/config.c
+++ b/src/vnet/config.c
@@ -97,7 +97,7 @@ find_config_with_features (vlib_main_t * vm,
config_string = cm->config_string_temp;
cm->config_string_temp = 0;
if (config_string)
- _vec_len (config_string) = 0;
+ vec_set_len (config_string, 0);
vec_foreach (f, feature_vector)
{
@@ -119,6 +119,12 @@ find_config_with_features (vlib_main_t * vm,
vec_add1 (config_string, next_index);
}
+  /* Add the end node index to the config string so that it is part of
+   * the key used to detect string sharing. If it were not included, a
+   * modification of the end node would affect all users of a shared
+   * string. */
+ vec_add1 (config_string, end_node_index);
+
/* See if config string is unique. */
p = hash_get_mem (cm->config_string_hash, config_string);
if (p)
@@ -250,6 +256,15 @@ vnet_config_del (vnet_config_main_t * cm, u32 config_id)
}
u32
+vnet_config_reset_end_node (vlib_main_t *vm, vnet_config_main_t *cm, u32 ci)
+{
+ cm->end_node_indices_by_user_index[ci] = cm->default_end_node_index;
+
+ return (
+ vnet_config_modify_end_node (vm, cm, ci, cm->default_end_node_index));
+}
+
+u32
vnet_config_modify_end_node (vlib_main_t * vm,
vnet_config_main_t * cm,
u32 config_string_heap_index, u32 end_node_index)
@@ -281,7 +296,7 @@ vnet_config_modify_end_node (vlib_main_t * vm,
if (new_features[last].node_index == cm->default_end_node_index)
{
vec_free (new_features->feature_config);
- _vec_len (new_features) = last;
+ vec_set_len (new_features, last);
}
}
@@ -304,6 +319,18 @@ vnet_config_modify_end_node (vlib_main_t * vm,
}
u32
+vnet_config_get_end_node (vlib_main_t *vm, vnet_config_main_t *cm,
+ u32 config_string_heap_index)
+{
+ if (config_string_heap_index >= vec_len (cm->end_node_indices_by_user_index))
+ return cm->default_end_node_index;
+ if (~0 == cm->end_node_indices_by_user_index[config_string_heap_index])
+ return cm->default_end_node_index;
+
+ return (cm->end_node_indices_by_user_index[config_string_heap_index]);
+}
+
+u32
vnet_config_add_feature (vlib_main_t * vm,
vnet_config_main_t * cm,
u32 config_string_heap_index,
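
A hypothetical feature sketch of the new helpers: redirect an arc's config
string to a custom end node, then restore the arc default
(punt_node_index is a placeholder):

  static u32
  example_redirect_and_restore (vlib_main_t *vm, vnet_config_main_t *cm,
                                u32 ci, u32 punt_node_index)
  {
    ci = vnet_config_modify_end_node (vm, cm, ci, punt_node_index);
    /* ... packets on this config string now end at the custom node ... */
    return vnet_config_reset_end_node (vm, cm, ci);
  }
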
diff --git a/src/vnet/config.h b/src/vnet/config.h
index ccbbbf433e2..9b01b4a433e 100644
--- a/src/vnet/config.h
+++ b/src/vnet/config.h
@@ -169,6 +169,12 @@ u32 vnet_config_modify_end_node (vlib_main_t * vm,
u32 config_string_heap_index,
u32 end_node_index);
+u32 vnet_config_reset_end_node (vlib_main_t *vm, vnet_config_main_t *cm,
+ u32 config_string_heap_index);
+
+u32 vnet_config_get_end_node (vlib_main_t *vm, vnet_config_main_t *cm,
+ u32 config_string_heap_index);
+
u8 *vnet_config_format_features (vlib_main_t * vm,
vnet_config_main_t * cm,
u32 config_index, u8 * s);
diff --git a/src/vnet/crypto/cli.c b/src/vnet/crypto/cli.c
index a6098a18e11..2ca66f228c3 100644
--- a/src/vnet/crypto/cli.c
+++ b/src/vnet/crypto/cli.c
@@ -36,16 +36,13 @@ show_crypto_engines_command_fn (vlib_main_t * vm,
}
vlib_cli_output (vm, "%-20s%-8s%s", "Name", "Prio", "Description");
- /* *INDENT-OFF* */
vec_foreach (p, cm->engines)
{
vlib_cli_output (vm, "%-20s%-8u%s", p->name, p->priority, p->desc);
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_crypto_engines_command, static) =
{
.path = "show crypto engines",
@@ -67,7 +64,7 @@ format_vnet_crypto_engine_candidates (u8 * s, va_list * args)
{
vec_foreach (e, cm->engines)
{
- if (e->enqueue_handlers[id] && e->dequeue_handlers[id])
+ if (e->enqueue_handlers[id] && e->dequeue_handler)
{
s = format (s, "%U", format_vnet_crypto_engine, e - cm->engines);
if (ei == e - cm->engines)
@@ -145,20 +142,18 @@ show_crypto_handlers_command_fn (vlib_main_t * vm,
"Chained");
for (i = 0; i < VNET_CRYPTO_N_ALGS; i++)
- vlib_cli_output (vm, "%-16U%U", format_vnet_crypto_alg, i,
+ vlib_cli_output (vm, "%-20U%U", format_vnet_crypto_alg, i,
format_vnet_crypto_handlers, i);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_crypto_handlers_command, static) =
{
.path = "show crypto handlers",
.short_help = "show crypto handlers",
.function = show_crypto_handlers_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
set_crypto_handler_command_fn (vlib_main_t * vm,
@@ -209,13 +204,11 @@ set_crypto_handler_command_fn (vlib_main_t * vm,
char *key;
u8 *value;
- /* *INDENT-OFF* */
hash_foreach_mem (key, value, cm->alg_index_by_name,
({
(void) value;
rc += vnet_crypto_set_handler2 (key, engine, oct);
}));
- /* *INDENT-ON* */
if (rc)
vlib_cli_output (vm, "failed to set crypto engine!");
@@ -241,7 +234,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_crypto_handler_command, static) =
{
.path = "set crypto handler",
@@ -249,7 +241,6 @@ VLIB_CLI_COMMAND (set_crypto_handler_command, static) =
" [simple|chained]",
.function = set_crypto_handler_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_vnet_crypto_async_handlers (u8 * s, va_list * args)
@@ -300,14 +291,12 @@ show_crypto_async_handlers_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_crypto_async_handlers_command, static) =
{
.path = "show crypto async handlers",
.short_help = "show crypto async handlers",
.function = show_crypto_async_handlers_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -316,7 +305,6 @@ show_crypto_async_status_command_fn (vlib_main_t * vm,
vlib_cli_command_t * cmd)
{
vnet_crypto_main_t *cm = &crypto_main;
- u32 skip_master = vlib_num_workers () > 0;
vlib_thread_main_t *tm = vlib_get_thread_main ();
unformat_input_t _line_input, *line_input = &_line_input;
int i;
@@ -324,12 +312,7 @@ show_crypto_async_status_command_fn (vlib_main_t * vm,
if (unformat_user (input, unformat_line_input, line_input))
unformat_free (line_input);
- vlib_cli_output (vm, "Crypto async dispatch mode: %s",
- cm->dispatch_mode ==
- VNET_CRYPTO_ASYNC_DISPATCH_POLLING ? "POLLING" :
- "INTERRUPT");
-
- for (i = skip_master; i < tm->n_vlib_mains; i++)
+ for (i = 0; i < tm->n_vlib_mains; i++)
{
vlib_node_state_t state = vlib_node_get_state (
vlib_get_main_by_index (i), cm->crypto_node_index);
@@ -343,14 +326,12 @@ show_crypto_async_status_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_crypto_async_status_command, static) =
{
.path = "show crypto async status",
.short_help = "show crypto async status",
.function = show_crypto_async_status_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
set_crypto_async_handler_command_fn (vlib_main_t * vm,
@@ -394,13 +375,11 @@ set_crypto_async_handler_command_fn (vlib_main_t * vm,
char *key;
u8 *value;
- /* *INDENT-OFF* */
hash_foreach_mem (key, value, cm->async_alg_index_by_name,
({
(void) value;
rc += vnet_crypto_set_async_handler2 (key, engine);
}));
- /* *INDENT-ON* */
if (rc)
vlib_cli_output (vm, "failed to set crypto engine!");
@@ -426,57 +405,52 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_crypto_async_handler_command, static) =
{
.path = "set crypto async handler",
.short_help = "set crypto async handler type [type2 type3 ...] engine",
.function = set_crypto_async_handler_command_fn,
};
-/* *INDENT-ON* */
-
-static inline void
-print_crypto_async_dispatch_warning ()
-{
- clib_warning ("Switching dispatch mode might not work is some situations.");
- clib_warning
- ("Use 'show crypto async status' to verify that the nodes' states were set");
- clib_warning ("and if not, set 'crypto async dispatch' mode again.");
-}
static clib_error_t *
-set_crypto_async_dispatch_polling_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
+set_crypto_async_dispatch_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
{
- print_crypto_async_dispatch_warning ();
- vnet_crypto_set_async_dispatch_mode (VNET_CRYPTO_ASYNC_DISPATCH_POLLING);
- return 0;
-}
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ u8 adaptive = 0;
+ u8 mode = VLIB_NODE_STATE_INTERRUPT;
-static clib_error_t *
-set_crypto_async_dispatch_interrupt_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- print_crypto_async_dispatch_warning ();
- vnet_crypto_set_async_dispatch_mode (VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT);
- return 0;
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "polling"))
+ mode = VLIB_NODE_STATE_POLLING;
+ else if (unformat (line_input, "interrupt"))
+ mode = VLIB_NODE_STATE_INTERRUPT;
+ else if (unformat (line_input, "adaptive"))
+ adaptive = 1;
+ else
+ {
+ error = clib_error_return (0, "invalid params");
+ goto done;
+ }
+ }
+
+ vnet_crypto_set_async_dispatch (mode, adaptive);
+done:
+ unformat_free (line_input);
+ return error;
}
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_crypto_async_dispatch_polling_command, static) =
-{
- .path = "set crypto async dispatch polling",
- .short_help = "set crypto async dispatch polling|interrupt",
- .function = set_crypto_async_dispatch_polling_command_fn,
-};
-VLIB_CLI_COMMAND (set_crypto_async_dispatch_interrupt_command, static) =
-{
- .path = "set crypto async dispatch interrupt",
- .short_help = "set crypto async dispatch polling|interrupt",
- .function = set_crypto_async_dispatch_interrupt_command_fn,
+VLIB_CLI_COMMAND (set_crypto_async_dispatch_mode_command, static) = {
+ .path = "set crypto async dispatch mode",
+ .short_help = "set crypto async dispatch mode <polling|interrupt|adaptive>",
+ .function = set_crypto_async_dispatch_command_fn,
};
+
/*
* fd.io coding-style-patch-verification: ON
*
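
A hypothetical operator session with the consolidated command, verifying
the result as the removed warning used to suggest:

  vpp# set crypto async dispatch mode interrupt adaptive
  vpp# show crypto async status
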
diff --git a/src/vnet/crypto/crypto.api b/src/vnet/crypto/crypto.api
index 6eccd8524ba..8fec805dcfc 100644
--- a/src/vnet/crypto/crypto.api
+++ b/src/vnet/crypto/crypto.api
@@ -28,7 +28,8 @@ enum crypto_op_class_type:u8
CRYPTO_API_OP_BOTH,
};
- /** \brief crypto: use polling or interrupt dispatch
+ /** \brief crypto: Use polling or interrupt dispatch.
+       It always leaves the adaptive flag unset, which is why it is deprecated.
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param mode - dispatch mode
@@ -36,11 +37,29 @@ enum crypto_op_class_type:u8
autoreply define crypto_set_async_dispatch
{
+ option deprecated;
+ option replaced_by="crypto_set_async_dispatch_v2";
u32 client_index;
u32 context;
vl_api_crypto_dispatch_mode_t mode;
};
+ /** \brief crypto: Change the way crypto operations are dispatched.
+       Optionally use adaptive mode, starting in polling or interrupt state.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+    @param mode - initial dispatch state
+    @param adaptive - whether or not the state changes depending on load
+*/
+
+autoreply define crypto_set_async_dispatch_v2
+{
+ u32 client_index;
+ u32 context;
+ vl_api_crypto_dispatch_mode_t mode;
+ bool adaptive;
+};
+
/** \brief crypto: set crypto handler
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/vnet/crypto/crypto.c b/src/vnet/crypto/crypto.c
index 7903f88b7cb..c8e7ca90c9d 100644
--- a/src/vnet/crypto/crypto.c
+++ b/src/vnet/crypto/crypto.c
@@ -192,13 +192,16 @@ vnet_crypto_is_set_handler (vnet_crypto_alg_t alg)
vnet_crypto_op_id_t opt = 0;
int i;
- if (alg > vec_len (cm->algs))
+ if (alg >= vec_len (cm->algs))
return 0;
for (i = 0; i < VNET_CRYPTO_OP_N_TYPES; i++)
if ((opt = cm->algs[alg].op_by_type[i]) != 0)
break;
+ if (opt >= vec_len (cm->ops_handlers))
+ return 0;
+
return NULL != cm->ops_handlers[opt];
}
@@ -275,30 +278,24 @@ vnet_crypto_register_ops_handlers (vlib_main_t * vm, u32 engine_index,
}
void
-vnet_crypto_register_async_handler (vlib_main_t * vm, u32 engine_index,
- vnet_crypto_async_op_id_t opt,
- vnet_crypto_frame_enqueue_t * enqueue_hdl,
- vnet_crypto_frame_dequeue_t * dequeue_hdl)
+vnet_crypto_register_enqueue_handler (vlib_main_t *vm, u32 engine_index,
+ vnet_crypto_async_op_id_t opt,
+ vnet_crypto_frame_enqueue_t *enqueue_hdl)
{
vnet_crypto_main_t *cm = &crypto_main;
vnet_crypto_engine_t *ae, *e = vec_elt_at_index (cm->engines, engine_index);
vnet_crypto_async_op_data_t *otd = cm->async_opt_data + opt;
vec_validate_aligned (cm->enqueue_handlers, VNET_CRYPTO_ASYNC_OP_N_IDS,
CLIB_CACHE_LINE_BYTES);
- vec_validate_aligned (cm->dequeue_handlers, VNET_CRYPTO_ASYNC_OP_N_IDS,
- CLIB_CACHE_LINE_BYTES);
- /* both enqueue hdl and dequeue hdl should present */
- if (!enqueue_hdl && !dequeue_hdl)
+ if (!enqueue_hdl)
return;
e->enqueue_handlers[opt] = enqueue_hdl;
- e->dequeue_handlers[opt] = dequeue_hdl;
if (otd->active_engine_index_async == ~0)
{
otd->active_engine_index_async = engine_index;
cm->enqueue_handlers[opt] = enqueue_hdl;
- cm->dequeue_handlers[opt] = dequeue_hdl;
}
ae = vec_elt_at_index (cm->engines, otd->active_engine_index_async);
@@ -306,12 +303,79 @@ vnet_crypto_register_async_handler (vlib_main_t * vm, u32 engine_index,
{
otd->active_engine_index_async = engine_index;
cm->enqueue_handlers[opt] = enqueue_hdl;
- cm->dequeue_handlers[opt] = dequeue_hdl;
}
return;
}
+static int
+engine_index_cmp (void *v1, void *v2)
+{
+ u32 *a1 = v1;
+ u32 *a2 = v2;
+
+ if (*a1 > *a2)
+ return 1;
+ if (*a1 < *a2)
+ return -1;
+ return 0;
+}
+
+static void
+vnet_crypto_update_cm_dequeue_handlers (void)
+{
+ vnet_crypto_main_t *cm = &crypto_main;
+ vnet_crypto_async_op_data_t *otd;
+ vnet_crypto_engine_t *e;
+ u32 *active_engines = 0, *ei, last_ei = ~0, i;
+
+ vec_reset_length (cm->dequeue_handlers);
+
+ for (i = 0; i < VNET_CRYPTO_ASYNC_OP_N_IDS; i++)
+ {
+ otd = cm->async_opt_data + i;
+ if (otd->active_engine_index_async == ~0)
+ continue;
+ e = cm->engines + otd->active_engine_index_async;
+ if (!e->dequeue_handler)
+ continue;
+ vec_add1 (active_engines, otd->active_engine_index_async);
+ }
+
+ vec_sort_with_function (active_engines, engine_index_cmp);
+
+ vec_foreach (ei, active_engines)
+ {
+ if (ei[0] == last_ei)
+ continue;
+ if (ei[0] == ~0)
+ continue;
+
+ e = cm->engines + ei[0];
+ vec_add1 (cm->dequeue_handlers, e->dequeue_handler);
+ last_ei = ei[0];
+ }
+
+ vec_free (active_engines);
+}
+
+void
+vnet_crypto_register_dequeue_handler (vlib_main_t *vm, u32 engine_index,
+ vnet_crypto_frame_dequeue_t *deq_fn)
+{
+ vnet_crypto_main_t *cm = &crypto_main;
+ vnet_crypto_engine_t *e = vec_elt_at_index (cm->engines, engine_index);
+
+ if (!deq_fn)
+ return;
+
+ e->dequeue_handler = deq_fn;
+
+ vnet_crypto_update_cm_dequeue_handlers ();
+
+ return;
+}
+
void
vnet_crypto_register_key_handler (vlib_main_t * vm, u32 engine_index,
vnet_crypto_key_handler_t * key_handler)
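
Under the split registration API an engine registers one enqueue handler
per async op, but a single per-engine dequeue handler. A hypothetical
engine init sketch (the my_* names are placeholders; the op id follows the
VNET_CRYPTO_OP_<ALG>_TAG<n>_AAD<n>_<DIR> naming assumed from crypto.h):

  static clib_error_t *
  my_engine_init (vlib_main_t *vm)
  {
    u32 eidx =
      vnet_crypto_register_engine (vm, "my-engine", 100, "example engine");

    vnet_crypto_register_enqueue_handler (
      vm, eidx, VNET_CRYPTO_OP_AES_128_GCM_TAG16_AAD8_ENC,
      my_enqueue_aes_128_gcm_tag16_aad8_enc);
    vnet_crypto_register_dequeue_handler (vm, eidx, my_dequeue);
    return 0;
  }
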
@@ -370,8 +434,7 @@ vnet_crypto_key_add (vlib_main_t * vm, vnet_crypto_alg_t alg, u8 * data,
if (!vnet_crypto_key_len_check (alg, length))
return ~0;
- pool_get_aligned_will_expand (cm->keys, need_barrier_sync,
- CLIB_CACHE_LINE_BYTES);
+ need_barrier_sync = pool_get_will_expand (cm->keys);
/* If the cm->keys will expand, stop the parade. */
if (need_barrier_sync)
vlib_worker_thread_barrier_sync (vm);
@@ -386,11 +449,9 @@ vnet_crypto_key_add (vlib_main_t * vm, vnet_crypto_alg_t alg, u8 * data,
key->alg = alg;
vec_validate_aligned (key->data, length - 1, CLIB_CACHE_LINE_BYTES);
clib_memcpy (key->data, data, length);
- /* *INDENT-OFF* */
vec_foreach (engine, cm->engines)
if (engine->key_op_handler)
engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_ADD, index);
- /* *INDENT-ON* */
return index;
}
@@ -401,25 +462,34 @@ vnet_crypto_key_del (vlib_main_t * vm, vnet_crypto_key_index_t index)
vnet_crypto_engine_t *engine;
vnet_crypto_key_t *key = pool_elt_at_index (cm->keys, index);
- /* *INDENT-OFF* */
vec_foreach (engine, cm->engines)
if (engine->key_op_handler)
engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_DEL, index);
- /* *INDENT-ON* */
if (key->type == VNET_CRYPTO_KEY_TYPE_DATA)
{
- clib_memset (key->data, 0, vec_len (key->data));
+ clib_memset (key->data, 0xfe, vec_len (key->data));
vec_free (key->data);
}
else if (key->type == VNET_CRYPTO_KEY_TYPE_LINK)
{
- key->index_crypto = key->index_integ = 0;
+ key->index_crypto = key->index_integ = ~0;
}
pool_put (cm->keys, key);
}
+void
+vnet_crypto_key_update (vlib_main_t *vm, vnet_crypto_key_index_t index)
+{
+ vnet_crypto_main_t *cm = &crypto_main;
+ vnet_crypto_engine_t *engine;
+
+ vec_foreach (engine, cm->engines)
+ if (engine->key_op_handler)
+ engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_MODIFY, index);
+}
+
vnet_crypto_async_alg_t
vnet_crypto_link_algs (vnet_crypto_alg_t crypto_alg,
vnet_crypto_alg_t integ_alg)
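
A sketch of the new update hook, assuming the vnet_crypto_get_key accessor
from crypto.h and replacement key material of the same length in new_data:
rewrite the key in place, then let every engine refresh its per-key state:

  vnet_crypto_key_t *key = vnet_crypto_get_key (index);
  clib_memcpy (key->data, new_data, vec_len (key->data));
  vnet_crypto_key_update (vm, index);
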
@@ -458,50 +528,13 @@ vnet_crypto_key_add_linked (vlib_main_t * vm,
key->index_integ = index_integ;
key->async_alg = linked_alg;
- /* *INDENT-OFF* */
vec_foreach (engine, cm->engines)
if (engine->key_op_handler)
engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_ADD, index);
- /* *INDENT-ON* */
return index;
}
-clib_error_t *
-crypto_dispatch_enable_disable (int is_enable)
-{
- vnet_crypto_main_t *cm = &crypto_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- u32 skip_master = vlib_num_workers () > 0, i;
- vlib_node_state_t state = VLIB_NODE_STATE_DISABLED;
- u8 state_change = 0;
-
- CLIB_MEMORY_STORE_BARRIER ();
- if (is_enable && cm->async_refcnt > 0)
- {
- state_change = 1;
- state =
- cm->dispatch_mode ==
- VNET_CRYPTO_ASYNC_DISPATCH_POLLING ? VLIB_NODE_STATE_POLLING :
- VLIB_NODE_STATE_INTERRUPT;
- }
-
- if (!is_enable && cm->async_refcnt == 0)
- {
- state_change = 1;
- state = VLIB_NODE_STATE_DISABLED;
- }
-
- if (state_change)
- for (i = skip_master; i < tm->n_vlib_mains; i++)
- {
- vlib_main_t *ovm = vlib_get_main_by_index (i);
- if (state != vlib_node_get_state (ovm, cm->crypto_node_index))
- vlib_node_set_state (ovm, cm->crypto_node_index, state);
- }
- return 0;
-}
-
static_always_inline void
crypto_set_active_async_engine (vnet_crypto_async_op_data_t * od,
vnet_crypto_async_op_id_t id, u32 ei)
@@ -509,11 +542,10 @@ crypto_set_active_async_engine (vnet_crypto_async_op_data_t * od,
vnet_crypto_main_t *cm = &crypto_main;
vnet_crypto_engine_t *ce = vec_elt_at_index (cm->engines, ei);
- if (ce->enqueue_handlers[id] && ce->dequeue_handlers[id])
+ if (ce->enqueue_handlers[id] && ce->dequeue_handler)
{
od->active_engine_index_async = ei;
cm->enqueue_handlers[id] = ce->enqueue_handlers[id];
- cm->dequeue_handlers[id] = ce->dequeue_handlers[id];
}
}
@@ -546,6 +578,8 @@ vnet_crypto_set_async_handler2 (char *alg_name, char *engine)
crypto_set_active_async_engine (od, id, p[0]);
}
+ vnet_crypto_update_cm_dequeue_handlers ();
+
return 0;
}
@@ -561,13 +595,11 @@ vnet_crypto_register_post_node (vlib_main_t * vm, char *post_node_name)
if (!pn)
return ~0;
- /* *INDENT-OFF* */
- vec_foreach (cm->next_nodes, nn)
- {
- if (nn->node_idx == pn->index)
- return nn->next_idx;
- }
- /* *INDENT-ON* */
+ vec_foreach (nn, cm->next_nodes)
+ {
+ if (nn->node_idx == pn->index)
+ return nn->next_idx;
+ }
vec_validate (cm->next_nodes, index);
nn = vec_elt_at_index (cm->next_nodes, index);
@@ -580,70 +612,19 @@ vnet_crypto_register_post_node (vlib_main_t * vm, char *post_node_name)
}
void
-vnet_crypto_request_async_mode (int is_enable)
-{
- vnet_crypto_main_t *cm = &crypto_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- u32 skip_master = vlib_num_workers () > 0, i;
- vlib_node_state_t state = VLIB_NODE_STATE_DISABLED;
- u8 state_change = 0;
-
- CLIB_MEMORY_STORE_BARRIER ();
- if (is_enable && cm->async_refcnt == 0)
- {
- state_change = 1;
- state =
- cm->dispatch_mode == VNET_CRYPTO_ASYNC_DISPATCH_POLLING ?
- VLIB_NODE_STATE_POLLING : VLIB_NODE_STATE_INTERRUPT;
- }
- if (!is_enable && cm->async_refcnt == 1)
- {
- state_change = 1;
- state = VLIB_NODE_STATE_DISABLED;
- }
-
- if (state_change)
- for (i = skip_master; i < tm->n_vlib_mains; i++)
- {
- vlib_main_t *ovm = vlib_get_main_by_index (i);
- if (state != vlib_node_get_state (ovm, cm->crypto_node_index))
- vlib_node_set_state (ovm, cm->crypto_node_index, state);
- }
-
- if (is_enable)
- cm->async_refcnt += 1;
- else if (cm->async_refcnt > 0)
- cm->async_refcnt -= 1;
-}
-
-void
-vnet_crypto_set_async_dispatch_mode (u8 mode)
+vnet_crypto_set_async_dispatch (u8 mode, u8 adaptive)
{
- vnet_crypto_main_t *cm = &crypto_main;
- u32 skip_master = vlib_num_workers () > 0, i;
vlib_thread_main_t *tm = vlib_get_thread_main ();
- vlib_node_state_t state = VLIB_NODE_STATE_DISABLED;
+ u32 i, node_index = crypto_main.crypto_node_index;
+ vlib_node_state_t state =
+ mode ? VLIB_NODE_STATE_INTERRUPT : VLIB_NODE_STATE_POLLING;
- CLIB_MEMORY_STORE_BARRIER ();
- cm->dispatch_mode = mode;
- if (mode == VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT)
- {
- state =
- cm->async_refcnt == 0 ?
- VLIB_NODE_STATE_DISABLED : VLIB_NODE_STATE_INTERRUPT;
- }
- else if (mode == VNET_CRYPTO_ASYNC_DISPATCH_POLLING)
- {
- state =
- cm->async_refcnt == 0 ?
- VLIB_NODE_STATE_DISABLED : VLIB_NODE_STATE_POLLING;
- }
-
- for (i = skip_master; i < tm->n_vlib_mains; i++)
+ for (i = vlib_num_workers () > 0; i < tm->n_vlib_mains; i++)
{
vlib_main_t *ovm = vlib_get_main_by_index (i);
- if (state != vlib_node_get_state (ovm, cm->crypto_node_index))
- vlib_node_set_state (ovm, cm->crypto_node_index, state);
+ vlib_node_set_state (ovm, node_index, state);
+ vlib_node_set_flag (ovm, node_index, VLIB_NODE_FLAG_ADAPTIVE_MODE,
+ adaptive);
}
}
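
The programmatic equivalent of the new CLI, as a sketch; a zero mode
selects polling, non-zero selects interrupt:

  vnet_crypto_set_async_dispatch (1 /* interrupt */, 1 /* adaptive */);
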
@@ -742,18 +723,15 @@ vnet_crypto_init (vlib_main_t * vm)
vlib_thread_main_t *tm = vlib_get_thread_main ();
vnet_crypto_thread_t *ct = 0;
- cm->dispatch_mode = VNET_CRYPTO_ASYNC_DISPATCH_POLLING;
cm->engine_index_by_name = hash_create_string ( /* size */ 0,
sizeof (uword));
cm->alg_index_by_name = hash_create_string (0, sizeof (uword));
cm->async_alg_index_by_name = hash_create_string (0, sizeof (uword));
vec_validate_aligned (cm->threads, tm->n_vlib_mains, CLIB_CACHE_LINE_BYTES);
vec_foreach (ct, cm->threads)
- pool_alloc_aligned (ct->frame_pool, VNET_CRYPTO_FRAME_POOL_SIZE,
- CLIB_CACHE_LINE_BYTES);
+ pool_init_fixed (ct->frame_pool, VNET_CRYPTO_FRAME_POOL_SIZE);
vec_validate (cm->algs, VNET_CRYPTO_N_ALGS);
vec_validate (cm->async_algs, VNET_CRYPTO_N_ASYNC_ALGS);
- clib_bitmap_validate (cm->async_active_ids, VNET_CRYPTO_ASYNC_OP_N_IDS);
#define _(n, s, l) \
vnet_crypto_init_cipher_data (VNET_CRYPTO_ALG_##n, \
diff --git a/src/vnet/crypto/crypto.h b/src/vnet/crypto/crypto.h
index 71978b64835..89cf70d19e3 100644
--- a/src/vnet/crypto/crypto.h
+++ b/src/vnet/crypto/crypto.h
@@ -33,11 +33,14 @@
_(AES_256_CTR, "aes-256-ctr", 32)
/* CRYPTO_ID, PRETTY_NAME, KEY_LENGTH_IN_BYTES */
-#define foreach_crypto_aead_alg \
- _(AES_128_GCM, "aes-128-gcm", 16) \
- _(AES_192_GCM, "aes-192-gcm", 24) \
- _(AES_256_GCM, "aes-256-gcm", 32) \
- _(CHACHA20_POLY1305, "chacha20-poly1305", 32)
+#define foreach_crypto_aead_alg \
+ _ (AES_128_GCM, "aes-128-gcm", 16) \
+ _ (AES_192_GCM, "aes-192-gcm", 24) \
+ _ (AES_256_GCM, "aes-256-gcm", 32) \
+ _ (AES_128_NULL_GMAC, "aes-128-null-gmac", 16) \
+ _ (AES_192_NULL_GMAC, "aes-192-null-gmac", 24) \
+ _ (AES_256_NULL_GMAC, "aes-256-null-gmac", 32) \
+ _ (CHACHA20_POLY1305, "chacha20-poly1305", 32)
#define foreach_crypto_hash_alg \
_ (SHA1, "sha-1") \
@@ -82,15 +85,22 @@ typedef enum
/** async crypto **/
/* CRYPTO_ID, PRETTY_NAME, KEY_LENGTH_IN_BYTES, TAG_LEN, AAD_LEN */
-#define foreach_crypto_aead_async_alg \
- _(AES_128_GCM, "aes-128-gcm-aad8", 16, 16, 8) \
- _(AES_128_GCM, "aes-128-gcm-aad12", 16, 16, 12) \
- _(AES_192_GCM, "aes-192-gcm-aad8", 24, 16, 8) \
- _(AES_192_GCM, "aes-192-gcm-aad12", 24, 16, 12) \
- _(AES_256_GCM, "aes-256-gcm-aad8", 32, 16, 8) \
- _(AES_256_GCM, "aes-256-gcm-aad12", 32, 16, 12) \
- _(CHACHA20_POLY1305, "chacha20-poly1305-aad8", 32, 16, 8) \
- _(CHACHA20_POLY1305, "chacha20-poly1305-aad12", 32, 16, 12)
+#define foreach_crypto_aead_async_alg \
+ _ (AES_128_GCM, "aes-128-gcm-aad8", 16, 16, 8) \
+ _ (AES_128_GCM, "aes-128-gcm-aad12", 16, 16, 12) \
+ _ (AES_192_GCM, "aes-192-gcm-aad8", 24, 16, 8) \
+ _ (AES_192_GCM, "aes-192-gcm-aad12", 24, 16, 12) \
+ _ (AES_256_GCM, "aes-256-gcm-aad8", 32, 16, 8) \
+ _ (AES_256_GCM, "aes-256-gcm-aad12", 32, 16, 12) \
+ _ (AES_128_NULL_GMAC, "aes-128-null-gmac-aad8", 16, 16, 8) \
+ _ (AES_128_NULL_GMAC, "aes-128-null-gmac-aad12", 16, 16, 12) \
+ _ (AES_192_NULL_GMAC, "aes-192-null-gmac-aad8", 24, 16, 8) \
+ _ (AES_192_NULL_GMAC, "aes-192-null-gmac-aad12", 24, 16, 12) \
+ _ (AES_256_NULL_GMAC, "aes-256-null-gmac-aad8", 32, 16, 8) \
+ _ (AES_256_NULL_GMAC, "aes-256-null-gmac-aad12", 32, 16, 12) \
+ _ (CHACHA20_POLY1305, "chacha20-poly1305-aad8", 32, 16, 8) \
+ _ (CHACHA20_POLY1305, "chacha20-poly1305-aad12", 32, 16, 12) \
+ _ (CHACHA20_POLY1305, "chacha20-poly1305", 32, 16, 0)
/* CRYPTO_ID, INTEG_ID, PRETTY_NAME, KEY_LENGTH_IN_BYTES, DIGEST_LEN */
#define foreach_crypto_link_async_alg \
@@ -141,7 +151,6 @@ typedef enum
VNET_CRYPTO_OP_N_STATUS,
} vnet_crypto_op_status_t;
-/* *INDENT-OFF* */
typedef enum
{
VNET_CRYPTO_ALG_NONE = 0,
@@ -230,7 +239,6 @@ typedef enum
#undef _
VNET_CRYPTO_N_OP_IDS,
} vnet_crypto_op_id_t;
-/* *INDENT-ON* */
typedef enum
{
@@ -259,9 +267,8 @@ typedef struct
vnet_crypto_op_id_t op:16;
vnet_crypto_op_status_t status:8;
u8 flags;
-#define VNET_CRYPTO_OP_FLAG_INIT_IV (1 << 0)
-#define VNET_CRYPTO_OP_FLAG_HMAC_CHECK (1 << 1)
-#define VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS (1 << 2)
+#define VNET_CRYPTO_OP_FLAG_HMAC_CHECK (1 << 0)
+#define VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS (1 << 1)
union
{
@@ -337,7 +344,7 @@ typedef struct
i16 crypto_start_offset; /* first buffer offset */
i16 integ_start_offset;
/* adj total_length for integ, e.g.4 bytes for IPSec ESN */
- u16 integ_length_adj;
+ i16 integ_length_adj;
vnet_crypto_op_status_t status : 8;
u8 flags; /**< share same VNET_CRYPTO_OP_FLAG_* values */
} vnet_crypto_async_frame_elt_t;
@@ -422,12 +429,15 @@ void vnet_crypto_register_key_handler (vlib_main_t * vm, u32 engine_index,
/** async crypto register functions */
u32 vnet_crypto_register_post_node (vlib_main_t * vm, char *post_node_name);
-void vnet_crypto_register_async_handler (vlib_main_t * vm,
- u32 engine_index,
- vnet_crypto_async_op_id_t opt,
- vnet_crypto_frame_enqueue_t * enq_fn,
- vnet_crypto_frame_dequeue_t *
- deq_fn);
+
+void
+vnet_crypto_register_enqueue_handler (vlib_main_t *vm, u32 engine_index,
+ vnet_crypto_async_op_id_t opt,
+ vnet_crypto_frame_enqueue_t *enq_fn);
+
+void
+vnet_crypto_register_dequeue_handler (vlib_main_t *vm, u32 engine_index,
+ vnet_crypto_frame_dequeue_t *deq_fn);
typedef struct
{
@@ -439,7 +449,7 @@ typedef struct
vnet_crypto_chained_ops_handler_t
* chained_ops_handlers[VNET_CRYPTO_N_OP_IDS];
vnet_crypto_frame_enqueue_t *enqueue_handlers[VNET_CRYPTO_ASYNC_OP_N_IDS];
- vnet_crypto_frame_dequeue_t *dequeue_handlers[VNET_CRYPTO_ASYNC_OP_N_IDS];
+ vnet_crypto_frame_dequeue_t *dequeue_handler;
} vnet_crypto_engine_t;
typedef struct
@@ -456,7 +466,6 @@ typedef struct
vnet_crypto_chained_ops_handler_t **chained_ops_handlers;
vnet_crypto_frame_enqueue_t **enqueue_handlers;
vnet_crypto_frame_dequeue_t **dequeue_handlers;
- clib_bitmap_t *async_active_ids;
vnet_crypto_op_data_t opt_data[VNET_CRYPTO_N_OP_IDS];
vnet_crypto_async_op_data_t async_opt_data[VNET_CRYPTO_ASYNC_OP_N_IDS];
vnet_crypto_engine_t *engines;
@@ -465,12 +474,8 @@ typedef struct
uword *alg_index_by_name;
uword *async_alg_index_by_name;
vnet_crypto_async_alg_data_t *async_algs;
- u32 async_refcnt;
vnet_crypto_async_next_node_t *next_nodes;
u32 crypto_node_index;
-#define VNET_CRYPTO_ASYNC_DISPATCH_POLLING 0
-#define VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT 1
- u8 dispatch_mode;
} vnet_crypto_main_t;
extern vnet_crypto_main_t crypto_main;
@@ -481,7 +486,7 @@ u32 vnet_crypto_process_chained_ops (vlib_main_t * vm, vnet_crypto_op_t ops[],
u32 vnet_crypto_process_ops (vlib_main_t * vm, vnet_crypto_op_t ops[],
u32 n_ops);
-
+void vnet_crypto_set_async_dispatch (u8 mode, u8 adaptive);
int vnet_crypto_set_handler2 (char *ops_handler_name, char *engine,
crypto_op_class_type_t oct);
int vnet_crypto_is_set_handler (vnet_crypto_alg_t alg);
@@ -489,6 +494,7 @@ int vnet_crypto_is_set_handler (vnet_crypto_alg_t alg);
u32 vnet_crypto_key_add (vlib_main_t * vm, vnet_crypto_alg_t alg,
u8 * data, u16 length);
void vnet_crypto_key_del (vlib_main_t * vm, vnet_crypto_key_index_t index);
+void vnet_crypto_key_update (vlib_main_t *vm, vnet_crypto_key_index_t index);
/**
* Use 2 created keys to generate new key for linked algs (cipher + integ)
@@ -498,21 +504,13 @@ u32 vnet_crypto_key_add_linked (vlib_main_t * vm,
vnet_crypto_key_index_t index_crypto,
vnet_crypto_key_index_t index_integ);
-clib_error_t *crypto_dispatch_enable_disable (int is_enable);
-
int vnet_crypto_set_async_handler2 (char *alg_name, char *engine);
int vnet_crypto_is_set_async_handler (vnet_crypto_async_op_id_t opt);
-void vnet_crypto_request_async_mode (int is_enable);
-
-void vnet_crypto_set_async_dispatch_mode (u8 mode);
-
vnet_crypto_async_alg_t vnet_crypto_link_algs (vnet_crypto_alg_t crypto_alg,
vnet_crypto_alg_t integ_alg);
-clib_error_t *crypto_dispatch_enable_disable (int is_enable);
-
format_function_t format_vnet_crypto_alg;
format_function_t format_vnet_crypto_engine;
format_function_t format_vnet_crypto_op;
@@ -566,12 +564,16 @@ vnet_crypto_async_get_frame (vlib_main_t * vm, vnet_crypto_async_op_id_t opt)
vnet_crypto_thread_t *ct = cm->threads + vm->thread_index;
vnet_crypto_async_frame_t *f = NULL;
- pool_get_aligned (ct->frame_pool, f, CLIB_CACHE_LINE_BYTES);
- if (CLIB_DEBUG > 0)
- clib_memset (f, 0xfe, sizeof (*f));
- f->state = VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED;
- f->op = opt;
- f->n_elts = 0;
+ if (PREDICT_TRUE (pool_free_elts (ct->frame_pool)))
+ {
+ pool_get_aligned (ct->frame_pool, f, CLIB_CACHE_LINE_BYTES);
+#if CLIB_DEBUG > 0
+ clib_memset (f, 0xfe, sizeof (*f));
+#endif
+ f->state = VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED;
+ f->op = opt;
+ f->n_elts = 0;
+ }
return f;
}
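
Since the frame pool is now fixed-size, callers must tolerate a NULL
return; a sketch with a placeholder op id:

  vnet_crypto_async_frame_t *f = vnet_crypto_async_get_frame (
    vm, VNET_CRYPTO_OP_AES_128_GCM_TAG16_AAD8_ENC);
  if (PREDICT_FALSE (f == 0))
    {
      /* frame pool exhausted: drop the packets or retry later */
    }
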
@@ -591,20 +593,26 @@ vnet_crypto_async_submit_open_frame (vlib_main_t * vm,
{
vnet_crypto_main_t *cm = &crypto_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
- vnet_crypto_async_op_id_t opt = frame->op;
- u32 i = vlib_num_workers () > 0;
+ u32 i;
+ vlib_node_t *n;
frame->state = VNET_CRYPTO_FRAME_STATE_PENDING;
frame->enqueue_thread_index = vm->thread_index;
+ if (PREDICT_FALSE (cm->enqueue_handlers == NULL))
+ {
+ frame->state = VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
+ return -1;
+ }
+
int ret = (cm->enqueue_handlers[frame->op]) (vm, frame);
- clib_bitmap_set_no_check (cm->async_active_ids, opt, 1);
if (PREDICT_TRUE (ret == 0))
{
- if (cm->dispatch_mode == VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT)
+ n = vlib_get_node (vm, cm->crypto_node_index);
+ if (n->state == VLIB_NODE_STATE_INTERRUPT)
{
- for (; i < tm->n_vlib_mains; i++)
+ for (i = 0; i < tm->n_vlib_mains; i++)
vlib_node_set_interrupt_pending (vlib_get_main_by_index (i),
cm->crypto_node_index);
}
@@ -621,7 +629,7 @@ static_always_inline void
vnet_crypto_async_add_to_frame (vlib_main_t *vm, vnet_crypto_async_frame_t *f,
u32 key_index, u32 crypto_len,
i16 integ_len_adj, i16 crypto_start_offset,
- u16 integ_start_offset, u32 buffer_index,
+ i16 integ_start_offset, u32 buffer_index,
u16 next_node, u8 *iv, u8 *tag, u8 *aad,
u8 flags)
{
diff --git a/src/vnet/crypto/crypto_api.c b/src/vnet/crypto/crypto_api.c
index 49b12a3d377..e701864a5ba 100644
--- a/src/vnet/crypto/crypto_api.c
+++ b/src/vnet/crypto/crypto_api.c
@@ -46,12 +46,24 @@ vl_api_crypto_set_async_dispatch_t_handler (vl_api_crypto_set_async_dispatch_t
vl_api_crypto_set_async_dispatch_reply_t *rmp;
int rv = 0;
- vnet_crypto_set_async_dispatch_mode ((u8) mp->mode);
+ vnet_crypto_set_async_dispatch ((u8) mp->mode, 0);
REPLY_MACRO (VL_API_CRYPTO_SET_ASYNC_DISPATCH_REPLY);
}
static void
+vl_api_crypto_set_async_dispatch_v2_t_handler (
+ vl_api_crypto_set_async_dispatch_v2_t *mp)
+{
+ vl_api_crypto_set_async_dispatch_v2_reply_t *rmp;
+ int rv = 0;
+
+ vnet_crypto_set_async_dispatch ((u8) mp->mode, mp->adaptive ? 1 : 0);
+
+ REPLY_MACRO (VL_API_CRYPTO_SET_ASYNC_DISPATCH_V2_REPLY);
+}
+
+static void
vl_api_crypto_set_handler_t_handler (vl_api_crypto_set_handler_t * mp)
{
vl_api_crypto_set_handler_reply_t *rmp;
diff --git a/src/vnet/crypto/node.c b/src/vnet/crypto/node.c
index e753f1ad1db..ee7f344ce68 100644
--- a/src/vnet/crypto/node.c
+++ b/src/vnet/crypto/node.c
@@ -135,8 +135,11 @@ crypto_dequeue_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_crypto_async_free_frame (vm, cf);
}
/* signal enqueue-thread to dequeue the processed frame (n_elts>0) */
- if (cm->dispatch_mode == VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT
- && n_elts > 0)
+ if (n_elts > 0 &&
+ ((node->state == VLIB_NODE_STATE_POLLING &&
+ (node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE)) ||
+ node->state == VLIB_NODE_STATE_INTERRUPT))
{
vlib_node_set_interrupt_pending (
vlib_get_main_by_index (enqueue_thread_idx),
@@ -158,27 +161,35 @@ VLIB_NODE_FN (crypto_dispatch_node) (vlib_main_t * vm,
{
vnet_crypto_main_t *cm = &crypto_main;
vnet_crypto_thread_t *ct = cm->threads + vm->thread_index;
- u32 n_dispatched = 0, n_cache = 0;
- u32 index;
-
- /* *INDENT-OFF* */
- clib_bitmap_foreach (index, cm->async_active_ids) {
- n_cache = crypto_dequeue_frame (vm, node, ct, cm->dequeue_handlers[index],
- n_cache, &n_dispatched);
- }
- /* *INDENT-ON* */
+ u32 n_dispatched = 0, n_cache = 0, index;
+ vec_foreach_index (index, cm->dequeue_handlers)
+ {
+ n_cache = crypto_dequeue_frame (
+ vm, node, ct, cm->dequeue_handlers[index], n_cache, &n_dispatched);
+ }
if (n_cache)
vlib_buffer_enqueue_to_next_vec (vm, node, &ct->buffer_indices, &ct->nexts,
n_cache);
+  /* if there are still pending tasks and the node is in interrupt mode,
+     signal the current thread to dequeue them on the next loop */
+ if (pool_elts (ct->frame_pool) > 0 &&
+ ((node->state == VLIB_NODE_STATE_POLLING &&
+ (node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE)) ||
+ node->state == VLIB_NODE_STATE_INTERRUPT))
+ {
+ vlib_node_set_interrupt_pending (vm, node->node_index);
+ }
+
return n_dispatched;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (crypto_dispatch_node) = {
.name = "crypto-dispatch",
.type = VLIB_NODE_TYPE_INPUT,
- .state = VLIB_NODE_STATE_DISABLED,
+ .flags = VLIB_NODE_FLAG_ADAPTIVE_MODE,
+ .state = VLIB_NODE_STATE_INTERRUPT,
.format_trace = format_crypto_dispatch_trace,
.n_errors = ARRAY_LEN(vnet_crypto_async_error_strings),
@@ -192,7 +203,6 @@ VLIB_REGISTER_NODE (crypto_dispatch_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/dev/api.c b/src/vnet/dev/api.c
new file mode 100644
index 00000000000..114b63d6662
--- /dev/null
+++ b/src/vnet/dev/api.c
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/pool.h>
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+#include <vnet/dev/api.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "api",
+};
+
+static int
+_vnet_dev_queue_size_validate (u32 size, vnet_dev_queue_config_t c)
+{
+ if (size < c.min_size)
+ return 0;
+ if (size > c.max_size)
+ return 0;
+ if (c.size_is_power_of_two && count_set_bits (size) != 1)
+ return 0;
+ if (c.multiplier && size % c.multiplier)
+ return 0;
+
+ return 1;
+}
+
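
For illustration (field names of vnet_dev_queue_config_t assumed from
dev.h): with a power-of-two constraint between 128 and 4096, a size of
512 validates while 768 does not:

  vnet_dev_queue_config_t c = { .min_size = 128,
                                .max_size = 4096,
                                .size_is_power_of_two = 1 };

  ASSERT (_vnet_dev_queue_size_validate (512, c) == 1);
  ASSERT (_vnet_dev_queue_size_validate (768, c) == 0);
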
+vnet_dev_rv_t
+vnet_dev_api_attach (vlib_main_t *vm, vnet_dev_api_attach_args_t *args)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_t *dev = 0;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ vnet_dev_bus_t *bus;
+ vnet_dev_driver_t *driver;
+ void *bus_dev_info = 0;
+ u8 *dev_desc = 0;
+
+ log_debug (0, "%s driver %s flags '%U' args '%v'", args->device_id,
+ args->driver_name, format_vnet_dev_flags, &args->flags,
+ args->args);
+
+ if (vnet_dev_by_id (args->device_id))
+ return VNET_DEV_ERR_ALREADY_IN_USE;
+
+ bus = vnet_dev_find_device_bus (vm, args->device_id);
+ if (!bus)
+ {
+ log_err (dev, "unknown bus");
+ rv = VNET_DEV_ERR_INVALID_BUS;
+ goto done;
+ }
+
+ bus_dev_info = vnet_dev_get_device_info (vm, args->device_id);
+ if (!bus_dev_info)
+ {
+ log_err (dev, "invalid or unsupported device id");
+ rv = VNET_DEV_ERR_INVALID_DEVICE_ID;
+ goto done;
+ }
+
+ vec_foreach (driver, dm->drivers)
+ {
+ if (args->driver_name[0] &&
+ strcmp (args->driver_name, driver->registration->name))
+ continue;
+ if (driver->ops.probe &&
+ (dev_desc = driver->ops.probe (vm, bus->index, bus_dev_info)))
+ break;
+ }
+
+ if (!dev_desc)
+ {
+ log_err (dev, "driver not available for %s", args->device_id);
+ rv = VNET_DEV_ERR_DRIVER_NOT_AVAILABLE;
+ goto done;
+ }
+
+ dev = vnet_dev_alloc (vm, args->device_id, driver);
+ if (!dev)
+ {
+ log_err (dev, "dev alloc failed for %s", args->device_id);
+ rv = VNET_DEV_ERR_BUG;
+ goto done;
+ }
+ dev->description = dev_desc;
+
+ if (driver->registration->args)
+ for (vnet_dev_arg_t *a = driver->registration->args;
+ a->type != VNET_DEV_ARG_END; a++)
+ vec_add1 (dev->args, *a);
+
+ if (args->args)
+ {
+ if ((rv = vnet_dev_arg_parse (vm, dev, dev->args, args->args)) !=
+ VNET_DEV_OK)
+ goto done;
+ }
+
+ if ((args->flags.e & VNET_DEV_F_NO_STATS) == 0)
+ dev->poll_stats = 1;
+
+ log_debug (0, "found '%v'", dev->description);
+
+ rv = vnet_dev_process_call_op (vm, dev, vnet_dev_init);
+
+done:
+ if (bus_dev_info)
+ bus->ops.free_device_info (vm, bus_dev_info);
+
+ if (rv != VNET_DEV_OK && dev)
+ vnet_dev_process_call_op_no_rv (vm, dev, vnet_dev_free);
+ else if (dev)
+ args->dev_index = dev->index;
+
+ return rv;
+}
+
+vnet_dev_rv_t
+vnet_dev_api_detach (vlib_main_t *vm, vnet_dev_api_detach_args_t *args)
+{
+ vnet_dev_t *dev = vnet_dev_by_index (args->dev_index);
+
+ log_debug (dev, "detach");
+
+ if (dev)
+ return vnet_dev_process_call_op_no_rv (vm, dev, vnet_dev_detach);
+
+ return VNET_DEV_ERR_NOT_FOUND;
+}
+
+vnet_dev_rv_t
+vnet_dev_api_reset (vlib_main_t *vm, vnet_dev_api_reset_args_t *args)
+{
+ vnet_dev_t *dev = vnet_dev_by_id (args->device_id);
+
+  log_debug (dev, "reset");
+
+ if (!dev)
+ return VNET_DEV_ERR_NOT_FOUND;
+
+ if (dev->ops.reset)
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+
+ return vnet_dev_process_call_op (vm, dev, vnet_dev_reset);
+}
+
+vnet_dev_rv_t
+vnet_dev_api_create_port_if (vlib_main_t *vm,
+ vnet_dev_api_create_port_if_args_t *args)
+{
+ vnet_dev_t *dev = vnet_dev_by_index (args->dev_index);
+ vnet_dev_port_t *port = 0;
+ u16 n_threads = vlib_get_n_threads ();
+ int default_is_intr_mode;
+ vnet_dev_rv_t rv;
+
+ log_debug (dev,
+ "create_port_if: dev_index %u port %u intf_name '%s' num_rx_q %u "
+ "num_tx_q %u rx_q_sz %u tx_q_sz %u, flags '%U' args '%v'",
+ args->dev_index, args->port_id, args->intf_name,
+ args->num_rx_queues, args->num_tx_queues, args->rx_queue_size,
+ args->tx_queue_size, format_vnet_dev_port_flags, &args->flags,
+ args->args);
+
+ if (dev == 0)
+ return VNET_DEV_ERR_NOT_FOUND;
+
+ foreach_vnet_dev_port (p, dev)
+ if (p->port_id == args->port_id)
+ {
+ port = p;
+ break;
+ }
+
+ if (!port)
+ return VNET_DEV_ERR_INVALID_DEVICE_ID;
+
+ if (port->interface_created)
+ return VNET_DEV_ERR_ALREADY_EXISTS;
+
+ if (args->args)
+ {
+ rv = vnet_dev_arg_parse (vm, dev, port->args, args->args);
+ if (rv != VNET_DEV_OK)
+ return rv;
+ }
+
+ default_is_intr_mode = (args->flags.e & VNET_DEV_PORT_F_INTERRUPT_MODE) != 0;
+ if (default_is_intr_mode && port->attr.caps.interrupt_mode == 0)
+ {
+      log_err (dev, "interrupt mode requested but port doesn't support it");
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+
+ if (args->num_rx_queues)
+ {
+ if (args->num_rx_queues > port->attr.max_rx_queues)
+ return VNET_DEV_ERR_INVALID_NUM_RX_QUEUES;
+ port->intf.num_rx_queues = args->num_rx_queues;
+ }
+ else
+    port->intf.num_rx_queues = clib_min (port->attr.max_rx_queues, 1);
+
+ if (args->num_tx_queues)
+ {
+ if (args->num_tx_queues > port->attr.max_tx_queues)
+ return VNET_DEV_ERR_INVALID_NUM_TX_QUEUES;
+ port->intf.num_tx_queues = args->num_tx_queues;
+ }
+ else
+ port->intf.num_tx_queues = clib_min (port->attr.max_tx_queues, n_threads);
+
+ if (args->rx_queue_size)
+ {
+ if (!_vnet_dev_queue_size_validate (args->rx_queue_size,
+ port->rx_queue_config))
+ return VNET_DEV_ERR_INVALID_RX_QUEUE_SIZE;
+ port->intf.rxq_sz = args->rx_queue_size;
+ }
+ else
+ port->intf.rxq_sz = port->rx_queue_config.default_size;
+
+ if (args->tx_queue_size)
+ {
+ if (!_vnet_dev_queue_size_validate (args->tx_queue_size,
+ port->tx_queue_config))
+ return VNET_DEV_ERR_INVALID_TX_QUEUE_SIZE;
+ port->intf.txq_sz = args->tx_queue_size;
+ }
+ else
+ port->intf.txq_sz = port->tx_queue_config.default_size;
+
+ clib_memcpy (port->intf.name, args->intf_name, sizeof (port->intf.name));
+ port->intf.default_is_intr_mode = default_is_intr_mode;
+
+ rv = vnet_dev_process_call_port_op (vm, port, vnet_dev_port_if_create);
+ args->sw_if_index = (rv == VNET_DEV_OK) ? port->intf.sw_if_index : ~0;
+
+ return rv;
+}
+
+vnet_dev_rv_t
+vnet_dev_api_remove_port_if (vlib_main_t *vm,
+ vnet_dev_api_remove_port_if_args_t *args)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_sw_interface_t *si;
+ vnet_hw_interface_t *hi;
+ vnet_dev_port_t *port;
+
+ si = vnet_get_sw_interface_or_null (vnm, args->sw_if_index);
+ if (!si)
+ return VNET_DEV_ERR_UNKNOWN_INTERFACE;
+
+ hi = vnet_get_hw_interface_or_null (vnm, si->hw_if_index);
+ if (!hi)
+ return VNET_DEV_ERR_UNKNOWN_INTERFACE;
+
+ if (pool_is_free_index (dm->ports_by_dev_instance, hi->dev_instance))
+ return VNET_DEV_ERR_UNKNOWN_INTERFACE;
+
+ port = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+
+ if (port->intf.hw_if_index != si->hw_if_index)
+ return VNET_DEV_ERR_UNKNOWN_INTERFACE;
+
+ return vnet_dev_process_call_port_op (vm, port, vnet_dev_port_if_remove);
+}
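
A hypothetical caller sketch chaining the two entry points; the PCI
address and interface name are examples only:

  vnet_dev_api_attach_args_t a = { .device_id = "pci/0000:04:00.0" };

  if (vnet_dev_api_attach (vm, &a) == VNET_DEV_OK)
    {
      vnet_dev_api_create_port_if_args_t pa = { .dev_index = a.dev_index,
                                                .intf_name = "eth0",
                                                .port_id = 0 };
      if (vnet_dev_api_create_port_if (vm, &pa) == VNET_DEV_OK)
        {
          /* pa.sw_if_index now refers to the new interface */
        }
    }
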
diff --git a/src/vnet/dev/api.h b/src/vnet/dev/api.h
new file mode 100644
index 00000000000..1b7bf27d62a
--- /dev/null
+++ b/src/vnet/dev/api.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_API_H_
+#define _VNET_DEV_API_H_
+
+#include <vppinfra/clib.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/types.h>
+
+typedef struct
+{
+ vnet_dev_device_id_t device_id;
+ vnet_dev_driver_name_t driver_name;
+ vnet_dev_flags_t flags;
+ u8 *args;
+
+ /* return */
+ u32 dev_index;
+} vnet_dev_api_attach_args_t;
+
+vnet_dev_rv_t vnet_dev_api_attach (vlib_main_t *,
+ vnet_dev_api_attach_args_t *);
+
+typedef struct
+{
+ u32 dev_index;
+} vnet_dev_api_detach_args_t;
+vnet_dev_rv_t vnet_dev_api_detach (vlib_main_t *,
+ vnet_dev_api_detach_args_t *);
+
+typedef struct
+{
+ vnet_dev_device_id_t device_id;
+} vnet_dev_api_reset_args_t;
+vnet_dev_rv_t vnet_dev_api_reset (vlib_main_t *, vnet_dev_api_reset_args_t *);
+
+typedef struct
+{
+ u32 dev_index;
+ vnet_dev_if_name_t intf_name;
+ u16 num_rx_queues;
+ u16 num_tx_queues;
+ u16 rx_queue_size;
+ u16 tx_queue_size;
+ vnet_dev_port_id_t port_id;
+ vnet_dev_port_flags_t flags;
+ u8 *args;
+
+ /* return */
+ u32 sw_if_index;
+} vnet_dev_api_create_port_if_args_t;
+
+vnet_dev_rv_t
+vnet_dev_api_create_port_if (vlib_main_t *,
+ vnet_dev_api_create_port_if_args_t *);
+
+typedef struct
+{
+ u32 sw_if_index;
+} vnet_dev_api_remove_port_if_args_t;
+
+vnet_dev_rv_t
+vnet_dev_api_remove_port_if (vlib_main_t *,
+ vnet_dev_api_remove_port_if_args_t *);
+
+#endif /* _VNET_DEV_API_H_ */
diff --git a/src/vnet/dev/args.c b/src/vnet/dev/args.c
new file mode 100644
index 00000000000..e302517cc61
--- /dev/null
+++ b/src/vnet/dev/args.c
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/pool.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+#include <vnet/dev/types.h>
+#include <vppinfra/format_table.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "args",
+};
+
+void
+vnet_dev_arg_clear_value (vnet_dev_arg_t *a)
+{
+ if (a->type == VNET_DEV_ARG_TYPE_STRING)
+ vec_free (a->val.string);
+ a->val = (typeof (a->val)){};
+ a->val_set = 0;
+}
+
+void
+vnet_dev_arg_free (vnet_dev_arg_t **vp)
+{
+ vnet_dev_arg_t *v;
+ vec_foreach (v, *vp)
+ vnet_dev_arg_clear_value (v);
+ vec_free (*vp);
+}
+
+vnet_dev_rv_t
+vnet_dev_arg_parse (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_arg_t *args,
+ u8 *str)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ unformat_input_t in;
+ u8 *name = 0;
+ u8 *err = 0;
+
+ log_debug (dev, "input '%v'", str);
+ if (args == 0)
+ return rv;
+
+ unformat_init_string (&in, (char *) str, vec_len (str));
+
+ while (unformat (&in, "%U=", unformat_token, "a-zA-Z0-9_", &name))
+ {
+ vnet_dev_arg_t *a = args;
+ vec_add1 (name, 0);
+ while (a < vec_end (args))
+ if (strcmp (a->name, (char *) name) == 0)
+ break;
+ else
+ a++;
+
+ if (a->type == VNET_DEV_ARG_TYPE_BOOL)
+ {
+
+ if (unformat (&in, "true") || unformat (&in, "1") ||
+ unformat (&in, "on") || unformat (&in, "yes"))
+ a->val.boolean = 1;
+ else if (unformat (&in, "false") || unformat (&in, "0") ||
+ unformat (&in, "off") || unformat (&in, "no"))
+ a->val.boolean = 0;
+ else
+ {
+ log_err (dev, "unable to parse args: %U", format_unformat_error,
+ &in);
+ err = format (
+ 0,
+ "boolean value expected ('yes', 'no', '0', '1', 'on', "
+ "'off', 'true' or 'false') for argument '%s', found '%U'",
+ a->name, format_unformat_error, &in);
+ goto done;
+ }
+ }
+ else if (a->type == VNET_DEV_ARG_TYPE_UINT32)
+ {
+ u32 val, min = 0, max = CLIB_U32_MAX;
+ if (!unformat (&in, "%u", &val))
+ {
+ err = format (0,
+ "unsigned integer in range %u - %u expected for "
+ "argument '%s', found '%U'",
+ min, max, a->name, format_unformat_error, &in);
+ goto done;
+ }
+
+ if (a->min || a->max)
+ {
+ min = a->min;
+ max = a->max;
+ }
+
+ if (val < min || val > max)
+ {
+ err = format (0,
+ "unsigned integer in range %u - %u expected for "
+ "argument '%s', found '%u'",
+ min, max, a->name, val);
+ goto done;
+ }
+ a->val.uint32 = val;
+ }
+ else if (a->type == VNET_DEV_ARG_TYPE_STRING)
+ {
+ if (!unformat (&in, "%U", unformat_double_quoted_string,
+ &a->val.string))
+ {
+ err = format (
+ 0,
+ "double quoted string expected for argument '%s', found '%U'",
+ a->name, format_unformat_error, &in);
+ goto done;
+ }
+
+ if (a->min && vec_len (a->val.string) < a->min)
+ {
+ err =
+ format (0, "string '%v' too short, must be at least %u chars",
+ a->val.string, a->min);
+ goto done;
+ }
+ if (a->max && vec_len (a->val.string) > a->max)
+ {
+ err = format (
+ 0, "string '%v' too long, must be no longer than %u chars",
+ a->val.string, a->max);
+ goto done;
+ }
+ }
+ else
+ {
+ err = format (0, "unknown argument '%s'", name);
+ goto done;
+ }
+
+ a->val_set = 1;
+ log_debug (dev, "name '%s' type %U value %U", name,
+ format_vnet_dev_arg_type, a->type, format_vnet_dev_arg_value,
+ a->type, &a->val);
+ vec_free (name);
+ unformat (&in, ",");
+ }
+
+ if (unformat_check_input (&in) != UNFORMAT_END_OF_INPUT)
+ err = format (0, "unable to parse argument name '%U'",
+ format_unformat_error, &in);
+
+done:
+ if (err)
+ {
+ vnet_dev_arg_t *a = 0;
+ log_err (dev, "%v", err);
+ vec_free (err);
+ vec_foreach (a, args)
+ vnet_dev_arg_clear_value (a);
+ rv = VNET_DEV_ERR_INVALID_ARG;
+ }
+
+ vec_free (name);
+ unformat_free (&in);
+ return rv;
+}
+
+u8 *
+format_vnet_dev_arg_type (u8 *s, va_list *args)
+{
+ vnet_dev_arg_type_t t = va_arg (*args, u32);
+ switch (t)
+ {
+#define _(n, f, val) \
+ case VNET_DEV_ARG_TYPE_##n: \
+ return format (s, #n);
+ foreach_vnet_dev_arg_type
+#undef _
+    default:
+      ASSERT (0);
+      break;
+ }
+ return s;
+}
+
+u8 *
+format_vnet_dev_arg_value (u8 *s, va_list *args)
+{
+ vnet_dev_arg_type_t t = va_arg (*args, u32);
+ vnet_dev_arg_value_t *v = va_arg (*args, vnet_dev_arg_value_t *);
+
+ switch (t)
+ {
+#define _(n, f, value) \
+ case VNET_DEV_ARG_TYPE_##n: \
+ s = format (s, f, v->value); \
+ break;
+ foreach_vnet_dev_arg_type
+#undef _
+    default:
+      break;
+ }
+ return s;
+}
+
+u8 *
+format_vnet_dev_args (u8 *s, va_list *va)
+{
+ vnet_dev_arg_t *a, *args = va_arg (*va, vnet_dev_arg_t *);
+ table_t t = { .no_ansi = 1 };
+
+ table_add_header_col (&t, 4, "Name", "Value", "Default", "Description");
+ table_set_cell_align (&t, -1, 0, TTAA_LEFT);
+ table_set_cell_align (&t, -1, 3, TTAA_LEFT);
+ vec_foreach (a, args)
+ {
+ int r = a - args;
+ table_format_cell (&t, r, 0, "%s", a->name);
+ if (a->val_set)
+ table_format_cell (&t, r, 1, "%U", format_vnet_dev_arg_value, a->type,
+ &a->val);
+ else
+ table_format_cell (&t, r, 1, "<not set>");
+
+ table_format_cell (&t, r, 2, "%U", format_vnet_dev_arg_value, a->type,
+ &a->default_val);
+ table_format_cell (&t, r, 3, "%s", a->desc);
+ table_set_cell_align (&t, r, 0, TTAA_LEFT);
+ table_set_cell_align (&t, r, 3, TTAA_LEFT);
+ }
+
+ s = format (s, "%U", format_table, &t);
+
+ table_free (&t);
+ return s;
+}
diff --git a/src/vnet/dev/args.h b/src/vnet/dev/args.h
new file mode 100644
index 00000000000..a256cfe8e0e
--- /dev/null
+++ b/src/vnet/dev/args.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_ARGS_H_
+#define _VNET_DEV_ARGS_H_
+
+#include <vppinfra/clib.h>
+#include <vnet/dev/errors.h>
+
+#define foreach_vnet_dev_arg_type \
+ _ (BOOL, "%u", boolean) \
+ _ (UINT32, "%u", uint32) \
+ _ (STRING, "\'%v\'", string)
+
+typedef enum
+{
+ VNET_DEV_ARG_END,
+#define _(n, f, v) VNET_DEV_ARG_TYPE_##n,
+ foreach_vnet_dev_arg_type
+#undef _
+} __clib_packed vnet_dev_arg_type_t;
+
+typedef union
+{
+ u8 boolean;
+ u32 uint32;
+ u8 *string;
+} vnet_dev_arg_value_t;
+
+typedef struct
+{
+ char *name;
+ char *desc;
+ vnet_dev_arg_type_t type;
+ u8 val_set;
+ u32 min;
+ u32 max;
+ u64 id;
+ vnet_dev_arg_value_t val;
+ vnet_dev_arg_value_t default_val;
+} vnet_dev_arg_t;
+
+#define VNET_DEV_ARG_BOOL(ud, n, d, ...) \
+ { \
+ .type = VNET_DEV_ARG_TYPE_BOOL, .id = ud, .name = n, .desc = d, \
+ __VA_ARGS__ \
+ }
+#define VNET_DEV_ARG_UINT32(ud, n, d, ...) \
+ { \
+ .type = VNET_DEV_ARG_TYPE_UINT32, .id = ud, .name = n, .desc = d, \
+ __VA_ARGS__ \
+ }
+#define VNET_DEV_ARG_STRING(ud, n, d, ...) \
+ { \
+ .type = VNET_DEV_ARG_TYPE_STRING, .id = ud, .name = n, .desc = d, \
+ __VA_ARGS__ \
+ }
+#define VNET_DEV_ARG_END() \
+ { \
+ .type = VNET_DEV_ARG_END \
+ }
+
+#define VNET_DEV_ARGS(...) \
+ (vnet_dev_arg_t[]) { __VA_ARGS__, VNET_DEV_ARG_END () }
+
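+/*
+ * Illustrative sketch (argument names and ids hypothetical): a driver
+ * describes its arguments as an END-terminated array, typically assigned
+ * to the 'args' member of its registration:
+ *
+ *   .args = VNET_DEV_ARGS (
+ *     VNET_DEV_ARG_BOOL (0, "enable_foo", "enable foo offload"),
+ *     VNET_DEV_ARG_UINT32 (1, "rx_ring_size", "RX ring size",
+ *                          .min = 64, .max = 4096),
+ *     VNET_DEV_ARG_STRING (2, "profile", "profile name")),
+ */
+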
+#define foreach_vnet_dev_args(a, d) \
+ for (typeof ((d)->args[0]) *(a) = (d)->args; (a) < vec_end ((d)->args); \
+ (a)++)
+#define foreach_vnet_dev_port_args(a, p) \
+ for (typeof ((p)->args[0]) *(a) = (p)->args; (a) < vec_end ((p)->args); \
+ (a)++)
+
+#endif /* _VNET_DEV_ARGS_H_ */
diff --git a/src/vnet/dev/cli.c b/src/vnet/dev/cli.c
new file mode 100644
index 00000000000..53be4483183
--- /dev/null
+++ b/src/vnet/dev/cli.c
@@ -0,0 +1,331 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/api.h>
+
+static clib_error_t *
+device_attach_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_dev_api_attach_args_t a = {};
+ vnet_dev_rv_t rv;
+
+ if (!unformat_user (input, unformat_c_string_array, a.device_id,
+ sizeof (a.device_id)))
+    return clib_error_return (0, "please specify a valid device id");
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (!a.driver_name[0] &&
+ unformat (input, "driver %U", unformat_c_string_array, a.driver_name,
+ sizeof (a.driver_name)))
+ ;
+ else if (!a.flags.n &&
+ unformat (input, "flags %U", unformat_vnet_dev_flags, &a.flags))
+ ;
+ else if (!a.args && unformat (input, "args %v", &a.args))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ rv = vnet_dev_api_attach (vm, &a);
+
+ vec_free (a.args);
+
+ if (rv != VNET_DEV_OK)
+ return clib_error_return (0, "unable to attach '%s': %U", a.device_id,
+ format_vnet_dev_rv, rv);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (device_attach_cmd, static) = {
+ .path = "device attach",
+  .short_help = "device attach <device-id> [driver <name>] "
+                "[flags <flags>] [args <dev-args>]",
+ .function = device_attach_cmd_fn,
+};
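+
+/* e.g. (device id, driver name and args illustrative):
+ *   device attach pci/0000:01:00.0 driver my_drv args enable_foo=yes
+ */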
+
+static clib_error_t *
+device_detach_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_dev_rv_t rv;
+ vnet_dev_device_id_t device_id = {};
+ vnet_dev_t *dev;
+
+ if (!unformat_user (input, unformat_c_string_array, device_id,
+ sizeof (device_id)))
+    return clib_error_return (0, "please specify a valid device id");
+
+ dev = vnet_dev_by_id (device_id);
+
+ if (dev)
+ {
+ vnet_dev_api_detach_args_t a = { .dev_index = dev->index };
+ rv = vnet_dev_api_detach (vm, &a);
+ }
+ else
+ rv = VNET_DEV_ERR_UNKNOWN_DEVICE;
+
+ if (rv != VNET_DEV_OK)
+ return clib_error_return (0, "unable to detach '%s': %U", device_id,
+ format_vnet_dev_rv, rv);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (device_detach_cmd, static) = {
+ .path = "device detach",
+ .short_help = "device detach <device-id>",
+ .function = device_detach_cmd_fn,
+ .is_mp_safe = 1,
+};
+
+static clib_error_t *
+device_reset_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_dev_api_reset_args_t a = {};
+ vnet_dev_rv_t rv;
+
+ if (!unformat_user (input, unformat_c_string_array, a.device_id,
+ sizeof (a.device_id)))
+    return clib_error_return (0, "please specify a valid device id");
+
+ rv = vnet_dev_api_reset (vm, &a);
+
+ if (rv != VNET_DEV_OK)
+ return clib_error_return (0, "unable to reset '%s': %U", a.device_id,
+ format_vnet_dev_rv, rv);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (device_reset_cmd, static) = {
+ .path = "device reset",
+ .short_help = "device reset <device-id>",
+ .function = device_reset_cmd_fn,
+ .is_mp_safe = 1,
+};
+
+static clib_error_t *
+device_create_if_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_dev_api_create_port_if_args_t a = {};
+ vnet_dev_rv_t rv;
+ vnet_dev_device_id_t device_id = {};
+ vnet_dev_t *dev = 0;
+ u32 n;
+
+ if (unformat_user (input, unformat_c_string_array, device_id,
+ sizeof (device_id)))
+ dev = vnet_dev_by_id (device_id);
+
+ if (!dev)
+    return clib_error_return (0, "please specify a valid device id");
+
+ a.dev_index = dev->index;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (!a.intf_name[0] &&
+ unformat (input, "if-name %U", unformat_c_string_array, a.intf_name,
+ sizeof (a.intf_name)))
+ ;
+ else if (!a.port_id && unformat (input, "port %u", &n))
+ a.port_id = n;
+ else if (!a.flags.n && unformat (input, "flags %U",
+ unformat_vnet_dev_port_flags, &a.flags))
+ ;
+ else if (!a.num_rx_queues && unformat (input, "num-rx-queues %u", &n))
+ a.num_rx_queues = n;
+ else if (!a.num_tx_queues && unformat (input, "num-tx-queues %u", &n))
+ a.num_tx_queues = n;
+      else if (!a.rx_queue_size && unformat (input, "rx-queue-size %u", &n))
+        a.rx_queue_size = n;
+      else if (!a.tx_queue_size && unformat (input, "tx-queue-size %u", &n))
+        a.tx_queue_size = n;
+ else if (!a.intf_name[0] &&
+ unformat (input, "name %U", unformat_c_string_array,
+                         a.intf_name, sizeof (a.intf_name)))
+ ;
+ else if (!a.args && unformat (input, "args %v", &a.args))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ rv = vnet_dev_api_create_port_if (vm, &a);
+
+ vec_free (a.args);
+
+ if (rv != VNET_DEV_OK)
+ return clib_error_return (0, "unable to create_if '%s': %U", device_id,
+ format_vnet_dev_rv, rv);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (device_create_if_cmd, static) = {
+ .path = "device create-interface",
+  .short_help = "device create-interface <device-id> [port <port-id>] "
+                "[name <intf-name>] [num-rx-queues <n>] [num-tx-queues <n>] "
+                "[rx-queue-size <n>] [tx-queue-size <n>] [flags <flags>] "
+                "[args <iface-args>]",
+ .function = device_create_if_cmd_fn,
+ .is_mp_safe = 1,
+};
+
+static clib_error_t *
+device_remove_if_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_dev_api_remove_port_if_args_t a = { .sw_if_index = ~0 };
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_dev_rv_t rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_vnet_sw_interface, vnm,
+ &a.sw_if_index))
+ ;
+ else if (unformat (input, "sw-if-index %u", &a.sw_if_index))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (a.sw_if_index == ~0)
+    return clib_error_return (0, "please specify an existing interface name");
+
+ rv = vnet_dev_api_remove_port_if (vm, &a);
+
+ if (rv != VNET_DEV_OK)
+ return clib_error_return (0, "unable to remove interface: %U",
+ format_vnet_dev_rv, rv);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (device_remove_if_cmd, static) = {
+ .path = "device remove-interface",
+ .short_help = "device remove-interface [<interface-name> | sw-if-index <n>]",
+ .function = device_remove_if_cmd_fn,
+ .is_mp_safe = 1,
+};
+
+static clib_error_t *
+show_devices_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_format_args_t fa = {}, *a = &fa;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "counters"))
+ fa.counters = 1;
+ else if (unformat (input, "all"))
+ fa.show_zero_counters = 1;
+ else if (unformat (input, "debug"))
+ fa.debug = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ pool_foreach_pointer (dev, dm->devices)
+ {
+ vlib_cli_output (vm, "device '%s':", dev->device_id);
+ vlib_cli_output (vm, " %U", format_vnet_dev_info, a, dev);
+ foreach_vnet_dev_port (p, dev)
+ {
+ vlib_cli_output (vm, " Port %u:", p->port_id);
+ vlib_cli_output (vm, " %U", format_vnet_dev_port_info, a, p);
+ if (fa.counters)
+ vlib_cli_output (vm, " %U", format_vnet_dev_counters, a,
+ p->counter_main);
+
+ foreach_vnet_dev_port_rx_queue (q, p)
+ {
+ vlib_cli_output (vm, " RX queue %u:", q->queue_id);
+ vlib_cli_output (vm, " %U", format_vnet_dev_rx_queue_info,
+ a, q);
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, p)
+ {
+ vlib_cli_output (vm, " TX queue %u:", q->queue_id);
+ vlib_cli_output (vm, " %U", format_vnet_dev_tx_queue_info,
+ a, q);
+ }
+ }
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_devices_cmd, static) = {
+ .path = "show device",
+  .short_help = "show device [counters] [all] [debug]",
+ .function = show_devices_cmd_fn,
+ .is_mp_safe = 1,
+};
+
+static clib_error_t *
+show_device_counters_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_format_args_t fa = { .counters = 1 };
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "all"))
+ fa.show_zero_counters = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ pool_foreach_pointer (dev, dm->devices)
+ {
+ vlib_cli_output (vm, "device '%s':", dev->device_id);
+ foreach_vnet_dev_port (p, dev)
+ {
+ vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa,
+ p->counter_main);
+
+ foreach_vnet_dev_port_rx_queue (q, p)
+ if (q->counter_main)
+ {
+ vlib_cli_output (vm, " RX queue %u:", q->queue_id);
+ vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa,
+ q->counter_main);
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, p)
+ if (q->counter_main)
+ {
+ vlib_cli_output (vm, " TX queue %u:", q->queue_id);
+ vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa,
+ q->counter_main);
+ }
+ }
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_device_counters_cmd, static) = {
+ .path = "show device counters",
+ .short_help = "show device counters [all]",
+ .function = show_device_counters_cmd_fn,
+ .is_mp_safe = 1,
+};
diff --git a/src/vnet/dev/config.c b/src/vnet/dev/config.c
new file mode 100644
index 00000000000..8883e727ac2
--- /dev/null
+++ b/src/vnet/dev/config.c
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/error.h"
+#include "vppinfra/pool.h"
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/api.h>
+#include <vnet/dev/log.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "config",
+};
+
+static clib_error_t *
+vnet_dev_config_one_interface (vlib_main_t *vm, unformat_input_t *input,
+ vnet_dev_api_create_port_if_args_t *args)
+{
+ clib_error_t *err = 0;
+
+ log_debug (0, "port %u %U", args->port_id, format_unformat_input, input);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ u32 n;
+
+ if (unformat (input, "name %U", unformat_c_string_array, args->intf_name,
+ sizeof (args->intf_name)))
+ ;
+ else if (unformat (input, "num-rx-queues %u", &n))
+ args->num_rx_queues = n;
+ else if (unformat (input, "num-tx-queues %u", &n))
+ args->num_tx_queues = n;
+ else if (unformat (input, "rx-queue-size %u", &n))
+ args->rx_queue_size = n;
+ else if (unformat (input, "tx-queue-size %u", &n))
+ args->tx_queue_size = n;
+ else if (unformat (input, "flags %U", unformat_vnet_dev_port_flags,
+ &args->flags))
+ ;
+ else if (unformat (input, "args %U", unformat_single_quoted_string,
+ &args->args))
+ ;
+ else
+ {
+ err = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ break;
+ }
+ }
+ return err;
+}
+
+static clib_error_t *
+vnet_dev_config_one_device (vlib_main_t *vm, unformat_input_t *input,
+ char *device_id)
+{
+ log_debug (0, "device %s %U", device_id, format_unformat_input, input);
+ clib_error_t *err = 0;
+ vnet_dev_api_attach_args_t args = {};
+ vnet_dev_api_create_port_if_args_t *if_args_vec = 0, *if_args;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ unformat_input_t sub_input;
+ u32 n;
+
+ if (unformat (input, "driver %U", unformat_c_string_array,
+ args.driver_name, sizeof (args.driver_name)))
+ ;
+ else if (unformat (input, "flags %U", unformat_vnet_dev_flags,
+ &args.flags))
+ ;
+ else if (unformat (input, "args %U", unformat_single_quoted_string,
+ &args.args))
+ ;
+ else if (unformat (input, "port %u %U", &n, unformat_vlib_cli_sub_input,
+ &sub_input))
+ {
+ vnet_dev_api_create_port_if_args_t *if_args;
+ vec_add2 (if_args_vec, if_args, 1);
+ if_args->port_id = n;
+ err = vnet_dev_config_one_interface (vm, &sub_input, if_args);
+ unformat_free (&sub_input);
+ if (err)
+ break;
+ }
+ else
+ {
+ err = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ break;
+ }
+ }
+
+ if (err == 0)
+ {
+ vnet_dev_rv_t rv;
+
+ clib_memcpy (args.device_id, device_id, sizeof (args.device_id));
+ rv = vnet_dev_api_attach (vm, &args);
+ vec_free (args.args);
+
+ if (rv == VNET_DEV_OK)
+ {
+ vec_foreach (if_args, if_args_vec)
+ {
+ if_args->dev_index = args.dev_index;
+ rv = vnet_dev_api_create_port_if (vm, if_args);
+ if (rv != VNET_DEV_OK)
+ break;
+ }
+ }
+
+ if (rv != VNET_DEV_OK)
+ err = clib_error_return (0, "error: %U for device '%s'",
+ format_vnet_dev_rv, rv, device_id);
+ }
+
+ vec_free (if_args_vec);
+ return err;
+}
+
+uword
+dev_config_process_node_fn (vlib_main_t *vm, vlib_node_runtime_t *rt,
+ vlib_frame_t *f)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ unformat_input_t input;
+ clib_error_t *err = 0;
+
+ if (dm->startup_config == 0)
+ return 0;
+
+ unformat_init_vector (&input, dm->startup_config);
+ dm->startup_config = 0;
+
+ while (!err && unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ {
+ unformat_input_t sub_input;
+ vnet_dev_device_id_t device_id;
+ if (unformat (&input, "dev %U %U", unformat_c_string_array, device_id,
+ sizeof (device_id), unformat_vlib_cli_sub_input,
+ &sub_input))
+ {
+ err = vnet_dev_config_one_device (vm, &sub_input, device_id);
+ unformat_free (&sub_input);
+ }
+ else if (unformat (&input, "dev %U", unformat_c_string_array, device_id,
+ sizeof (device_id)))
+ {
+ unformat_input_t no_input = {};
+ unformat_init_vector (&no_input, 0);
+ err = vnet_dev_config_one_device (vm, &no_input, device_id);
+ unformat_free (&no_input);
+ }
+ else
+ err = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, &input);
+ }
+
+ unformat_free (&input);
+
+ if (err)
+ {
+ log_err (0, "%U", format_clib_error, err);
+ clib_error_free (err);
+ }
+
+ vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
+ vlib_node_rename (vm, rt->node_index, "deleted-%u", rt->node_index);
+ vec_add1 (dm->free_process_node_indices, rt->node_index);
+ return 0;
+}
+
+VLIB_REGISTER_NODE (dev_config_process_node) = {
+ .function = dev_config_process_node_fn,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "dev-config",
+};
+
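+/*
+ * Saves the "devices" startup.conf stanza for the dev-config process
+ * above, e.g. (device id, driver and argument names illustrative):
+ *
+ *   devices {
+ *     dev pci/0000:01:00.0 {
+ *       driver my_drv
+ *       args 'enable_foo=yes'
+ *       port 0 {
+ *         name eth0
+ *         num-rx-queues 4
+ *       }
+ *     }
+ *   }
+ */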
+static clib_error_t *
+devices_config (vlib_main_t *vm, unformat_input_t *input)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ uword c;
+
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ vec_add1 (dm->startup_config, c);
+
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (devices_config, "devices");
diff --git a/src/vnet/dev/counters.c b/src/vnet/dev/counters.c
new file mode 100644
index 00000000000..d02839d664f
--- /dev/null
+++ b/src/vnet/dev/counters.c
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+#include <vnet/interface/rx_queue_funcs.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "counters",
+};
+
+vnet_dev_counter_main_t *
+vnet_dev_counters_alloc (vlib_main_t *vm, vnet_dev_counter_t *counters,
+ u16 n_counters, char *fmt, ...)
+{
+ vnet_dev_counter_t *c;
+ vnet_dev_counter_main_t *cm;
+ u32 alloc_sz;
+
+ alloc_sz = sizeof (*cm) + n_counters * sizeof (*c);
+ cm = clib_mem_alloc_aligned (alloc_sz, CLIB_CACHE_LINE_BYTES);
+ clib_memset (cm, 0, sizeof (*cm));
+ cm->n_counters = n_counters;
+
+ if (fmt && strlen (fmt))
+ {
+ va_list va;
+ va_start (va, fmt);
+ cm->desc = va_format (0, fmt, &va);
+ va_end (va);
+ }
+
+ for (u32 i = 0; i < n_counters; i++)
+ {
+ cm->counters[i] = counters[i];
+ cm->counters[i].index = i;
+ }
+
+ vec_validate_aligned (cm->counter_data, n_counters - 1,
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (cm->counter_start, n_counters - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ return cm;
+}
+
+void
+vnet_dev_counters_clear (vlib_main_t *vm, vnet_dev_counter_main_t *cm)
+{
+ for (int i = 0; i < cm->n_counters; i++)
+ {
+ cm->counter_start[i] += cm->counter_data[i];
+ cm->counter_data[i] = 0;
+ }
+}
+
+void
+vnet_dev_counters_free (vlib_main_t *vm, vnet_dev_counter_main_t *cm)
+{
+ vec_free (cm->desc);
+ vec_free (cm->counter_data);
+ vec_free (cm->counter_start);
+ clib_mem_free (cm);
+}
+
+u8 *
+format_vnet_dev_counter_name (u8 *s, va_list *va)
+{
+ vnet_dev_counter_t *c = va_arg (*va, vnet_dev_counter_t *);
+
+ char *std_counters[] = {
+ [VNET_DEV_CTR_TYPE_RX_BYTES] = "total bytes received",
+ [VNET_DEV_CTR_TYPE_TX_BYTES] = "total bytes transmitted",
+ [VNET_DEV_CTR_TYPE_RX_PACKETS] = "total packets received",
+ [VNET_DEV_CTR_TYPE_TX_PACKETS] = "total packets transmitted",
+    [VNET_DEV_CTR_TYPE_RX_DROPS] = "total packets dropped on receive",
+    [VNET_DEV_CTR_TYPE_TX_DROPS] = "total packets dropped on transmit",
+ };
+
+ char *directions[] = {
+ [VNET_DEV_CTR_DIR_RX] = "received",
+ [VNET_DEV_CTR_DIR_TX] = "sent",
+ };
+ char *units[] = {
+ [VNET_DEV_CTR_UNIT_BYTES] = "bytes",
+ [VNET_DEV_CTR_UNIT_PACKETS] = "packets",
+ };
+
+ if (c->type == VNET_DEV_CTR_TYPE_VENDOR)
+ {
+ s = format (s, "%s", c->name);
+
+ if (c->unit < ARRAY_LEN (units) && units[c->unit])
+ s = format (s, " %s", units[c->unit]);
+
+ if (c->dir < ARRAY_LEN (directions) && directions[c->dir])
+ s = format (s, " %s", directions[c->dir]);
+ }
+ else if (c->type < ARRAY_LEN (std_counters) && std_counters[c->type])
+ s = format (s, "%s", std_counters[c->type]);
+ else
+ ASSERT (0);
+
+ return s;
+}
+
+u8 *
+format_vnet_dev_counters (u8 *s, va_list *va)
+{
+ vnet_dev_format_args_t *a = va_arg (*va, vnet_dev_format_args_t *);
+ vnet_dev_counter_main_t *cm = va_arg (*va, vnet_dev_counter_main_t *);
+ u32 line = 0, indent = format_get_indent (s);
+
+ foreach_vnet_dev_counter (c, cm)
+ {
+ if (a->show_zero_counters == 0 && cm->counter_data[c->index] == 0)
+ continue;
+
+ if (line++)
+ s = format (s, "\n%U", format_white_space, indent);
+
+ s = format (s, "%-45U%lu", format_vnet_dev_counter_name, c,
+ cm->counter_data[c->index]);
+ }
+
+ return s;
+}
diff --git a/src/vnet/dev/counters.h b/src/vnet/dev/counters.h
new file mode 100644
index 00000000000..33d08ffbecd
--- /dev/null
+++ b/src/vnet/dev/counters.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_COUNTERS_H_
+#define _VNET_DEV_COUNTERS_H_
+
+#include <vnet/dev/dev.h>
+
+typedef enum
+{
+ VNET_DEV_CTR_DIR_NA,
+ VNET_DEV_CTR_DIR_RX,
+ VNET_DEV_CTR_DIR_TX,
+} __clib_packed vnet_dev_counter_direction_t;
+
+typedef enum
+{
+ VNET_DEV_CTR_TYPE_RX_BYTES,
+ VNET_DEV_CTR_TYPE_RX_PACKETS,
+ VNET_DEV_CTR_TYPE_RX_DROPS,
+ VNET_DEV_CTR_TYPE_TX_BYTES,
+ VNET_DEV_CTR_TYPE_TX_PACKETS,
+ VNET_DEV_CTR_TYPE_TX_DROPS,
+ VNET_DEV_CTR_TYPE_VENDOR,
+} __clib_packed vnet_dev_counter_type_t;
+
+typedef enum
+{
+ VNET_DEV_CTR_UNIT_NA,
+ VNET_DEV_CTR_UNIT_BYTES,
+ VNET_DEV_CTR_UNIT_PACKETS,
+} __clib_packed vnet_dev_counter_unit_t;
+
+typedef struct vnet_dev_counter
+{
+ char name[24];
+ uword user_data;
+ vnet_dev_counter_type_t type;
+ vnet_dev_counter_direction_t dir;
+ vnet_dev_counter_unit_t unit;
+ u16 index;
+} vnet_dev_counter_t;
+
+typedef struct vnet_dev_counter_main
+{
+ u8 *desc;
+ u64 *counter_data;
+ u64 *counter_start;
+ u16 n_counters;
+ vnet_dev_counter_t counters[];
+} vnet_dev_counter_main_t;
+
+#define VNET_DEV_CTR_RX_BYTES(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_RX_BYTES, .dir = VNET_DEV_CTR_DIR_RX, \
+ .unit = VNET_DEV_CTR_UNIT_BYTES, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_TX_BYTES(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_TX_BYTES, .dir = VNET_DEV_CTR_DIR_TX, \
+ .unit = VNET_DEV_CTR_UNIT_BYTES, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_RX_PACKETS(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_RX_PACKETS, .dir = VNET_DEV_CTR_DIR_RX, \
+ .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_TX_PACKETS(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_TX_PACKETS, .dir = VNET_DEV_CTR_DIR_TX, \
+ .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_RX_DROPS(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_RX_DROPS, .dir = VNET_DEV_CTR_DIR_RX, \
+ .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_TX_DROPS(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_TX_DROPS, .dir = VNET_DEV_CTR_DIR_TX, \
+ .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_VENDOR(p, d, u, n, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_VENDOR, .user_data = (p), .name = n, \
+ .dir = VNET_DEV_CTR_DIR_##d, .unit = VNET_DEV_CTR_UNIT_##u, __VA_ARGS__ \
+ }
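+
+/*
+ * Illustrative sketch (names hypothetical): a driver declares its
+ * per-port counters and registers them with the port:
+ *
+ *   static vnet_dev_counter_t my_port_counters[] = {
+ *     VNET_DEV_CTR_RX_PACKETS (0),
+ *     VNET_DEV_CTR_RX_BYTES (0),
+ *     VNET_DEV_CTR_VENDOR (0, RX, PACKETS, "missed"),
+ *   };
+ *   ...
+ *   vnet_dev_port_add_counters (vm, port, my_port_counters,
+ *                               ARRAY_LEN (my_port_counters));
+ */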
+
+vnet_dev_counter_main_t *vnet_dev_counters_alloc (vlib_main_t *,
+ vnet_dev_counter_t *, u16,
+ char *, ...);
+void vnet_dev_counters_clear (vlib_main_t *, vnet_dev_counter_main_t *);
+void vnet_dev_counters_free (vlib_main_t *, vnet_dev_counter_main_t *);
+
+format_function_t format_vnet_dev_counters;
+format_function_t format_vnet_dev_counters_all;
+
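+/*
+ * Each counter carries its index into the counter_main's trailing
+ * 'counters' array, so 'counter - counter->index' points at counters[0];
+ * subtracting the offset of the flexible array member then recovers the
+ * enclosing vnet_dev_counter_main_t.
+ */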
+static_always_inline vnet_dev_counter_main_t *
+vnet_dev_counter_get_main (vnet_dev_counter_t *counter)
+{
+ return (vnet_dev_counter_main_t *) ((u8 *) (counter - counter->index) -
+ STRUCT_OFFSET_OF (
+ vnet_dev_counter_main_t, counters));
+}
+
+static_always_inline void
+vnet_dev_counter_value_add (vlib_main_t *vm, vnet_dev_counter_t *counter,
+ u64 val)
+{
+ vnet_dev_counter_main_t *cm = vnet_dev_counter_get_main (counter);
+ cm->counter_data[counter->index] += val;
+}
+
+static_always_inline void
+vnet_dev_counter_value_update (vlib_main_t *vm, vnet_dev_counter_t *counter,
+ u64 val)
+{
+ vnet_dev_counter_main_t *cm = vnet_dev_counter_get_main (counter);
+ cm->counter_data[counter->index] = val - cm->counter_start[counter->index];
+}
+
+#define foreach_vnet_dev_counter(c, cm) \
+ if (cm) \
+ for (typeof (*(cm)->counters) *(c) = (cm)->counters; \
+ (c) < (cm)->counters + (cm)->n_counters; (c)++)
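+
+/* e.g., from a driver's stats refresh (sketch, names hypothetical):
+ *
+ *   foreach_vnet_dev_counter (c, port->counter_main)
+ *     if (c->type == VNET_DEV_CTR_TYPE_RX_PACKETS)
+ *       vnet_dev_counter_value_update (vm, c, hw_stats.rx_packets);
+ */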
+
+#endif /* _VNET_DEV_COUNTERS_H_ */
diff --git a/src/vnet/dev/dev.api b/src/vnet/dev/dev.api
new file mode 100644
index 00000000000..552b778949b
--- /dev/null
+++ b/src/vnet/dev/dev.api
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Cisco Systems, Inc.
+ */
+
+option version = "0.0.1";
+
+enumflag dev_flags : u32
+{
+ VL_API_DEV_FLAG_NO_STATS = 0x1,
+};
+
+enumflag dev_port_flags : u32
+{
+ VL_API_DEV_PORT_FLAG_INTERRUPT_MODE = 0x1,
+};
+
+autoendian define dev_attach
+{
+ u32 client_index;
+ u32 context;
+ string device_id[48];
+ string driver_name[16];
+ vl_api_dev_flags_t flags;
+ string args[];
+};
+
+autoendian define dev_attach_reply
+{
+ u32 context;
+ u32 dev_index;
+ i32 retval;
+ string error_string[];
+};
+
+autoendian define dev_detach
+{
+ u32 client_index;
+ u32 context;
+ u32 dev_index;
+};
+
+autoendian define dev_detach_reply
+{
+ u32 context;
+ i32 retval;
+ string error_string[];
+};
+
+autoendian define dev_create_port_if
+{
+ u32 client_index;
+ u32 context;
+ u32 dev_index;
+ string intf_name[32];
+ u16 num_rx_queues;
+ u16 num_tx_queues;
+ u16 rx_queue_size;
+ u16 tx_queue_size;
+ u16 port_id;
+ vl_api_dev_port_flags_t flags;
+ string args[];
+};
+
+autoendian define dev_create_port_if_reply
+{
+ u32 context;
+ u32 sw_if_index;
+ i32 retval;
+ string error_string[];
+};
+
+autoendian define dev_remove_port_if
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+autoendian define dev_remove_port_if_reply
+{
+ u32 context;
+ i32 retval;
+ string error_string[];
+};
+
diff --git a/src/vnet/dev/dev.c b/src/vnet/dev/dev.c
new file mode 100644
index 00000000000..e04fa161ce2
--- /dev/null
+++ b/src/vnet/dev/dev.c
@@ -0,0 +1,461 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/pool.h"
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/log.h>
+#include <vnet/dev/counters.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+};
+
+vnet_dev_main_t vnet_dev_main = { .next_rx_queue_thread = 1 };
+
+vnet_dev_bus_t *
+vnet_dev_find_device_bus (vlib_main_t *vm, vnet_dev_device_id_t id)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_bus_t *bus;
+
+ pool_foreach (bus, dm->buses)
+ {
+ int n = strlen (bus->registration->name);
+ int l = strlen (id);
+ int dl = strlen (VNET_DEV_DEVICE_ID_PREFIX_DELIMITER);
+
+ if (l <= n + dl)
+ continue;
+
+ if (strncmp (bus->registration->name, id, n))
+ continue;
+
+ if (strncmp (VNET_DEV_DEVICE_ID_PREFIX_DELIMITER, id + n, dl))
+ continue;
+
+ return bus;
+ }
+
+ return 0;
+}
+
+void *
+vnet_dev_get_device_info (vlib_main_t *vm, vnet_dev_device_id_t id)
+{
+ vnet_dev_bus_t *bus;
+
+ bus = vnet_dev_find_device_bus (vm, id);
+ if (bus == 0)
+ return 0;
+
+ return bus->ops.get_device_info (vm, id);
+}
+
+vnet_dev_t *
+vnet_dev_alloc (vlib_main_t *vm, vnet_dev_device_id_t id,
+ vnet_dev_driver_t *driver)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_t *dev = 0, **devp = 0;
+
+ dev = vnet_dev_alloc_with_data (sizeof (vnet_dev_t),
+ driver->registration->device_data_sz);
+
+ pool_get (dm->devices, devp);
+ devp[0] = dev;
+ dev->index = devp - dm->devices;
+ dev->driver_index = driver->index;
+ dev->ops = driver->registration->ops;
+ dev->bus_index = driver->bus_index;
+ clib_memcpy (dev->device_id, id, sizeof (dev->device_id));
+ hash_set (dm->device_index_by_id, dev->device_id, dev->index);
+
+ if ((vnet_dev_process_create (vm, dev)) == VNET_DEV_OK)
+ return dev;
+
+ vnet_dev_free (vm, dev);
+ return 0;
+}
+
+vnet_dev_rv_t
+vnet_dev_init (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index);
+ vnet_dev_rv_t rv;
+
+ vnet_dev_validate (vm, dev);
+
+ if ((rv = bus->ops.device_open (vm, dev)) != VNET_DEV_OK)
+ return rv;
+
+ if (dev->ops.alloc)
+ {
+ rv = dev->ops.alloc (vm, dev);
+ if (rv != VNET_DEV_OK)
+ {
+          log_err (dev, "device alloc failed [rv %d]", rv);
+ if (dev->ops.deinit)
+ dev->ops.deinit (vm, dev);
+ if (dev->ops.free)
+ dev->ops.free (vm, dev);
+ return rv;
+ }
+ }
+
+ if ((rv = dev->ops.init (vm, dev)) != VNET_DEV_OK)
+ {
+ log_err (dev, "device init failed [rv %d]", rv);
+ if (dev->ops.deinit)
+ dev->ops.deinit (vm, dev);
+ if (dev->ops.free)
+ dev->ops.free (vm, dev);
+ return rv;
+ }
+
+ dev->initialized = 1;
+ dev->not_first_init = 1;
+ return VNET_DEV_OK;
+}
+
+void
+vnet_dev_deinit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ASSERT (dev->initialized == 1);
+ vnet_dev_bus_t *bus;
+
+ vnet_dev_validate (vm, dev);
+
+ foreach_vnet_dev_port (p, dev)
+ ASSERT (p->interface_created == 0);
+
+ if (dev->ops.deinit)
+ dev->ops.deinit (vm, dev);
+
+ bus = vnet_dev_get_bus (dev);
+ if (bus->ops.device_close)
+ bus->ops.device_close (vm, dev);
+
+ vnet_dev_process_quit (vm, dev);
+
+ dev->initialized = 0;
+}
+
+void
+vnet_dev_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+
+ vnet_dev_validate (vm, dev);
+
+ ASSERT (dev->initialized == 0);
+
+ foreach_vnet_dev_port (p, dev)
+ vnet_dev_port_free (vm, p);
+
+ vec_free (dev->description);
+ pool_free (dev->ports);
+ pool_free (dev->periodic_ops);
+ hash_unset (dm->device_index_by_id, dev->device_id);
+ vnet_dev_arg_free (&dev->args);
+ pool_put_index (dm->devices, dev->index);
+}
+
+vnet_dev_rv_t
+vnet_dev_reset (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vnet_dev_rv_t rv;
+
+ ASSERT (dev->initialized == 1);
+ vnet_dev_validate (vm, dev);
+
+ if (dev->ops.reset == 0)
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+
+ if ((rv = dev->ops.reset (vm, dev)) != VNET_DEV_OK)
+ {
+ log_err (dev, "device reset failed [rv %d]", rv);
+ return rv;
+ }
+
+ return VNET_DEV_OK;
+}
+
+void
+vnet_dev_detach (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ foreach_vnet_dev_port (p, dev)
+ if (p->interface_created)
+ vnet_dev_port_if_remove (vm, p);
+ vnet_dev_deinit (vm, dev);
+ vnet_dev_free (vm, dev);
+}
+
+vnet_dev_rv_t
+vnet_dev_dma_mem_alloc (vlib_main_t *vm, vnet_dev_t *dev, u32 size, u32 align,
+ void **pp)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index);
+ vnet_dev_rv_t rv;
+
+ vnet_dev_validate (vm, dev);
+
+ if (!bus->ops.dma_mem_alloc_fn)
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+
+ rv = bus->ops.dma_mem_alloc_fn (vm, dev, size, align, pp);
+ if (rv == VNET_DEV_OK)
+ log_debug (dev, "%u bytes va %p dma-addr 0x%lx numa %u align %u", size,
+ *pp, vnet_dev_get_dma_addr (vm, dev, *pp), dev->numa_node,
+ align);
+ return rv;
+}
+
+void
+vnet_dev_dma_mem_free (vlib_main_t *vm, vnet_dev_t *dev, void *p)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index);
+
+ vnet_dev_validate (vm, dev);
+
+ if (p == 0 || !bus->ops.dma_mem_free_fn)
+ return;
+
+ return bus->ops.dma_mem_free_fn (vm, dev, p);
+}
+
+clib_error_t *
+vnet_dev_admin_up_down_fn (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
+{
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ u32 is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+
+ if (is_up && p->started == 0)
+ rv = vnet_dev_process_call_port_op (vm, p, vnet_dev_port_start);
+ else if (!is_up && p->started)
+ rv = vnet_dev_process_call_port_op_no_rv (vm, p, vnet_dev_port_stop);
+
+ if (rv != VNET_DEV_OK)
+ return clib_error_return (0, "failed to change port admin state: %U",
+ format_vnet_dev_rv, rv);
+
+ return 0;
+}
+
+static void
+vnet_dev_feature_update_cb (u32 sw_if_index, u8 arc_index, u8 is_enable,
+ void *cb)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_feature_main_t *fm = &feature_main;
+ vnet_feature_config_main_t *cm;
+ vnet_dev_main_t *vdm = &vnet_dev_main;
+ vnet_dev_port_t *port;
+ vnet_hw_interface_t *hw;
+ u32 current_config_index = ~0;
+ u32 next_index = ~0;
+ int update_runtime = 0;
+
+ if (arc_index != vdm->eth_port_rx_feature_arc_index)
+ return;
+
+ hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ port = vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+
+ if (port == 0 || port->intf.sw_if_index != sw_if_index)
+ return;
+
+ if (vnet_have_features (arc_index, sw_if_index))
+ {
+ cm = &fm->feature_config_mains[arc_index];
+ current_config_index =
+ vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
+ vnet_get_config_data (&cm->config_main, &current_config_index,
+ &next_index, 0);
+ if (port->intf.feature_arc == 0 ||
+ port->intf.rx_next_index != next_index ||
+ port->intf.current_config_index != current_config_index)
+ {
+ port->intf.current_config_index = current_config_index;
+ port->intf.rx_next_index = next_index;
+ port->intf.feature_arc_index = arc_index;
+ port->intf.feature_arc = 1;
+ update_runtime = 1;
+ }
+ }
+ else
+ {
+ if (port->intf.feature_arc)
+ {
+ port->intf.current_config_index = 0;
+ port->intf.rx_next_index =
+ port->intf.redirect_to_node ?
+ port->intf.redirect_to_node_next_index :
+ vnet_dev_default_next_index_by_port_type[port->attr.type];
+ port->intf.feature_arc_index = 0;
+ port->intf.feature_arc = 0;
+ update_runtime = 1;
+ }
+ }
+
+ if (update_runtime)
+ {
+ foreach_vnet_dev_port_rx_queue (rxq, port)
+ vnet_dev_rx_queue_rt_request (
+ vm, rxq,
+ (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1,
+ .update_feature_arc = 1 });
+      log_debug (port->dev, "runtime update requested due to change in "
+                            "feature arc configuration");
+ }
+}
+
+static int
+sort_driver_registrations (void *a0, void *a1)
+{
+ vnet_dev_driver_registration_t **r0 = a0;
+ vnet_dev_driver_registration_t **r1 = a1;
+
+ if (r0[0]->priority > r1[0]->priority)
+ return -1;
+ else if (r0[0]->priority < r1[0]->priority)
+ return 1;
+
+ return 0;
+}
+
+static clib_error_t *
+vnet_dev_main_init (vlib_main_t *vm)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_driver_registration_t **drv = 0;
+ u32 temp_space_sz = 0;
+
+ dm->device_index_by_id = hash_create_string (0, sizeof (uword));
+
+ for (vnet_dev_bus_registration_t *r = dm->bus_registrations; r;
+ r = r->next_registration)
+ {
+ vnet_dev_bus_t *bus;
+ pool_get_zero (dm->buses, bus);
+ bus->registration = r;
+ bus->index = bus - dm->buses;
+ bus->ops = r->ops;
+ if (!r->device_data_size ||
+ r->device_data_size > STRUCT_SIZE_OF (vnet_dev_t, bus_data))
+        return clib_error_return (
+          0, "bus device data size for bus '%s' is too big or not specified",
+          r->name);
+
+ log_debug (0, "bus '%s' registered", r->name);
+ }
+
+ for (vnet_dev_driver_registration_t *r = dm->driver_registrations; r;
+ r = r->next_registration)
+ vec_add1 (drv, r);
+
+ vec_sort_with_function (drv, sort_driver_registrations);
+
+ vec_foreach_pointer (r, drv)
+ {
+ vnet_dev_driver_t *driver;
+ vnet_dev_bus_t *bus;
+ vnet_device_class_t *dev_class;
+ int bus_index = -1;
+
+ pool_foreach (bus, dm->buses)
+ {
+ if (strcmp (bus->registration->name, r->bus) == 0)
+ {
+ bus_index = bus->index;
+ break;
+ }
+ }
+
+ if (bus_index < 0)
+ return clib_error_return (0, "unknown bus '%s'", r->bus);
+
+ pool_get_zero (dm->drivers, driver);
+ driver->registration = r;
+ driver->index = driver - dm->drivers;
+ driver->bus_index = bus_index;
+ driver->ops = r->ops;
+ dev_class = clib_mem_alloc (sizeof (vnet_device_class_t));
+ *dev_class = (vnet_device_class_t){
+ .name = r->name,
+ .format_device_name = format_vnet_dev_interface_name,
+ .format_device = format_vnet_dev_interface_info,
+ .admin_up_down_function = vnet_dev_admin_up_down_fn,
+ .rx_redirect_to_node = vnet_dev_set_interface_next_node,
+ .clear_counters = vnet_dev_clear_hw_interface_counters,
+ .mac_addr_change_function = vnet_dev_port_mac_change,
+ .mac_addr_add_del_function = vnet_dev_add_del_mac_address,
+ .flow_ops_function = vnet_dev_flow_ops_fn,
+ .format_flow = format_vnet_dev_flow,
+ .set_rss_queues_function = vnet_dev_interface_set_rss_queues,
+ };
+ driver->dev_class_index = vnet_register_device_class (vm, dev_class);
+ log_debug (0, "driver '%s' registered on bus '%s'", r->name,
+ bus->registration->name);
+
+ if (temp_space_sz < r->runtime_temp_space_sz)
+ temp_space_sz = r->runtime_temp_space_sz;
+ }
+
+ if (dm->startup_config)
+ log_debug (0, "startup config: %v", dm->startup_config);
+
+ vec_free (drv);
+
+ if (temp_space_sz > 0)
+ {
+ const u32 align = CLIB_CACHE_LINE_BYTES;
+ u32 sz = round_pow2 (temp_space_sz, align);
+ dm->log2_runtime_temp_space_sz =
+ get_lowest_set_bit_index (max_pow2 (sz));
+ sz = 1 << dm->log2_runtime_temp_space_sz;
+ sz *= vlib_get_n_threads ();
+ dm->runtime_temp_spaces = clib_mem_alloc_aligned (sz, align);
+ clib_memset (dm->runtime_temp_spaces, 0, sz);
+ log_debug (0,
+ "requested %u bytes for runtime temp storage, allocated %u "
+ "per thread (total %u)",
+ temp_space_sz, 1 << dm->log2_runtime_temp_space_sz, sz);
+ }
+
+ vnet_feature_register (vnet_dev_feature_update_cb, 0);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (vnet_dev_main_init);
+
+clib_error_t *
+vnet_dev_num_workers_change (vlib_main_t *vm)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+
+ if (dm->log2_runtime_temp_space_sz > 0)
+ {
+ const u32 align = CLIB_CACHE_LINE_BYTES;
+ uword sz =
+ (1ULL << dm->log2_runtime_temp_space_sz) * vlib_get_n_threads ();
+ if (dm->runtime_temp_spaces)
+ clib_mem_free (dm->runtime_temp_spaces);
+ dm->runtime_temp_spaces = clib_mem_alloc_aligned (sz, align);
+ clib_memset (dm->runtime_temp_spaces, 0, sz);
+ log_debug (0, "runtime temp storage resized to %u", sz);
+ }
+
+ return 0;
+}
+
+VLIB_NUM_WORKERS_CHANGE_FN (vnet_dev_num_workers_change);
diff --git a/src/vnet/dev/dev.h b/src/vnet/dev/dev.h
new file mode 100644
index 00000000000..bbf2f9dff21
--- /dev/null
+++ b/src/vnet/dev/dev.h
@@ -0,0 +1,753 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_H_
+#define _VNET_DEV_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/types.h>
+#include <vnet/dev/args.h>
+
+#define VNET_DEV_DEVICE_ID_PREFIX_DELIMITER "/"
+
+#define foreach_vnet_dev_port_type \
+ _ (0, UNKNOWN) \
+ _ (1, ETHERNET)
+
+typedef enum
+{
+#define _(b, n) VNET_DEV_PORT_TYPE_##n = (1U << (b)),
+ foreach_vnet_dev_port_type
+#undef _
+} vnet_dev_port_type_t;
+
+#define foreach_vnet_dev_port_caps \
+ _ (interrupt_mode) \
+ _ (rss) \
+ _ (change_max_rx_frame_size) \
+ _ (mac_filter)
+
+#define foreach_vnet_dev_port_rx_offloads _ (ip4_cksum)
+
+#define foreach_vnet_dev_port_tx_offloads \
+ _ (ip4_cksum) \
+ _ (tcp_gso) \
+ _ (udp_gso)
+
+typedef union
+{
+ struct
+ {
+#define _(n) u8 n : 1;
+ foreach_vnet_dev_port_caps
+#undef _
+ };
+ u8 as_number;
+} vnet_dev_port_caps_t;
+
+typedef union
+{
+ struct
+ {
+#define _(n) u8 n : 1;
+ foreach_vnet_dev_port_rx_offloads
+#undef _
+ };
+ u8 as_number;
+} vnet_dev_port_rx_offloads_t;
+
+typedef union
+{
+ struct
+ {
+#define _(n) u8 n : 1;
+ foreach_vnet_dev_port_tx_offloads
+#undef _
+ };
+ u8 as_number;
+} vnet_dev_port_tx_offloads_t;
+
+typedef union
+{
+ u8 eth_mac[6];
+ u8 raw[8];
+} vnet_dev_hw_addr_t;
+
+typedef struct vnet_dev_bus_registration vnet_dev_bus_registration_t;
+typedef struct vnet_dev_driver_registration vnet_dev_driver_registration_t;
+
+typedef struct vnet_dev vnet_dev_t;
+typedef struct vnet_dev_port vnet_dev_port_t;
+typedef struct vnet_dev_rx_queue vnet_dev_rx_queue_t;
+typedef struct vnet_dev_tx_queue vnet_dev_tx_queue_t;
+typedef struct vnet_dev_bus_registration vnet_dev_bus_registration_t;
+typedef struct vnet_dev_driver_registration vnet_dev_driver_registration_t;
+typedef struct vnet_dev_counter vnet_dev_counter_t;
+typedef struct vnet_dev_counter_main vnet_dev_counter_main_t;
+typedef struct vnet_dev_port_cfg_change_req vnet_dev_port_cfg_change_req_t;
+
+typedef vnet_dev_rv_t (vnet_dev_op_t) (vlib_main_t *, vnet_dev_t *);
+typedef vnet_dev_rv_t (vnet_dev_port_op_t) (vlib_main_t *, vnet_dev_port_t *);
+typedef vnet_dev_rv_t (vnet_dev_port_cfg_change_op_t) (
+ vlib_main_t *, vnet_dev_port_t *, vnet_dev_port_cfg_change_req_t *);
+typedef vnet_dev_rv_t (vnet_dev_rx_queue_op_t) (vlib_main_t *,
+ vnet_dev_rx_queue_t *);
+typedef vnet_dev_rv_t (vnet_dev_tx_queue_op_t) (vlib_main_t *,
+ vnet_dev_tx_queue_t *);
+typedef void (vnet_dev_op_no_rv_t) (vlib_main_t *, vnet_dev_t *);
+typedef void (vnet_dev_port_op_no_rv_t) (vlib_main_t *, vnet_dev_port_t *);
+typedef void (vnet_dev_rx_queue_op_no_rv_t) (vlib_main_t *,
+ vnet_dev_rx_queue_t *);
+typedef void (vnet_dev_tx_queue_op_no_rv_t) (vlib_main_t *,
+ vnet_dev_tx_queue_t *);
+
+typedef u16 vnet_dev_queue_id_t;
+typedef u16 vnet_dev_bus_index_t;
+typedef u16 vnet_dev_driver_index_t;
+
+typedef struct
+{
+ vnet_dev_rx_queue_op_t *alloc;
+ vnet_dev_rx_queue_op_t *start;
+ vnet_dev_rx_queue_op_no_rv_t *stop;
+ vnet_dev_rx_queue_op_no_rv_t *free;
+ format_function_t *format_info;
+} vnet_dev_rx_queue_ops_t;
+
+typedef struct
+{
+ vnet_dev_tx_queue_op_t *alloc;
+ vnet_dev_tx_queue_op_t *start;
+ vnet_dev_tx_queue_op_no_rv_t *stop;
+ vnet_dev_tx_queue_op_no_rv_t *free;
+ format_function_t *format_info;
+} vnet_dev_tx_queue_ops_t;
+
+typedef struct
+{
+ u16 data_size;
+ u16 min_size;
+ u16 max_size;
+ u16 default_size;
+ u8 multiplier;
+ u8 size_is_power_of_two : 1;
+} vnet_dev_queue_config_t;
+
+#define foreach_vnet_dev_port_cfg_type \
+ _ (PROMISC_MODE) \
+ _ (MAX_RX_FRAME_SIZE) \
+ _ (CHANGE_PRIMARY_HW_ADDR) \
+ _ (ADD_SECONDARY_HW_ADDR) \
+ _ (REMOVE_SECONDARY_HW_ADDR) \
+ _ (RXQ_INTR_MODE_ENABLE) \
+ _ (RXQ_INTR_MODE_DISABLE) \
+ _ (ADD_RX_FLOW) \
+ _ (DEL_RX_FLOW) \
+ _ (GET_RX_FLOW_COUNTER) \
+ _ (RESET_RX_FLOW_COUNTER)
+
+typedef enum
+{
+ VNET_DEV_PORT_CFG_UNKNOWN,
+#define _(n) VNET_DEV_PORT_CFG_##n,
+ foreach_vnet_dev_port_cfg_type
+#undef _
+} __clib_packed vnet_dev_port_cfg_type_t;
+
+typedef struct vnet_dev_port_cfg_change_req
+{
+ vnet_dev_port_cfg_type_t type;
+ u8 validated : 1;
+ u8 all_queues : 1;
+
+ union
+ {
+ u8 promisc : 1;
+ vnet_dev_hw_addr_t addr;
+ u16 max_rx_frame_size;
+ vnet_dev_queue_id_t queue_id;
+ struct
+ {
+ u32 flow_index;
+ uword *private_data;
+ };
+ };
+
+} vnet_dev_port_cfg_change_req_t;
+
+typedef struct
+{
+ vnet_dev_hw_addr_t hw_addr;
+ u16 max_rx_queues;
+ u16 max_tx_queues;
+ u16 max_supported_rx_frame_size;
+ vnet_dev_port_type_t type;
+ vnet_dev_port_caps_t caps;
+ vnet_dev_port_rx_offloads_t rx_offloads;
+ vnet_dev_port_tx_offloads_t tx_offloads;
+} vnet_dev_port_attr_t;
+
+typedef enum
+{
+ VNET_DEV_PERIODIC_OP_TYPE_DEV = 1,
+ VNET_DEV_PERIODIC_OP_TYPE_PORT = 2,
+} __clib_packed vnet_dev_periodic_op_type_t;
+
+typedef struct
+{
+ f64 interval;
+ f64 last_run;
+ vnet_dev_periodic_op_type_t type;
+ union
+ {
+ vnet_dev_t *dev;
+ vnet_dev_port_t *port;
+ void *arg;
+ };
+ union
+ {
+ vnet_dev_op_no_rv_t *dev_op;
+ vnet_dev_port_op_no_rv_t *port_op;
+ void *op;
+ };
+} vnet_dev_periodic_op_t;
+
+typedef struct
+{
+ struct _vlib_node_fn_registration *registrations;
+ format_function_t *format_trace;
+ vlib_error_desc_t *error_counters;
+ u16 n_error_counters;
+} vnet_dev_node_t;
+
+typedef struct
+{
+ vnet_dev_op_t *alloc;
+ vnet_dev_op_t *init;
+ vnet_dev_op_no_rv_t *deinit;
+ vnet_dev_op_t *reset;
+ vnet_dev_op_no_rv_t *free;
+ u8 *(*probe) (vlib_main_t *, vnet_dev_bus_index_t, void *);
+ format_function_t *format_info;
+} vnet_dev_ops_t;
+
+typedef struct
+{
+ vnet_dev_port_op_t *alloc;
+ vnet_dev_port_op_t *init;
+ vnet_dev_port_cfg_change_op_t *config_change;
+ vnet_dev_port_cfg_change_op_t *config_change_validate;
+ vnet_dev_port_op_t *start;
+ vnet_dev_port_op_no_rv_t *stop;
+ vnet_dev_port_op_no_rv_t *deinit;
+ vnet_dev_port_op_no_rv_t *free;
+ format_function_t *format_status;
+ format_function_t *format_flow;
+} vnet_dev_port_ops_t;
+
+typedef union
+{
+ struct
+ {
+ u8 update_next_index : 1;
+ u8 update_feature_arc : 1;
+ u8 suspend_off : 1;
+ u8 suspend_on : 1;
+ };
+ u8 as_number;
+} vnet_dev_rx_queue_rt_req_t;
+
+typedef struct vnet_dev_rx_queue
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vnet_dev_port_t *port;
+ u16 rx_thread_index;
+ u16 index;
+ vnet_dev_counter_main_t *counter_main;
+ CLIB_CACHE_LINE_ALIGN_MARK (runtime0);
+ vnet_dev_rx_queue_t *next_on_thread;
+ u8 interrupt_mode : 1;
+ u8 enabled : 1;
+ u8 started : 1;
+ u8 suspended : 1;
+ vnet_dev_queue_id_t queue_id;
+ u16 size;
+ u16 next_index;
+ vnet_dev_rx_queue_rt_req_t runtime_request;
+ CLIB_CACHE_LINE_ALIGN_MARK (runtime1);
+ vlib_buffer_template_t buffer_template;
+ CLIB_CACHE_LINE_ALIGN_MARK (driver_data);
+ u8 data[];
+} vnet_dev_rx_queue_t;
+
+STATIC_ASSERT_SIZEOF (vnet_dev_rx_queue_t, 3 * CLIB_CACHE_LINE_BYTES);
+
+typedef struct vnet_dev_tx_queue
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vnet_dev_port_t *port;
+ clib_bitmap_t *assigned_threads;
+ u16 index;
+ vnet_dev_counter_main_t *counter_main;
+ CLIB_CACHE_LINE_ALIGN_MARK (runtime0);
+ vnet_dev_queue_id_t queue_id;
+ u8 started : 1;
+ u8 enabled : 1;
+ u8 lock_needed : 1;
+ u8 lock;
+ u16 size;
+ CLIB_ALIGN_MARK (private_data, 16);
+ u8 data[];
+} vnet_dev_tx_queue_t;
+
+STATIC_ASSERT_SIZEOF (vnet_dev_tx_queue_t, 2 * CLIB_CACHE_LINE_BYTES);
+
+typedef struct vnet_dev_port
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vnet_dev_t *dev;
+ vnet_dev_port_id_t port_id;
+ vnet_dev_driver_index_t driver_index;
+ u8 initialized : 1;
+ u8 started : 1;
+ u8 link_up : 1;
+ u8 promisc : 1;
+ u8 interface_created : 1;
+ u8 rx_node_assigned : 1;
+ vnet_dev_counter_main_t *counter_main;
+ vnet_dev_queue_config_t rx_queue_config;
+ vnet_dev_queue_config_t tx_queue_config;
+ vnet_dev_port_attr_t attr;
+ u32 max_rx_frame_size;
+ vnet_dev_hw_addr_t primary_hw_addr;
+ vnet_dev_hw_addr_t *secondary_hw_addr;
+ u32 index;
+ u32 speed;
+ vnet_dev_rx_queue_t **rx_queues;
+ vnet_dev_tx_queue_t **tx_queues;
+ vnet_dev_port_ops_t port_ops;
+ vnet_dev_arg_t *args;
+ vnet_dev_rx_queue_ops_t rx_queue_ops;
+ vnet_dev_tx_queue_ops_t tx_queue_ops;
+ vnet_dev_node_t rx_node;
+ vnet_dev_node_t tx_node;
+
+ struct
+ {
+ vnet_dev_if_name_t name;
+ u32 dev_instance;
+ u32 rx_node_index;
+ u32 current_config_index;
+ u16 rx_next_index;
+ u16 redirect_to_node_next_index;
+ u8 feature_arc_index;
+ u8 feature_arc : 1;
+ u8 redirect_to_node : 1;
+ u8 default_is_intr_mode : 1;
+ u32 tx_node_index;
+ u32 hw_if_index;
+ u32 sw_if_index;
+ u16 num_rx_queues;
+ u16 num_tx_queues;
+ u16 txq_sz;
+ u16 rxq_sz;
+ } intf;
+
+ CLIB_CACHE_LINE_ALIGN_MARK (data0);
+ u8 data[];
+} vnet_dev_port_t;
+
+typedef struct vnet_dev
+{
+ vnet_dev_device_id_t device_id;
+ u16 initialized : 1;
+ u16 not_first_init : 1;
+ u16 va_dma : 1;
+ u16 process_node_quit : 1;
+ u16 process_node_periodic : 1;
+ u16 poll_stats : 1;
+ u16 bus_index;
+ u8 numa_node;
+ u16 max_rx_queues;
+ u16 max_tx_queues;
+ vnet_dev_driver_index_t driver_index;
+ u32 index;
+ u32 process_node_index;
+ u8 bus_data[32] __clib_aligned (16);
+ vnet_dev_ops_t ops;
+ vnet_dev_port_t **ports;
+ vnet_dev_periodic_op_t *periodic_ops;
+ u8 *description;
+ vnet_dev_arg_t *args;
+  u8 __clib_aligned (16) data[];
+} vnet_dev_t;
+
+typedef struct
+{
+ u16 vendor_id, device_id;
+ char *description;
+} vnet_dev_match_t;
+
+#define VNET_DEV_MATCH(...) \
+ (vnet_dev_match_t[]) \
+ { \
+ __VA_ARGS__, {} \
+ }
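+
+/* e.g. (ids and description illustrative):
+ *   .match = VNET_DEV_MATCH ({ .vendor_id = 0x1234,
+ *                              .device_id = 0x0001,
+ *                              .description = "Example NIC VF" }),
+ */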
+
+typedef struct
+{
+ vnet_dev_op_t *device_open;
+ vnet_dev_op_no_rv_t *device_close;
+ vnet_dev_rv_t (*dma_mem_alloc_fn) (vlib_main_t *, vnet_dev_t *, u32, u32,
+ void **);
+ void (*dma_mem_free_fn) (vlib_main_t *, vnet_dev_t *, void *);
+ void *(*get_device_info) (vlib_main_t *, char *);
+ void (*free_device_info) (vlib_main_t *, void *);
+ format_function_t *format_device_info;
+ format_function_t *format_device_addr;
+} vnet_dev_bus_ops_t;
+
+struct vnet_dev_bus_registration
+{
+ vnet_dev_bus_registration_t *next_registration;
+ vnet_dev_driver_name_t name;
+ u16 device_data_size;
+ vnet_dev_bus_ops_t ops;
+};
+
+struct vnet_dev_driver_registration
+{
+ vnet_dev_driver_registration_t *next_registration;
+ u8 bus_master_enable : 1;
+ vnet_dev_driver_name_t name;
+ vnet_dev_bus_name_t bus;
+ u16 device_data_sz;
+ u16 runtime_temp_space_sz;
+ vnet_dev_match_t *match;
+ int priority;
+ vnet_dev_ops_t ops;
+ vnet_dev_arg_t *args;
+};
+
+typedef struct
+{
+ u32 index;
+ vnet_dev_bus_registration_t *registration;
+ vnet_dev_bus_ops_t ops;
+} vnet_dev_bus_t;
+
+typedef struct
+{
+ u32 index;
+ void *dev_data;
+ vnet_dev_driver_registration_t *registration;
+ u32 dev_class_index;
+ vnet_dev_bus_index_t bus_index;
+ vnet_dev_ops_t ops;
+} vnet_dev_driver_t;
+
+typedef struct
+{
+ vnet_dev_bus_t *buses;
+ vnet_dev_driver_t *drivers;
+ vnet_dev_t **devices;
+ vnet_dev_port_t **ports_by_dev_instance;
+ vnet_dev_bus_registration_t *bus_registrations;
+ vnet_dev_driver_registration_t *driver_registrations;
+ void *runtime_temp_spaces;
+ u32 log2_runtime_temp_space_sz;
+ u32 *free_process_node_indices;
+ u32 *free_rx_node_indices;
+ uword *device_index_by_id;
+
+ u8 *startup_config;
+ u16 next_rx_queue_thread;
+ u8 eth_port_rx_feature_arc_index;
+} vnet_dev_main_t;
+
+extern vnet_dev_main_t vnet_dev_main;
+
+typedef struct
+{
+ struct
+ {
+ vnet_dev_port_attr_t attr;
+ vnet_dev_port_ops_t ops;
+ vnet_dev_arg_t *args;
+ u16 data_size;
+ void *initial_data;
+ } port;
+
+ vnet_dev_node_t *rx_node;
+ vnet_dev_node_t *tx_node;
+
+ struct
+ {
+ vnet_dev_queue_config_t config;
+ vnet_dev_rx_queue_ops_t ops;
+ } rx_queue;
+
+ struct
+ {
+ vnet_dev_queue_config_t config;
+ vnet_dev_tx_queue_ops_t ops;
+ } tx_queue;
+} vnet_dev_port_add_args_t;
+
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u8 link_speed : 1;
+ u8 link_state : 1;
+ u8 link_duplex : 1;
+ };
+ u8 any;
+ } change;
+ u8 link_state : 1;
+ u8 full_duplex : 1;
+ u32 link_speed;
+} vnet_dev_port_state_changes_t;
+
+/* args.c */
+vnet_dev_rv_t vnet_dev_arg_parse (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_arg_t *, u8 *);
+void vnet_dev_arg_free (vnet_dev_arg_t **);
+void vnet_dev_arg_clear_value (vnet_dev_arg_t *);
+format_function_t format_vnet_dev_arg_type;
+format_function_t format_vnet_dev_arg_value;
+format_function_t format_vnet_dev_args;
+
+/* dev.c */
+vnet_dev_t *vnet_dev_alloc (vlib_main_t *, vnet_dev_device_id_t,
+ vnet_dev_driver_t *);
+void vnet_dev_free (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_init (vlib_main_t *, vnet_dev_t *);
+void vnet_dev_deinit (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_reset (vlib_main_t *, vnet_dev_t *);
+void vnet_dev_detach (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_port_add (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_port_id_t,
+ vnet_dev_port_add_args_t *);
+vnet_dev_rv_t vnet_dev_dma_mem_alloc (vlib_main_t *, vnet_dev_t *, u32, u32,
+ void **);
+void vnet_dev_dma_mem_free (vlib_main_t *, vnet_dev_t *, void *);
+vnet_dev_bus_t *vnet_dev_find_device_bus (vlib_main_t *, vnet_dev_device_id_t);
+void *vnet_dev_get_device_info (vlib_main_t *, vnet_dev_device_id_t);
+
+/* error.c */
+clib_error_t *vnet_dev_port_err (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_rv_t, char *, ...);
+int vnet_dev_flow_err (vlib_main_t *, vnet_dev_rv_t);
+
+/* handlers.c */
+clib_error_t *vnet_dev_port_set_max_frame_size (vnet_main_t *,
+ vnet_hw_interface_t *, u32);
+u32 vnet_dev_port_eth_flag_change (vnet_main_t *, vnet_hw_interface_t *, u32);
+clib_error_t *vnet_dev_port_mac_change (vnet_hw_interface_t *, const u8 *,
+ const u8 *);
+clib_error_t *vnet_dev_add_del_mac_address (vnet_hw_interface_t *, const u8 *,
+ u8);
+int vnet_dev_flow_ops_fn (vnet_main_t *, vnet_flow_dev_op_t, u32, u32,
+ uword *);
+clib_error_t *vnet_dev_interface_set_rss_queues (vnet_main_t *,
+ vnet_hw_interface_t *,
+ clib_bitmap_t *);
+void vnet_dev_clear_hw_interface_counters (u32);
+void vnet_dev_set_interface_next_node (vnet_main_t *, u32, u32);
+
+/* port.c */
+vnet_dev_rv_t vnet_dev_port_start (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t vnet_dev_port_start_all_rx_queues (vlib_main_t *,
+ vnet_dev_port_t *);
+vnet_dev_rv_t vnet_dev_port_start_all_tx_queues (vlib_main_t *,
+ vnet_dev_port_t *);
+void vnet_dev_port_stop (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_deinit (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_free (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_add_counters (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_counter_t *, u16);
+void vnet_dev_port_free_counters (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_update_tx_node_runtime (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_state_change (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_state_changes_t);
+void vnet_dev_port_clear_counters (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t
+vnet_dev_port_cfg_change_req_validate (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+vnet_dev_rv_t vnet_dev_port_cfg_change (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+vnet_dev_rv_t vnet_dev_port_if_create (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t vnet_dev_port_if_remove (vlib_main_t *, vnet_dev_port_t *);
+
+/* queue.c */
+vnet_dev_rv_t vnet_dev_rx_queue_alloc (vlib_main_t *, vnet_dev_port_t *, u16);
+vnet_dev_rv_t vnet_dev_tx_queue_alloc (vlib_main_t *, vnet_dev_port_t *, u16);
+void vnet_dev_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *);
+void vnet_dev_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *);
+void vnet_dev_rx_queue_add_counters (vlib_main_t *, vnet_dev_rx_queue_t *,
+ vnet_dev_counter_t *, u16);
+void vnet_dev_rx_queue_free_counters (vlib_main_t *, vnet_dev_rx_queue_t *);
+void vnet_dev_tx_queue_add_counters (vlib_main_t *, vnet_dev_tx_queue_t *,
+ vnet_dev_counter_t *, u16);
+void vnet_dev_tx_queue_free_counters (vlib_main_t *, vnet_dev_tx_queue_t *);
+vnet_dev_rv_t vnet_dev_rx_queue_start (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t vnet_dev_tx_queue_start (vlib_main_t *, vnet_dev_tx_queue_t *);
+void vnet_dev_rx_queue_stop (vlib_main_t *, vnet_dev_rx_queue_t *);
+void vnet_dev_tx_queue_stop (vlib_main_t *, vnet_dev_tx_queue_t *);
+
+/* process.c */
+vnet_dev_rv_t vnet_dev_process_create (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_process_call_op (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_op_t *);
+vnet_dev_rv_t vnet_dev_process_call_op_no_rv (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_op_no_rv_t *);
+void vnet_dev_process_call_op_no_wait (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_op_no_rv_t *);
+vnet_dev_rv_t vnet_dev_process_call_port_op (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_op_t *);
+vnet_dev_rv_t vnet_dev_process_call_port_op_no_rv (vlib_main_t *vm,
+ vnet_dev_port_t *,
+ vnet_dev_port_op_no_rv_t *);
+void vnet_dev_process_call_port_op_no_wait (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_op_no_rv_t *);
+vnet_dev_rv_t
+vnet_dev_process_port_cfg_change_req (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+void vnet_dev_process_quit (vlib_main_t *, vnet_dev_t *);
+void vnet_dev_poll_dev_add (vlib_main_t *, vnet_dev_t *, f64,
+ vnet_dev_op_no_rv_t *);
+void vnet_dev_poll_dev_remove (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_op_no_rv_t *);
+void vnet_dev_poll_port_add (vlib_main_t *, vnet_dev_port_t *, f64,
+ vnet_dev_port_op_no_rv_t *);
+void vnet_dev_poll_port_remove (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_op_no_rv_t *);
+
+typedef struct
+{
+ u16 thread_index;
+ u8 completed;
+ u8 in_order;
+ vnet_dev_port_t *port;
+} vnet_dev_rt_op_t;
+
+vnet_dev_rv_t vnet_dev_rt_exec_ops (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_rt_op_t *, u32);
+
+/* format.c */
+typedef struct
+{
+ u8 counters : 1;
+ u8 show_zero_counters : 1;
+ u8 debug : 1;
+} vnet_dev_format_args_t;
+
+format_function_t format_vnet_dev_addr;
+format_function_t format_vnet_dev_flags;
+format_function_t format_vnet_dev_hw_addr;
+format_function_t format_vnet_dev_info;
+format_function_t format_vnet_dev_interface_info;
+format_function_t format_vnet_dev_interface_name;
+format_function_t format_vnet_dev_log;
+format_function_t format_vnet_dev_port_caps;
+format_function_t format_vnet_dev_port_flags;
+format_function_t format_vnet_dev_port_info;
+format_function_t format_vnet_dev_port_rx_offloads;
+format_function_t format_vnet_dev_port_tx_offloads;
+format_function_t format_vnet_dev_rv;
+format_function_t format_vnet_dev_rx_queue_info;
+format_function_t format_vnet_dev_tx_queue_info;
+format_function_t format_vnet_dev_flow;
+unformat_function_t unformat_vnet_dev_flags;
+unformat_function_t unformat_vnet_dev_port_flags;
+
+typedef struct
+{
+ vnet_dev_rx_queue_t *first_rx_queue;
+} vnet_dev_rx_node_runtime_t;
+
+STATIC_ASSERT (sizeof (vnet_dev_rx_node_runtime_t) <=
+ VLIB_NODE_RUNTIME_DATA_SIZE,
+ "must fit into runtime data");
+
+#define foreach_vnet_dev_port_rx_next \
+ _ (ETH_INPUT, "ethernet-input") \
+ _ (DROP, "error-drop")
+
+typedef enum
+{
+#define _(n, s) VNET_DEV_ETH_RX_PORT_NEXT_##n,
+ foreach_vnet_dev_port_rx_next
+#undef _
+ VNET_DEV_ETH_RX_PORT_N_NEXTS
+} vnet_dev_eth_port_rx_next_t;
+
+extern u16 vnet_dev_default_next_index_by_port_type[];
+extern vlib_node_registration_t port_rx_eth_node;
+
+typedef vnet_interface_output_runtime_t vnet_dev_tx_node_runtime_t;
+
+STATIC_ASSERT (sizeof (vnet_dev_tx_node_runtime_t) <=
+ VLIB_NODE_RUNTIME_DATA_SIZE,
+ "must fit into runtime data");
+
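+/* bus and driver registration macros; each expands to a static constructor
+ * which links the registration into the corresponding list in vnet_dev_main
+ * at load time */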
+#define VNET_DEV_REGISTER_BUS(x, ...) \
+ __VA_ARGS__ vnet_dev_bus_registration_t __vnet_dev_bus_registration_##x; \
+ static void __clib_constructor __vnet_dev_bus_registration_fn_##x (void) \
+ { \
+ vnet_dev_main_t *dm = &vnet_dev_main; \
+ __vnet_dev_bus_registration_##x.next_registration = \
+ dm->bus_registrations; \
+ dm->bus_registrations = &__vnet_dev_bus_registration_##x; \
+ } \
+ __VA_ARGS__ vnet_dev_bus_registration_t __vnet_dev_bus_registration_##x
+
+#define VNET_DEV_REGISTER_DRIVER(x, ...) \
+ __VA_ARGS__ vnet_dev_driver_registration_t \
+ __vnet_dev_driver_registration_##x; \
+ static void __clib_constructor __vnet_dev_driver_registration_fn_##x (void) \
+ { \
+ vnet_dev_main_t *dm = &vnet_dev_main; \
+ __vnet_dev_driver_registration_##x.next_registration = \
+ dm->driver_registrations; \
+ dm->driver_registrations = &__vnet_dev_driver_registration_##x; \
+ } \
+ __VA_ARGS__ vnet_dev_driver_registration_t __vnet_dev_driver_registration_##x
+
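+/* VNET_DEV_NODE_FN works like VLIB_NODE_FN: it emits a march-variant node
+ * function and a constructor which adds it to the registration list of the
+ * named vnet_dev_node_t.  Hypothetical driver usage (illustrative only):
+ *
+ *   VNET_DEV_NODE_FN (my_rx_node)
+ *   (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+ *   { ... }
+ */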
+#define VNET_DEV_NODE_FN(node) \
+ uword CLIB_MARCH_SFX (node##_fn) (vlib_main_t *, vlib_node_runtime_t *, \
+ vlib_frame_t *); \
+ static vlib_node_fn_registration_t CLIB_MARCH_SFX ( \
+ node##_fn_registration) = { \
+ .function = &CLIB_MARCH_SFX (node##_fn), \
+ }; \
+ \
+ static void __clib_constructor CLIB_MARCH_SFX ( \
+ node##_fn_multiarch_register) (void) \
+ { \
+ extern vnet_dev_node_t node; \
+ vlib_node_fn_registration_t *r; \
+ r = &CLIB_MARCH_SFX (node##_fn_registration); \
+ r->march_variant = CLIB_MARCH_SFX (CLIB_MARCH_VARIANT_TYPE); \
+ r->next_registration = (node).registrations; \
+ (node).registrations = r; \
+ } \
+ uword CLIB_MARCH_SFX (node##_fn)
+
+#define foreach_vnet_dev_port(p, d) pool_foreach_pointer (p, d->ports)
+#define foreach_vnet_dev_port_rx_queue(q, p) \
+ pool_foreach_pointer (q, p->rx_queues)
+#define foreach_vnet_dev_port_tx_queue(q, p) \
+ pool_foreach_pointer (q, p->tx_queues)
+
+#include <vnet/dev/dev_funcs.h>
+
+#endif /* _VNET_DEV_H_ */
diff --git a/src/vnet/dev/dev_api.c b/src/vnet/dev/dev_api.c
new file mode 100644
index 00000000000..5e9ac502b5d
--- /dev/null
+++ b/src/vnet/dev/dev_api.c
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/api.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+/* define message IDs */
+#include <dev/dev.api_enum.h>
+#include <dev/dev.api_types.h>
+
+static u16 vnet_dev_api_msg_id_base;
+
+#define REPLY_MSG_ID_BASE (vnet_dev_api_msg_id_base)
+#include <vlibapi/api_helper_macros.h>
+
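+/* .api flag enums must stay numerically in sync with the internal flags */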
+#define _(b, n, d) \
+ STATIC_ASSERT ((int) VL_API_DEV_FLAG_##n == (int) VNET_DEV_F_##n, "");
+foreach_vnet_dev_flag;
+#undef _
+
+#define _(b, n, d) \
+ STATIC_ASSERT ((int) VL_API_DEV_PORT_FLAG_##n == (int) VNET_DEV_PORT_F_##n, \
+ "");
+foreach_vnet_dev_port_flag;
+#undef _
+
+static void
+vl_api_dev_attach_t_handler (vl_api_dev_attach_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_dev_attach_reply_t *rmp;
+ vnet_dev_api_attach_args_t a = {};
+ vnet_dev_rv_t rv;
+ u8 *error_string = 0;
+
+ STATIC_ASSERT (sizeof (mp->device_id) == sizeof (a.device_id), "");
+ STATIC_ASSERT (sizeof (mp->driver_name) == sizeof (a.driver_name), "");
+ STATIC_ASSERT (sizeof (mp->flags) == sizeof (a.flags), "");
+
+ a.flags.n = mp->flags;
+ strncpy (a.device_id, (char *) mp->device_id, sizeof (a.device_id));
+ strncpy (a.driver_name, (char *) mp->driver_name, sizeof (a.driver_name));
+ vec_add (a.args, mp->args.buf, mp->args.length);
+
+ rv = vnet_dev_api_attach (vm, &a);
+
+ if (rv != VNET_DEV_OK)
+ error_string = format (0, "%U", format_vnet_dev_rv, rv);
+
+ vec_free (a.args);
+
+ REPLY_MACRO3_END (VL_API_DEV_ATTACH_REPLY, vec_len (error_string), ({
+ rmp->retval = rv;
+ if (error_string)
+ {
+ rmp->dev_index = ~0;
+ vl_api_vec_to_api_string (error_string,
+ &rmp->error_string);
+ }
+ else
+ rmp->dev_index = a.dev_index;
+ }));
+
+ vec_free (error_string);
+}
+
+static void
+vl_api_dev_detach_t_handler (vl_api_dev_detach_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_dev_detach_reply_t *rmp;
+ vnet_dev_api_detach_args_t a = {};
+ vnet_dev_rv_t rv;
+ u8 *error_string = 0;
+
+ a.dev_index = mp->dev_index;
+
+ rv = vnet_dev_api_detach (vm, &a);
+
+ if (rv != VNET_DEV_OK)
+ error_string = format (0, "%U", format_vnet_dev_rv, rv);
+
+ REPLY_MACRO3_END (VL_API_DEV_DETACH_REPLY, vec_len (error_string), ({
+ rmp->retval = rv;
+ if (error_string)
+ vl_api_vec_to_api_string (error_string,
+ &rmp->error_string);
+ }));
+
+ vec_free (error_string);
+}
+
+static void
+vl_api_dev_create_port_if_t_handler (vl_api_dev_create_port_if_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_dev_create_port_if_reply_t *rmp;
+ vnet_dev_api_create_port_if_args_t a = {};
+ vnet_dev_rv_t rv;
+ u8 *error_string = 0;
+
+ STATIC_ASSERT (sizeof (mp->intf_name) == sizeof (a.intf_name), "");
+ STATIC_ASSERT (sizeof (mp->flags) == sizeof (a.flags), "");
+
+ a.flags.n = mp->flags;
+#define _(n) a.n = mp->n;
+ _ (dev_index)
+ _ (port_id)
+ _ (num_rx_queues)
+ _ (num_tx_queues)
+ _ (rx_queue_size)
+ _ (tx_queue_size)
+#undef _
+
+ strncpy (a.intf_name, (char *) mp->intf_name, sizeof (a.intf_name));
+ vec_add (a.args, mp->args.buf, mp->args.length);
+
+ rv = vnet_dev_api_create_port_if (vm, &a);
+
+ if (rv != VNET_DEV_OK)
+ error_string = format (0, "%U", format_vnet_dev_rv, rv);
+
+ vec_free (a.args);
+
+ REPLY_MACRO3_END (VL_API_DEV_CREATE_PORT_IF_REPLY, vec_len (error_string), ({
+ rmp->retval = rv;
+ if (error_string)
+ {
+ rmp->sw_if_index = ~0;
+ vl_api_vec_to_api_string (error_string,
+ &rmp->error_string);
+ }
+ else
+ rmp->sw_if_index = a.sw_if_index;
+ }));
+
+ vec_free (error_string);
+}
+
+static void
+vl_api_dev_remove_port_if_t_handler (vl_api_dev_remove_port_if_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_dev_remove_port_if_reply_t *rmp;
+ vnet_dev_api_remove_port_if_args_t a = {};
+ vnet_dev_rv_t rv;
+ u8 *error_string = 0;
+
+ a.sw_if_index = mp->sw_if_index;
+
+ rv = vnet_dev_api_remove_port_if (vm, &a);
+
+ if (rv != VNET_DEV_OK)
+ error_string = format (0, "%U", format_vnet_dev_rv, rv);
+
+ REPLY_MACRO3_END (VL_API_DEV_REMOVE_PORT_IF_REPLY, vec_len (error_string), ({
+ rmp->retval = rv;
+ if (error_string)
+ vl_api_vec_to_api_string (error_string,
+ &rmp->error_string);
+ }));
+
+ vec_free (error_string);
+}
+
+/* set up the API message handling tables */
+
+#include <dev/dev.api.c>
+
+static clib_error_t *
+vnet_dev_api_hookup (vlib_main_t *vm)
+{
+ api_main_t *am = vlibapi_get_main ();
+
+ /* ask for a correctly-sized block of API message decode slots */
+ vnet_dev_api_msg_id_base = setup_message_id_table ();
+
+ foreach_int (i, VL_API_DEV_ATTACH, VL_API_DEV_DETACH,
+ VL_API_DEV_CREATE_PORT_IF, VL_API_DEV_REMOVE_PORT_IF)
+ vl_api_set_msg_thread_safe (am, vnet_dev_api_msg_id_base + i, 1);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (vnet_dev_api_hookup);
diff --git a/src/vnet/dev/dev_funcs.h b/src/vnet/dev/dev_funcs.h
new file mode 100644
index 00000000000..521157abbec
--- /dev/null
+++ b/src/vnet/dev/dev_funcs.h
@@ -0,0 +1,332 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_FUNCS_H_
+#define _VNET_DEV_FUNCS_H_
+
+#include <vppinfra/clib.h>
+#include <vnet/dev/dev.h>
+
+static_always_inline void *
+vnet_dev_get_data (vnet_dev_t *dev)
+{
+ return dev->data;
+}
+
+static_always_inline vnet_dev_t *
+vnet_dev_from_data (void *p)
+{
+ return (void *) ((u8 *) p - STRUCT_OFFSET_OF (vnet_dev_t, data));
+}
+
+static_always_inline void *
+vnet_dev_get_port_data (vnet_dev_port_t *port)
+{
+ return port->data;
+}
+
+static_always_inline void *
+vnet_dev_get_rx_queue_data (vnet_dev_rx_queue_t *rxq)
+{
+ return rxq->data;
+}
+
+static_always_inline void *
+vnet_dev_get_tx_queue_data (vnet_dev_tx_queue_t *txq)
+{
+ return txq->data;
+}
+
+static_always_inline vnet_dev_t *
+vnet_dev_get_by_index (u32 index)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ return pool_elt_at_index (dm->devices, index)[0];
+}
+
+static_always_inline vnet_dev_port_t *
+vnet_dev_get_port_by_index (vnet_dev_t *dev, u32 index)
+{
+ return pool_elt_at_index (dev->ports, index)[0];
+}
+
+static_always_inline vnet_dev_port_t *
+vnet_dev_get_port_from_dev_instance (u32 dev_instance)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ if (pool_is_free_index (dm->ports_by_dev_instance, dev_instance))
+ return 0;
+ return pool_elt_at_index (dm->ports_by_dev_instance, dev_instance)[0];
+}
+
+static_always_inline vnet_dev_port_t *
+vnet_dev_get_port_from_hw_if_index (u32 hw_if_index)
+{
+ vnet_hw_interface_t *hw;
+ vnet_dev_port_t *port;
+ hw = vnet_get_hw_interface (vnet_get_main (), hw_if_index);
+ port = vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+
+ if (!port || port->intf.hw_if_index != hw_if_index)
+ return 0;
+
+ return port;
+}
+
+static_always_inline vnet_dev_t *
+vnet_dev_by_index (u32 index)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ if (pool_is_free_index (dm->devices, index))
+ return 0;
+
+ return *pool_elt_at_index (dm->devices, index);
+}
+
+static_always_inline vnet_dev_t *
+vnet_dev_by_id (char *id)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ uword *p = hash_get (dm->device_index_by_id, id);
+ if (p)
+ return *pool_elt_at_index (dm->devices, p[0]);
+ return 0;
+}
+
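+/* translate a buffer memory pointer to a device DMA address: identity when
+ * the device supports VA DMA, otherwise a physmem physical-address lookup */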
+static_always_inline uword
+vnet_dev_get_dma_addr (vlib_main_t *vm, vnet_dev_t *dev, void *p)
+{
+ return dev->va_dma ? pointer_to_uword (p) : vlib_physmem_get_pa (vm, p);
+}
+
+static_always_inline void *
+vnet_dev_get_bus_data (vnet_dev_t *dev)
+{
+ return (void *) dev->bus_data;
+}
+
+static_always_inline vnet_dev_bus_t *
+vnet_dev_get_bus (vnet_dev_t *dev)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ return pool_elt_at_index (dm->buses, dev->bus_index);
+}
+
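+/* device state may only be mutated from the device process node running on
+ * the main thread; these helpers assert that invariant */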
+static_always_inline void
+vnet_dev_validate (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ASSERT (dev->process_node_index == vlib_get_current_process_node_index (vm));
+ ASSERT (vm->thread_index == 0);
+}
+
+static_always_inline void
+vnet_dev_port_validate (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ ASSERT (port->dev->process_node_index ==
+ vlib_get_current_process_node_index (vm));
+ ASSERT (vm->thread_index == 0);
+}
+
+static_always_inline u32
+vnet_dev_port_get_sw_if_index (vnet_dev_port_t *port)
+{
+ return port->intf.sw_if_index;
+}
+
+static_always_inline vnet_dev_port_t *
+vnet_dev_get_port_by_id (vnet_dev_t *dev, vnet_dev_port_id_t port_id)
+{
+ foreach_vnet_dev_port (p, dev)
+ if (p->port_id == port_id)
+ return p;
+ return 0;
+}
+
+static_always_inline vnet_dev_rx_queue_t *
+vnet_dev_port_get_rx_queue_by_id (vnet_dev_port_t *port,
+ vnet_dev_queue_id_t queue_id)
+{
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if (q->queue_id == queue_id)
+ return q;
+ return 0;
+}
+
+static_always_inline vnet_dev_tx_queue_t *
+vnet_dev_port_get_tx_queue_by_id (vnet_dev_port_t *port,
+ vnet_dev_queue_id_t queue_id)
+{
+ foreach_vnet_dev_port_tx_queue (q, port)
+ if (q->queue_id == queue_id)
+ return q;
+ return 0;
+}
+
+static_always_inline void *
+vnet_dev_alloc_with_data (u32 sz, u32 data_sz)
+{
+ void *p;
+ sz += data_sz;
+ sz = round_pow2 (sz, CLIB_CACHE_LINE_BYTES);
+ p = clib_mem_alloc_aligned (sz, CLIB_CACHE_LINE_BYTES);
+ clib_memset (p, 0, sz);
+ return p;
+}
+
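+/* test-and-test-and-set spinlock, taken only when the tx queue is shared by
+ * multiple threads (txq->lock_needed) */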
+static_always_inline void
+vnet_dev_tx_queue_lock_if_needed (vnet_dev_tx_queue_t *txq)
+{
+ u8 free = 0;
+
+ if (!txq->lock_needed)
+ return;
+
+ while (!__atomic_compare_exchange_n (&txq->lock, &free, 1, 0,
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+ {
+ while (__atomic_load_n (&txq->lock, __ATOMIC_RELAXED))
+ CLIB_PAUSE ();
+ free = 0;
+ }
+}
+
+static_always_inline void
+vnet_dev_tx_queue_unlock_if_needed (vnet_dev_tx_queue_t *txq)
+{
+ if (!txq->lock_needed)
+ return;
+ __atomic_store_n (&txq->lock, 0, __ATOMIC_RELEASE);
+}
+
+static_always_inline u8
+vnet_dev_get_rx_queue_buffer_pool_index (vnet_dev_rx_queue_t *rxq)
+{
+ return rxq->buffer_template.buffer_pool_index;
+}
+
+static_always_inline u32
+vnet_dev_get_rx_queue_buffer_data_size (vlib_main_t *vm,
+ vnet_dev_rx_queue_t *rxq)
+{
+ u8 bpi = vnet_dev_get_rx_queue_buffer_pool_index (rxq);
+ return vlib_get_buffer_pool (vm, bpi)->data_size;
+}
+
+static_always_inline void
+vnet_dev_rx_queue_rt_request (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq,
+ vnet_dev_rx_queue_rt_req_t req)
+{
+ __atomic_fetch_or (&rxq->runtime_request.as_number, req.as_number,
+ __ATOMIC_RELEASE);
+}
+
+static_always_inline vnet_dev_rx_node_runtime_t *
+vnet_dev_get_rx_node_runtime (vlib_node_runtime_t *node)
+{
+ return (void *) node->runtime_data;
+}
+
+static_always_inline vnet_dev_tx_node_runtime_t *
+vnet_dev_get_tx_node_runtime (vlib_node_runtime_t *node)
+{
+ return (void *) node->runtime_data;
+}
+
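+/* walk the per-thread linked list of rx queues polled by this node; pending
+ * runtime requests (next-index update, feature-arc update, suspend on/off)
+ * posted with vnet_dev_rx_queue_rt_request() are applied here on the polling
+ * thread before the queue is handed back to the caller */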
+static_always_inline vnet_dev_rx_queue_t *
+foreach_vnet_dev_rx_queue_runtime_helper (vlib_node_runtime_t *node,
+ vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_port_t *port;
+ vnet_dev_rx_queue_rt_req_t req;
+
+ if (rxq == 0)
+ rxq = vnet_dev_get_rx_node_runtime (node)->first_rx_queue;
+ else
+ next:
+ rxq = rxq->next_on_thread;
+
+ if (PREDICT_FALSE (rxq == 0))
+ return 0;
+
+ if (PREDICT_TRUE (rxq->runtime_request.as_number == 0))
+ return rxq;
+
+ req.as_number =
+ __atomic_exchange_n (&rxq->runtime_request.as_number, 0, __ATOMIC_ACQUIRE);
+
+ port = rxq->port;
+ if (req.update_next_index)
+ rxq->next_index = port->intf.rx_next_index;
+
+ if (req.update_feature_arc)
+ {
+ vlib_buffer_template_t *bt = &rxq->buffer_template;
+ bt->current_config_index = port->intf.current_config_index;
+ vnet_buffer (bt)->feature_arc_index = port->intf.feature_arc_index;
+ }
+
+ if (req.suspend_on)
+ {
+ rxq->suspended = 1;
+ goto next;
+ }
+
+ if (req.suspend_off)
+ rxq->suspended = 0;
+
+ return rxq;
+}
+
+#define foreach_vnet_dev_rx_queue_runtime(q, node) \
+ for (vnet_dev_rx_queue_t * (q) = \
+ foreach_vnet_dev_rx_queue_runtime_helper (node, 0); \
+ q; (q) = foreach_vnet_dev_rx_queue_runtime_helper (node, q))
+
+static_always_inline void *
+vnet_dev_get_rt_temp_space (vlib_main_t *vm)
+{
+ return vnet_dev_main.runtime_temp_spaces +
+ ((uword) vm->thread_index
+ << vnet_dev_main.log2_runtime_temp_space_sz);
+}
+
+static_always_inline void
+vnet_dev_set_hw_addr_eth_mac (vnet_dev_hw_addr_t *addr, const u8 *eth_mac_addr)
+{
+ vnet_dev_hw_addr_t ha = {};
+ clib_memcpy_fast (&ha.eth_mac, eth_mac_addr, sizeof (ha.eth_mac));
+ *addr = ha;
+}
+
+static_always_inline vnet_dev_arg_t *
+vnet_dev_get_port_arg_by_id (vnet_dev_port_t *port, u32 id)
+{
+ foreach_vnet_dev_port_args (a, port)
+ if (a->id == id)
+ return a;
+ return 0;
+}
+
+static_always_inline int
+vnet_dev_arg_get_bool (vnet_dev_arg_t *arg)
+{
+ ASSERT (arg->type == VNET_DEV_ARG_TYPE_BOOL);
+ return arg->val_set ? arg->val.boolean : arg->default_val.boolean;
+}
+
+static_always_inline u32
+vnet_dev_arg_get_uint32 (vnet_dev_arg_t *arg)
+{
+ ASSERT (arg->type == VNET_DEV_ARG_TYPE_UINT32);
+ return arg->val_set ? arg->val.uint32 : arg->default_val.uint32;
+}
+
+static_always_inline u8 *
+vnet_dev_arg_get_string (vnet_dev_arg_t *arg)
+{
+ ASSERT (arg->type == VNET_DEV_ARG_TYPE_STRING);
+ return arg->val_set ? arg->val.string : arg->default_val.string;
+}
+
+#endif /* _VNET_DEV_FUNCS_H_ */
diff --git a/src/vnet/dev/error.c b/src/vnet/dev/error.c
new file mode 100644
index 00000000000..4e057010af0
--- /dev/null
+++ b/src/vnet/dev/error.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/flow/flow.h>
+
+clib_error_t *
+vnet_dev_port_err (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_rv_t rv,
+ char *fmt, ...)
+{
+ clib_error_t *err;
+ va_list va;
+ u8 *s;
+
+ if (rv == VNET_DEV_OK)
+ return 0;
+
+ va_start (va, fmt);
+ s = va_format (0, fmt, &va);
+ va_end (va);
+
+ err = clib_error_return (0, "%s port %u: %U (%v)", port->dev->device_id,
+ port->port_id, format_vnet_dev_rv, rv, s);
+ vec_free (s);
+ return err;
+}
+
+int
+vnet_dev_flow_err (vlib_main_t *vm, vnet_dev_rv_t rv)
+{
+ if (rv == VNET_DEV_OK)
+ return 0;
+
+ switch (rv)
+ {
+ /* clang-format off */
+#define _(n, e, s) \
+ case VNET_DEV_ERR_##e: \
+ return VNET_FLOW_ERROR_##e;
+ foreach_flow_error;
+#undef _
+ /* clang-format on */
+ default:
+ ASSERT (0);
+ }
+
+ return 0;
+}
diff --git a/src/vnet/dev/errors.h b/src/vnet/dev/errors.h
new file mode 100644
index 00000000000..430a6aef282
--- /dev/null
+++ b/src/vnet/dev/errors.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_ERRORS_H_
+#define _VNET_DEV_ERRORS_H_
+
+#define foreach_vnet_dev_rv_type \
+ _ (ALREADY_EXISTS, "already exists") \
+ _ (ALREADY_IN_USE, "already in use") \
+ _ (BUFFER_ALLOC_FAIL, "packet buffer allocation failure") \
+ _ (BUG, "bug") \
+ _ (BUS, "bus error") \
+ _ (DEVICE_NO_REPLY, "no reply from device") \
+ _ (DMA_MEM_ALLOC_FAIL, "DMA memory allocation error") \
+ _ (DRIVER_NOT_AVAILABLE, "driver not available") \
+ _ (INVALID_ARG, "invalid argument") \
+ _ (INVALID_BUS, "invalid bus") \
+ _ (INVALID_DATA, "invalid data") \
+ _ (INVALID_DEVICE_ID, "invalid device id") \
+ _ (INVALID_NUM_RX_QUEUES, "invalid number of rx queues") \
+ _ (INVALID_NUM_TX_QUEUES, "invalid number of tx queues") \
+ _ (INVALID_PORT_ID, "invalid port id") \
+ _ (INVALID_RX_QUEUE_SIZE, "invalid rx queue size") \
+ _ (INVALID_TX_QUEUE_SIZE, "invalid tx queue size") \
+ _ (INVALID_VALUE, "invalid value") \
+ _ (INTERNAL, "internal error") \
+ _ (NOT_FOUND, "not found") \
+ _ (NOT_READY, "not ready") \
+ _ (NOT_SUPPORTED, "not supported") \
+ _ (NO_CHANGE, "no change") \
+ _ (NO_AVAIL_QUEUES, "no queues available") \
+  _ (NO_SUCH_ENTRY, "no such entry")                                        \
+ _ (PORT_STARTED, "port started") \
+ _ (PROCESS_REPLY, "dev process reply error") \
+ _ (RESOURCE_NOT_AVAILABLE, "resource not available") \
+ _ (TIMEOUT, "timeout") \
+ _ (UNKNOWN_DEVICE, "unknown device") \
+ _ (UNKNOWN_INTERFACE, "unknown interface") \
+ _ (UNSUPPORTED_CONFIG, "unsupported config") \
+ _ (UNSUPPORTED_DEVICE, "unsupported device") \
+ _ (UNSUPPORTED_DEVICE_VER, "unsupported device version") \
+ _ (ALREADY_DONE, "already done") \
+ _ (NO_SUCH_INTERFACE, "no such interface")
+
+#endif /* _VNET_DEV_ERRORS_H_ */
diff --git a/src/vnet/dev/format.c b/src/vnet/dev/format.c
new file mode 100644
index 00000000000..f599c0f8b85
--- /dev/null
+++ b/src/vnet/dev/format.c
@@ -0,0 +1,507 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/ethernet/ethernet.h>
+
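+/* vnet_dev_rv_t error values are negative, so they are negated to index the
+ * string table below */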
+u8 *
+format_vnet_dev_rv (u8 *s, va_list *args)
+{
+ vnet_dev_rv_t rv = va_arg (*args, vnet_dev_rv_t);
+ u32 index = -rv;
+
+ char *strings[] = { [0] = "OK",
+#define _(n, d) [-VNET_DEV_ERR_##n] = d,
+ foreach_vnet_dev_rv_type
+#undef _
+ };
+
+ if (index >= ARRAY_LEN (strings))
+ return format (s, "unknown return value (%d)", rv);
+ return format (s, "%s", strings[index]);
+}
+
+u8 *
+format_vnet_dev_addr (u8 *s, va_list *args)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+ vnet_dev_bus_t *bus;
+
+ if (dev == 0)
+    return s;
+
+ bus = pool_elt_at_index (dm->buses, dev->bus_index);
+ s = format (s, "%U", bus->ops.format_device_addr, dev);
+
+ return s;
+}
+
+u8 *
+format_vnet_dev_interface_name (u8 *s, va_list *args)
+{
+ u32 i = va_arg (*args, u32);
+ vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (i);
+
+ return format (s, "%s", port->intf.name);
+}
+
+u8 *
+format_vnet_dev_info (u8 *s, va_list *args)
+{
+ vnet_dev_format_args_t *a = va_arg (*args, vnet_dev_format_args_t *);
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+ vnet_dev_driver_t *dr = pool_elt_at_index (dm->drivers, dev->driver_index);
+ vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index);
+
+ u32 indent = format_get_indent (s);
+ s = format (s, "Driver is '%s', bus is '%s'", dr->registration->name,
+ bus->registration->name);
+
+ if (dev->description)
+ s = format (s, ", description is '%v'", dev->description);
+
+ if (bus->ops.format_device_info)
+ s = format (s, "\n%U%U", format_white_space, indent,
+ bus->ops.format_device_info, a, dev);
+
+ s = format (s, "\n%UAssigned process node is '%U'", format_white_space,
+ indent, format_vlib_node_name, vm, dev->process_node_index);
+ if (dev->args)
+ s = format (s, "\n%UDevice Specific Arguments:\n%U%U", format_white_space,
+ indent, format_white_space, indent + 2, format_vnet_dev_args,
+ dev->args);
+ if (dev->ops.format_info)
+ s =
+ format (s, "\n%UDevice Specific Info:\n%U%U", format_white_space, indent,
+ format_white_space, indent + 2, dev->ops.format_info, a, dev);
+ return s;
+}
+
+u8 *
+format_vnet_dev_hw_addr (u8 *s, va_list *args)
+{
+ vnet_dev_hw_addr_t *addr = va_arg (*args, vnet_dev_hw_addr_t *);
+ return format (s, "%U", format_ethernet_address, addr->eth_mac);
+}
+
+u8 *
+format_vnet_dev_port_info (u8 *s, va_list *args)
+{
+ vnet_dev_format_args_t *a = va_arg (*args, vnet_dev_format_args_t *);
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_dev_port_t *port = va_arg (*args, vnet_dev_port_t *);
+
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "Hardware Address is %U", format_vnet_dev_hw_addr,
+ &port->primary_hw_addr);
+ s = format (s, ", %u RX queues (max %u), %u TX queues (max %u)",
+ pool_elts (port->rx_queues), port->attr.max_rx_queues,
+ pool_elts (port->tx_queues), port->attr.max_tx_queues);
+ if (pool_elts (port->secondary_hw_addr))
+ {
+ u32 i = 0;
+ vnet_dev_hw_addr_t *a;
+ s = format (s, "\n%USecondary Hardware Address%s:", format_white_space,
+ indent,
+ pool_elts (port->secondary_hw_addr) > 1 ? "es are" : " is");
+ pool_foreach (a, port->secondary_hw_addr)
+ {
+ if (i++ % 6 == 0)
+ s = format (s, "\n%U", format_white_space, indent + 1);
+ s = format (s, " %U", format_vnet_dev_hw_addr, a);
+ }
+ }
+ s = format (s, "\n%UMax RX frame size is %u (max supported %u)",
+ format_white_space, indent, port->max_rx_frame_size,
+ port->attr.max_supported_rx_frame_size);
+ s = format (s, "\n%UCaps: %U", format_white_space, indent,
+ format_vnet_dev_port_caps, &port->attr.caps);
+ s = format (s, "\n%URX Offloads: %U", format_white_space, indent,
+ format_vnet_dev_port_rx_offloads, &port->attr.rx_offloads);
+ s = format (s, "\n%UTX Offloads: %U", format_white_space, indent,
+ format_vnet_dev_port_tx_offloads, &port->attr.tx_offloads);
+ if (port->port_ops.format_status)
+ s = format (s, "\n%UDevice Specific Port Status:\n%U%U",
+ format_white_space, indent, format_white_space, indent + 2,
+ port->port_ops.format_status, a, port);
+ if (port->args)
+ s = format (s, "\n%UDevice Specific Port Arguments:\n%U%U",
+ format_white_space, indent, format_white_space, indent + 2,
+ format_vnet_dev_args, port->args);
+
+ s = format (s, "\n%UInterface ", format_white_space, indent);
+ if (port->interface_created)
+ {
+ s = format (s, "assigned, interface name is '%U', RX node is '%U'",
+ format_vnet_sw_if_index_name, vnm, port->intf.sw_if_index,
+ format_vlib_node_name, vm, port->intf.rx_node_index);
+ }
+ else
+ s = format (s, "not assigned");
+ return s;
+}
+
+u8 *
+format_vnet_dev_rx_queue_info (u8 *s, va_list *args)
+{
+ vnet_dev_format_args_t __clib_unused *a =
+ va_arg (*args, vnet_dev_format_args_t *);
+ vnet_dev_rx_queue_t *rxq = va_arg (*args, vnet_dev_rx_queue_t *);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "Size is %u, buffer pool index is %u", rxq->size,
+ vnet_dev_get_rx_queue_buffer_pool_index (rxq));
+ s = format (s, "\n%UPolling thread is %u, %sabled, %sstarted, %s mode",
+ format_white_space, indent, rxq->rx_thread_index,
+ rxq->enabled ? "en" : "dis", rxq->started ? "" : "not-",
+ rxq->interrupt_mode ? "interrupt" : "polling");
+ if (rxq->port->rx_queue_ops.format_info)
+ s = format (s, "\n%U%U", format_white_space, indent,
+ rxq->port->rx_queue_ops.format_info, a, rxq);
+
+ return s;
+}
+
+u8 *
+format_vnet_dev_tx_queue_info (u8 *s, va_list *args)
+{
+ vnet_dev_format_args_t __clib_unused *a =
+ va_arg (*args, vnet_dev_format_args_t *);
+ vnet_dev_tx_queue_t *txq = va_arg (*args, vnet_dev_tx_queue_t *);
+ u32 indent = format_get_indent (s);
+ u32 n;
+
+ s = format (s, "Size is %u", txq->size);
+ s = format (s, "\n%U", format_white_space, indent);
+ n = clib_bitmap_count_set_bits (txq->assigned_threads);
+ if (n == 0)
+ s = format (s, "Not used by any thread");
+ else
+ s = format (s, "Used by thread%s %U", n > 1 ? "s" : "", format_bitmap_list,
+ txq->assigned_threads);
+ if (txq->port->tx_queue_ops.format_info)
+ s = format (s, "\n%U%U", format_white_space, indent,
+ txq->port->tx_queue_ops.format_info, a, txq);
+
+ return s;
+}
+
+u8 *
+format_vnet_dev_interface_info (u8 *s, va_list *args)
+{
+ u32 i = va_arg (*args, u32);
+ vnet_dev_format_args_t fa = {}, *a = &fa;
+ vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (i);
+ vnet_dev_t *dev = port->dev;
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "Device:");
+ s = format (s, "\n%U%U", format_white_space, indent + 2,
+ format_vnet_dev_info, a, dev);
+
+ s = format (s, "\n%UPort %u:", format_white_space, indent, port->port_id);
+ s = format (s, "\n%U%U", format_white_space, indent + 2,
+ format_vnet_dev_port_info, a, port);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ s = format (s, "\n%URX queue %u:", format_white_space, indent + 2,
+ q->queue_id);
+ s = format (s, "\n%U%U", format_white_space, indent + 4,
+ format_vnet_dev_rx_queue_info, a, q);
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ s = format (s, "\n%UTX queue %u:", format_white_space, indent + 2,
+ q->queue_id);
+ s = format (s, "\n%U%U", format_white_space, indent + 4,
+ format_vnet_dev_tx_queue_info, a, q);
+ }
+ return s;
+}
+
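+/* parse a comma-separated, case-insensitive list of flag names ('-' treated
+ * as '_') and return the OR of all matching flag values */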
+static u64
+unformat_flags (unformat_input_t *input, char *names[], u64 val[], u32 n_flags)
+{
+ u64 rv = 0;
+ uword c = 0;
+ u8 *s = 0;
+
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ {
+ switch (c)
+ {
+ case 'a' ... 'z':
+ c -= 'a' - 'A';
+ case '0' ... '9':
+ case 'A' ... 'Z':
+ vec_add1 (s, c);
+ break;
+ case '-':
+ vec_add1 (s, '_');
+ break;
+ case ',':
+ vec_add1 (s, 0);
+ break;
+ default:
+ goto end_of_string;
+ }
+ }
+end_of_string:
+
+ if (s == 0)
+ return 0;
+
+ vec_add1 (s, 0);
+
+ for (u8 *p = s, *end = vec_end (s); p < end; p += strlen ((char *) p) + 1)
+ {
+ for (c = 0; c < n_flags; c++)
+ if (strcmp (names[c], (char *) p) == 0)
+ {
+ rv |= val[c];
+ break;
+ }
+ if (c == n_flags)
+ goto done;
+ }
+
+done:
+ vec_free (s);
+ return rv;
+}
+
+uword
+unformat_vnet_dev_flags (unformat_input_t *input, va_list *args)
+{
+ vnet_dev_flags_t *fp = va_arg (*args, vnet_dev_flags_t *);
+ u64 val;
+
+ char *names[] = {
+#define _(b, n, d) #n,
+ foreach_vnet_dev_flag
+#undef _
+ };
+ u64 vals[] = {
+#define _(b, n, d) 1ull << (b)
+ foreach_vnet_dev_flag
+#undef _
+ };
+
+ val = unformat_flags (input, names, vals, ARRAY_LEN (names));
+
+ if (!val)
+ return 0;
+
+ fp->n = val;
+ return 1;
+}
+
+uword
+unformat_vnet_dev_port_flags (unformat_input_t *input, va_list *args)
+{
+ vnet_dev_port_flags_t *fp = va_arg (*args, vnet_dev_port_flags_t *);
+ u64 val;
+
+ char *flag_names[] = {
+#define _(b, n, d) #n,
+ foreach_vnet_dev_port_flag
+#undef _
+ };
+ u64 flag_values[] = {
+#define _(b, n, d) 1ull << (b)
+ foreach_vnet_dev_port_flag
+#undef _
+ };
+
+ val =
+ unformat_flags (input, flag_names, flag_values, ARRAY_LEN (flag_names));
+
+ if (!val)
+ return 0;
+
+ fp->n = val;
+ return 1;
+}
+
+static u8 *
+format_flags (u8 *s, u64 val, char *flag_names[], u64 flag_values[],
+ u32 n_flags)
+{
+ u32 n = 0;
+ for (int i = 0; i < n_flags; i++)
+ {
+ if ((val & flag_values[i]) == 0)
+ continue;
+
+ if (n++)
+ vec_add1 (s, ' ');
+
+ for (char *c = flag_names[i]; c[0] != 0; c++)
+ {
+ switch (c[0])
+ {
+ case 'A' ... 'Z':
+ vec_add1 (s, c[0] + 'a' - 'A');
+ break;
+ case '_':
+ vec_add1 (s, '-');
+ break;
+ default:
+ vec_add1 (s, c[0]);
+ }
+ }
+ }
+
+ return s;
+}
+
+u8 *
+format_vnet_dev_flags (u8 *s, va_list *args)
+{
+ vnet_dev_flags_t *fp = va_arg (*args, vnet_dev_flags_t *);
+ char *flag_names[] = {
+#define _(b, n, d) #n,
+ foreach_vnet_dev_flag
+#undef _
+ };
+ u64 flag_values[] = {
+#define _(b, n, d) 1ull << (b)
+ foreach_vnet_dev_flag
+#undef _
+ };
+
+ return format_flags (s, fp->n, flag_names, flag_values,
+ ARRAY_LEN (flag_names));
+}
+
+u8 *
+format_vnet_dev_port_flags (u8 *s, va_list *args)
+{
+ vnet_dev_port_flags_t *fp = va_arg (*args, vnet_dev_port_flags_t *);
+ char *flag_names[] = {
+#define _(b, n, d) #n,
+ foreach_vnet_dev_port_flag
+#undef _
+ };
+ u64 flag_values[] = {
+#define _(b, n, d) 1ull << (b)
+ foreach_vnet_dev_port_flag
+#undef _
+ };
+
+ return format_flags (s, fp->n, flag_names, flag_values,
+ ARRAY_LEN (flag_names));
+}
+
+u8 *
+format_vnet_dev_log (u8 *s, va_list *args)
+{
+ vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+ char *func = va_arg (*args, char *);
+
+ if (dev)
+ s = format (s, "%U", format_vnet_dev_addr, dev);
+ if (dev && func)
+ vec_add1 (s, ' ');
+ if (func)
+ s = format (s, "%s", func);
+ vec_add1 (s, ':');
+ vec_add1 (s, ' ');
+ return s;
+}
+
+u8 *
+format_vnet_dev_port_caps (u8 *s, va_list *args)
+{
+ vnet_dev_port_caps_t *c = va_arg (*args, vnet_dev_port_caps_t *);
+ u32 line = 0;
+
+ if (c->as_number == 0)
+ return s;
+
+#define _(n) \
+ if (c->n) \
+ { \
+ if (line++) \
+ vec_add1 (s, ' '); \
+ for (char *str = #n; *str; str++) \
+ vec_add1 (s, *str == '_' ? '-' : *str); \
+ }
+ foreach_vnet_dev_port_caps;
+#undef _
+
+ return s;
+}
+
+u8 *
+format_vnet_dev_port_rx_offloads (u8 *s, va_list *args)
+{
+ vnet_dev_port_rx_offloads_t *c =
+ va_arg (*args, vnet_dev_port_rx_offloads_t *);
+ u32 line = 0;
+
+ if (c->as_number == 0)
+ return s;
+
+#define _(n) \
+ if (c->n) \
+ { \
+ if (line++) \
+ vec_add1 (s, ' '); \
+ for (char *str = #n; *str; str++) \
+ vec_add1 (s, *str == '_' ? '-' : *str); \
+ }
+ foreach_vnet_dev_port_rx_offloads;
+#undef _
+
+ return s;
+}
+
+u8 *
+format_vnet_dev_port_tx_offloads (u8 *s, va_list *args)
+{
+ vnet_dev_port_tx_offloads_t *c =
+ va_arg (*args, vnet_dev_port_tx_offloads_t *);
+ u32 line = 0;
+
+ if (c->as_number == 0)
+ return s;
+
+#define _(n) \
+ if (c->n) \
+ { \
+ if (line++) \
+ vec_add1 (s, ' '); \
+ for (char *str = #n; *str; str++) \
+ vec_add1 (s, *str == '_' ? '-' : *str); \
+ }
+ foreach_vnet_dev_port_tx_offloads;
+#undef _
+
+ return s;
+}
+
+u8 *
+format_vnet_dev_flow (u8 *s, va_list *args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ u32 flow_index = va_arg (*args, u32);
+ uword private_data = va_arg (*args, uword);
+ vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (dev_instance);
+
+ if (port->port_ops.format_flow)
+ s = format (s, "%U", port->port_ops.format_flow, port, flow_index,
+ private_data);
+
+ return s;
+}
diff --git a/src/vnet/dev/handlers.c b/src/vnet/dev/handlers.c
new file mode 100644
index 00000000000..2a55affe3e3
--- /dev/null
+++ b/src/vnet/dev/handlers.c
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+#include <vnet/flow/flow.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "handler",
+};
+
+clib_error_t *
+vnet_dev_port_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw,
+ u32 frame_size)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+ vnet_dev_rv_t rv;
+
+ vnet_dev_port_cfg_change_req_t req = {
+ .type = VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE,
+ .max_rx_frame_size = frame_size,
+ };
+
+ log_debug (p->dev, "size %u", frame_size);
+
+ rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+ if (rv == VNET_DEV_ERR_NO_CHANGE)
+ return 0;
+
+ if (rv != VNET_DEV_OK)
+ return vnet_dev_port_err (vm, p, rv,
+ "new max frame size is not valid for port");
+
+ if ((rv = vnet_dev_process_port_cfg_change_req (vm, p, &req)) != VNET_DEV_OK)
+ return vnet_dev_port_err (vm, p, rv,
+ "device failed to change max frame size");
+
+ return 0;
+}
+
+u32
+vnet_dev_port_eth_flag_change (vnet_main_t *vnm, vnet_hw_interface_t *hw,
+ u32 flags)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+ vnet_dev_rv_t rv;
+
+ vnet_dev_port_cfg_change_req_t req = {
+ .type = VNET_DEV_PORT_CFG_PROMISC_MODE,
+ };
+
+ switch (flags)
+ {
+ case ETHERNET_INTERFACE_FLAG_DEFAULT_L3:
+ log_debug (p->dev, "promisc off");
+ break;
+ case ETHERNET_INTERFACE_FLAG_ACCEPT_ALL:
+ log_debug (p->dev, "promisc on");
+ req.promisc = 1;
+ break;
+ default:
+ return ~0;
+ }
+
+ rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+ if (rv == VNET_DEV_ERR_NO_CHANGE)
+ return 0;
+
+ if (rv != VNET_DEV_OK)
+ return ~0;
+
+ rv = vnet_dev_process_port_cfg_change_req (vm, p, &req);
+ if (rv == VNET_DEV_OK || rv == VNET_DEV_ERR_NO_CHANGE)
+ return 0;
+ return ~0;
+}
+
+clib_error_t *
+vnet_dev_port_mac_change (vnet_hw_interface_t *hi, const u8 *old,
+ const u8 *new)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+ vnet_dev_rv_t rv;
+
+ vnet_dev_port_cfg_change_req_t req = {
+ .type = VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR,
+ };
+
+ vnet_dev_set_hw_addr_eth_mac (&req.addr, new);
+
+ log_debug (p->dev, "new mac %U", format_vnet_dev_hw_addr, &req.addr);
+
+ rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+ if (rv == VNET_DEV_ERR_NO_CHANGE)
+ return 0;
+
+ if (rv != VNET_DEV_OK)
+ return vnet_dev_port_err (vm, p, rv, "hw address is not valid for port");
+
+ if ((rv = vnet_dev_process_port_cfg_change_req (vm, p, &req)) != VNET_DEV_OK)
+ return vnet_dev_port_err (vm, p, rv, "device failed to change hw address");
+
+ return 0;
+}
+
+clib_error_t *
+vnet_dev_add_del_mac_address (vnet_hw_interface_t *hi, const u8 *address,
+ u8 is_add)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+ vnet_dev_rv_t rv;
+
+ vnet_dev_port_cfg_change_req_t req = {
+ .type = is_add ? VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR :
+ VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR,
+ };
+
+ vnet_dev_set_hw_addr_eth_mac (&req.addr, address);
+
+  log_debug (p->dev, "received (addr %U, is_add %u)", format_vnet_dev_hw_addr,
+ &req.addr, is_add);
+
+ rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+ if (rv != VNET_DEV_OK)
+ return vnet_dev_port_err (vm, p, rv,
+ "provided secondary hw addresses cannot "
+ "be added/removed");
+
+ if ((rv = vnet_dev_process_port_cfg_change_req (vm, p, &req)) != VNET_DEV_OK)
+ return vnet_dev_port_err (
+ vm, p, rv, "device failed to add/remove secondary hw address");
+
+ return 0;
+}
+
+int
+vnet_dev_flow_ops_fn (vnet_main_t *vnm, vnet_flow_dev_op_t op,
+ u32 dev_instance, u32 flow_index, uword *private_data)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (dev_instance);
+ vnet_dev_port_cfg_change_req_t req;
+ vnet_dev_rv_t rv;
+
+ switch (op)
+ {
+ case VNET_FLOW_DEV_OP_ADD_FLOW:
+ req.type = VNET_DEV_PORT_CFG_ADD_RX_FLOW;
+ break;
+ case VNET_FLOW_DEV_OP_DEL_FLOW:
+ req.type = VNET_DEV_PORT_CFG_DEL_RX_FLOW;
+ break;
+ case VNET_FLOW_DEV_OP_GET_COUNTER:
+ req.type = VNET_DEV_PORT_CFG_GET_RX_FLOW_COUNTER;
+ break;
+ case VNET_FLOW_DEV_OP_RESET_COUNTER:
+ req.type = VNET_DEV_PORT_CFG_RESET_RX_FLOW_COUNTER;
+ break;
+ default:
+ log_warn (p->dev, "unsupported request for flow_ops received");
+ return VNET_FLOW_ERROR_NOT_SUPPORTED;
+ }
+
+ req.flow_index = flow_index;
+ req.private_data = private_data;
+
+ rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+ if (rv != VNET_DEV_OK)
+ {
+ log_err (p->dev, "validation failed for flow_ops");
+ return VNET_FLOW_ERROR_NOT_SUPPORTED;
+ }
+
+ if ((rv = vnet_dev_process_port_cfg_change_req (vm, p, &req)) != VNET_DEV_OK)
+ {
+ log_err (p->dev, "request for flow_ops failed");
+ return vnet_dev_flow_err (vm, rv);
+ }
+
+ return 0;
+}
+
+clib_error_t *
+vnet_dev_interface_set_rss_queues (vnet_main_t *vnm, vnet_hw_interface_t *hi,
+ clib_bitmap_t *bitmap)
+{
+ vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+  log_warn (p->dev, "unsupported request to set RSS queues received");
+ return vnet_error (VNET_ERR_UNSUPPORTED, "not implemented");
+}
+
+void
+vnet_dev_clear_hw_interface_counters (u32 instance)
+{
+ vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (instance);
+ vlib_main_t *vm = vlib_get_main ();
+
+ vnet_dev_process_call_port_op_no_rv (vm, port, vnet_dev_port_clear_counters);
+}
+
+void
+vnet_dev_set_interface_next_node (vnet_main_t *vnm, u32 hw_if_index,
+ u32 node_index)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ vnet_dev_port_t *port =
+ vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+ int runtime_update = 0;
+
+ if (node_index == ~0)
+ {
+ port->intf.redirect_to_node_next_index = 0;
+ if (port->intf.feature_arc == 0)
+ {
+ port->intf.rx_next_index =
+ vnet_dev_default_next_index_by_port_type[port->attr.type];
+ runtime_update = 1;
+ }
+ port->intf.redirect_to_node = 0;
+ }
+ else
+ {
+ u16 next_index = vlib_node_add_next (vlib_get_main (),
+ port_rx_eth_node.index, node_index);
+ port->intf.redirect_to_node_next_index = next_index;
+ if (port->intf.feature_arc == 0)
+ {
+ port->intf.rx_next_index = next_index;
+ runtime_update = 1;
+ }
+ port->intf.redirect_to_node = 1;
+ }
+
+ if (runtime_update)
+ {
+ foreach_vnet_dev_port_rx_queue (rxq, port)
+ vnet_dev_rx_queue_rt_request (
+ vm, rxq, (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1 });
+      log_debug (port->dev, "runtime update requested due to change in "
+			    "redirect-to-node configuration");
+ }
+}
diff --git a/src/vnet/dev/log.h b/src/vnet/dev/log.h
new file mode 100644
index 00000000000..5ca7b6620e9
--- /dev/null
+++ b/src/vnet/dev/log.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_LOG_H_
+#define _VNET_DEV_LOG_H_
+
+#define log_debug(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, dev_log.class, "%U" f, format_vnet_dev_log, \
+ dev, clib_string_skip_prefix (__func__, "vnet_dev_"), \
+ ##__VA_ARGS__)
+#define log_notice(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_NOTICE, dev_log.class, "%U" f, \
+ format_vnet_dev_log, dev, 0, ##__VA_ARGS__)
+#define log_warn(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_WARNING, dev_log.class, "%U" f, \
+ format_vnet_dev_log, dev, 0, ##__VA_ARGS__)
+#define log_err(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_ERR, dev_log.class, "%U" f, format_vnet_dev_log, \
+ dev, 0, ##__VA_ARGS__)
+
+#endif /* _VNET_DEV_LOG_H_ */
diff --git a/src/vnet/dev/mgmt.h b/src/vnet/dev/mgmt.h
new file mode 100644
index 00000000000..f13f4075255
--- /dev/null
+++ b/src/vnet/dev/mgmt.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_MGMT_H_
+#define _VNET_DEV_MGMT_H_
+
+#include <vppinfra/clib.h>
+
+#endif /* _VNET_DEV_MGMT_H_ */
diff --git a/src/vnet/dev/pci.c b/src/vnet/dev/pci.c
new file mode 100644
index 00000000000..3cc0cba5003
--- /dev/null
+++ b/src/vnet/dev/pci.c
@@ -0,0 +1,458 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/log.h>
+#include <vlib/unix/unix.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "pci",
+};
+
+static int
+vnet_dev_bus_pci_device_id_to_pci_addr (vlib_pci_addr_t *addr, char *str)
+{
+ unformat_input_t input;
+ uword rv;
+ unformat_init_string (&input, str, strlen (str));
+ rv = unformat (&input, "pci" VNET_DEV_DEVICE_ID_PREFIX_DELIMITER "%U",
+ unformat_vlib_pci_addr, addr);
+ unformat_free (&input);
+ return rv;
+}
+
+static void *
+vnet_dev_bus_pci_get_device_info (vlib_main_t *vm, char *device_id)
+{
+ vnet_dev_bus_pci_device_info_t *info;
+ vlib_pci_addr_t addr = {};
+ clib_error_t *err = 0;
+ vlib_pci_device_info_t *di = 0;
+
+ vlib_log_debug (dev_log.class, "device %s", device_id);
+
+ if (vnet_dev_bus_pci_device_id_to_pci_addr (&addr, device_id) == 0)
+ return 0;
+
+ di = vlib_pci_get_device_info (vm, &addr, &err);
+ if (err)
+ {
+ vlib_log_err (dev_log.class, "get_device_info: %U", format_clib_error,
+ err);
+ clib_error_free (err);
+ return 0;
+ }
+
+ info = clib_mem_alloc (sizeof (vnet_dev_bus_pci_device_info_t));
+ info->addr = addr;
+ info->vendor_id = di->vendor_id;
+ info->device_id = di->device_id;
+ info->revision = di->revision;
+
+ vlib_pci_free_device_info (di);
+ return info;
+}
+
+static void
+vnet_dev_bus_pci_free_device_info (vlib_main_t *vm, void *dev_info)
+{
+ clib_mem_free (dev_info);
+}
+
+static vnet_dev_rv_t
+vnet_dev_bus_pci_open (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ clib_error_t *err = 0;
+ vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+
+ if (vnet_dev_bus_pci_device_id_to_pci_addr (&pdd->addr, dev->device_id) == 0)
+ return VNET_DEV_ERR_INVALID_DEVICE_ID;
+
+ if ((err = vlib_pci_device_open (vm, &pdd->addr, 0, &pdd->handle)))
+ {
+ log_err (dev, "device_open: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+    }
+
+  pdd->pci_handle_valid = 1;
+
+ dev->numa_node = vlib_pci_get_numa_node (vm, pdd->handle);
+
+ if (vlib_pci_supports_virtual_addr_dma (vm, pdd->handle))
+ {
+ dev->va_dma = 1;
+ log_debug (dev, "device supports VA DMA");
+ }
+
+ vlib_pci_set_private_data (vm, pdd->handle, (uword) dev);
+
+ pdd->n_msix_int = vlib_pci_get_num_msix_interrupts (vm, pdd->handle);
+ if (pdd->n_msix_int)
+ {
+ u32 sz = sizeof (pdd->msix_handlers[0]) * pdd->n_msix_int;
+ sz = round_pow2 (sz, CLIB_CACHE_LINE_BYTES);
+ pdd->msix_handlers = clib_mem_alloc_aligned (sz, CLIB_CACHE_LINE_BYTES);
+ clib_memset (pdd->msix_handlers, 0, sz);
+ }
+
+ return VNET_DEV_OK;
+}
+
+static void
+vnet_dev_bus_pci_close (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+
+ if (pdd->intx_handler)
+ vnet_dev_pci_intx_remove_handler (vm, dev);
+
+ if (pdd->msix_handlers)
+ {
+ for (u16 i = 0; i < pdd->n_msix_int; i++)
+ if (pdd->msix_handlers[i])
+ vnet_dev_pci_msix_remove_handler (vm, dev, i, 1);
+ clib_mem_free (pdd->msix_handlers);
+ pdd->msix_handlers = 0;
+ }
+
+ if (pdd->pci_handle_valid)
+ vlib_pci_device_close (vm, pdd->handle);
+}
+
+static vnet_dev_rv_t
+vnet_dev_bus_pci_dma_mem_alloc (vlib_main_t *vm, vnet_dev_t *dev, u32 size,
+ u32 align, void **pp)
+{
+ clib_error_t *err;
+ void *p;
+
+ align = align ? align : CLIB_CACHE_LINE_BYTES;
+ size = round_pow2 (size, align);
+
+ p = vlib_physmem_alloc_aligned_on_numa (vm, size, align, dev->numa_node);
+
+ if (p == 0)
+ {
+ err = vlib_physmem_last_error (vm);
+ log_err (dev, "dev_dma_mem_alloc: physmem_alloc_aligned error %U",
+ format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_DMA_MEM_ALLOC_FAIL;
+ }
+
+ if ((err = vlib_pci_map_dma (vm, vnet_dev_get_pci_handle (dev), p)))
+ {
+ log_err (dev, "dev_dma_mem_alloc: pci_map_dma: %U", format_clib_error,
+ err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_DMA_MEM_ALLOC_FAIL;
+ }
+
+ clib_memset (p, 0, size);
+ pp[0] = p;
+ return VNET_DEV_OK;
+}
+
+static void
+vnet_dev_bus_pci_dma_mem_free (vlib_main_t *vm, vnet_dev_t *dev, void *p)
+{
+ if (p)
+ vlib_physmem_free (vm, p);
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_read_config_header (vlib_main_t *vm, vnet_dev_t *dev,
+ vlib_pci_config_hdr_t *hdr)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ clib_error_t *err;
+
+ err = vlib_pci_read_write_config (vm, h, VLIB_READ, 0, hdr, sizeof (*hdr));
+ if (err)
+ {
+ log_err (dev, "pci_read_config_header: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_map_region (vlib_main_t *vm, vnet_dev_t *dev, u8 region,
+ void **pp)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ clib_error_t *err;
+
+ if ((err = vlib_pci_map_region (vm, h, region, pp)))
+ {
+ log_err (dev, "pci_map_region: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_function_level_reset (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ clib_error_t *err;
+
+ if ((err = vlib_pci_function_level_reset (vm, h)))
+ {
+ log_err (dev, "pci_function_level_reset: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_bus_master_enable (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ clib_error_t *err;
+
+ if ((err = vlib_pci_bus_master_enable (vm, h)))
+ {
+ log_err (dev, "pci_bus_master_enable: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+ return VNET_DEV_OK;
+}
+
+static void
+vnet_dev_pci_intx_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+ vnet_dev_t *dev = (vnet_dev_t *) vlib_pci_get_private_data (vm, h);
+ vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+
+ if (pdd->intx_handler)
+ pdd->intx_handler (vm, dev);
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_intx_add_handler (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_pci_intx_handler_fn_t *fn)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+  clib_error_t *err;
+
+ err = vlib_pci_register_intx_handler (vm, h, vnet_dev_pci_intx_handler);
+
+ if (err)
+ {
+ log_err (dev, "pci_register_intx_handler: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+    }
+
+  pdd->intx_handler = fn;
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_intx_remove_handler (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+ clib_error_t *err;
+
+ err = vlib_pci_unregister_intx_handler (vm, h);
+
+ if (err)
+ {
+ log_err (dev, "pci_unregister_intx_handler: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+
+ pdd->intx_handler = 0;
+
+ return VNET_DEV_OK;
+}
+
+static void
+vnet_dev_pci_msix_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h, u16 line)
+{
+ vnet_dev_t *dev = (vnet_dev_t *) vlib_pci_get_private_data (vm, h);
+ vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+
+ if (line < pdd->n_msix_int && pdd->msix_handlers[line])
+ pdd->msix_handlers[line](vm, dev, line);
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_msix_add_handler (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_pci_msix_handler_fn_t *fn, u16 first,
+ u16 count)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+ clib_error_t *err;
+
+ err = vlib_pci_register_msix_handler (vm, h, first, count,
+ vnet_dev_pci_msix_handler);
+
+ if (err)
+ {
+ log_err (dev, "pci_register_msix_handler: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+
+ for (u16 i = first; i < first + count; i++)
+ {
+ ASSERT (pdd->msix_handlers[i] == 0);
+ pdd->msix_handlers[i] = fn;
+ }
+
+ return VNET_DEV_OK;
+}
+
+void
+vnet_dev_pci_msix_set_polling_thread (vlib_main_t *vm, vnet_dev_t *dev,
+ u16 line, u16 thread_index)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ u32 index;
+
+ index = vlib_pci_get_msix_file_index (vm, h, line);
+
+ clib_file_set_polling_thread (&file_main, index, thread_index);
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_msix_remove_handler (vlib_main_t *vm, vnet_dev_t *dev, u16 first,
+ u16 count)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+ clib_error_t *err;
+
+ err = vlib_pci_unregister_msix_handler (vm, h, first, count);
+
+ if (err)
+ {
+ log_err (dev, "pci_unregister_msix_handler: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+
+ for (u16 i = first; i < first + count; i++)
+ {
+ ASSERT (pdd->msix_handlers[i] != 0);
+ pdd->msix_handlers[i] = 0;
+ }
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_msix_enable (vlib_main_t *vm, vnet_dev_t *dev, u16 first,
+ u16 count)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ clib_error_t *err;
+
+ err = vlib_pci_enable_msix_irq (vm, h, first, count);
+
+ if (err)
+ {
+ log_err (dev, "pci_enable_msix_irq: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_msix_disable (vlib_main_t *vm, vnet_dev_t *dev, u16 first,
+ u16 count)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ clib_error_t *err;
+
+ err = vlib_pci_disable_msix_irq (vm, h, first, count);
+
+ if (err)
+ {
+ log_err (dev, "pci_disble_msix_irq: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_bus_master_disable (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ clib_error_t *err;
+
+ if ((err = vlib_pci_bus_master_disable (vm, h)))
+ {
+ log_err (dev, "pci_bus_master_disable: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+ return VNET_DEV_OK;
+}
+
+static u8 *
+format_dev_pci_device_info (u8 *s, va_list *args)
+{
+ vnet_dev_format_args_t __clib_unused *a =
+ va_arg (*args, vnet_dev_format_args_t *);
+ vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+ vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_pci_config_t cfg = {};
+ clib_error_t *err;
+
+ s = format (s, "PCIe address is %U", format_vlib_pci_addr, &pdd->addr);
+
+ err = vlib_pci_read_write_config (vm, pdd->handle, VLIB_READ, 0, &cfg,
+ sizeof (cfg));
+ if (!err)
+ {
+ s = format (s, ", port is %U, speed is %U (max %U)",
+ format_vlib_pci_link_port, &cfg, format_vlib_pci_link_speed,
+ &cfg, format_vlib_pci_link_speed_cap, &cfg);
+ }
+ else
+ clib_error_free (err);
+
+ return s;
+}
+
+static u8 *
+format_dev_pci_device_addr (u8 *s, va_list *args)
+{
+ vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+ vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+ return format (s, "%U", format_vlib_pci_addr, &pdd->addr);
+}
+
+VNET_DEV_REGISTER_BUS (pci) = {
+ .name = "pci",
+  .device_data_size = sizeof (vnet_dev_bus_pci_device_data_t),
+ .ops = {
+ .device_open = vnet_dev_bus_pci_open,
+ .device_close = vnet_dev_bus_pci_close,
+ .get_device_info = vnet_dev_bus_pci_get_device_info,
+ .free_device_info = vnet_dev_bus_pci_free_device_info,
+ .dma_mem_alloc_fn = vnet_dev_bus_pci_dma_mem_alloc,
+ .dma_mem_free_fn = vnet_dev_bus_pci_dma_mem_free,
+ .format_device_info = format_dev_pci_device_info,
+ .format_device_addr = format_dev_pci_device_addr,
+ },
+};
diff --git a/src/vnet/dev/pci.h b/src/vnet/dev/pci.h
new file mode 100644
index 00000000000..ce9a53aa273
--- /dev/null
+++ b/src/vnet/dev/pci.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_PCI_H_
+#define _VNET_DEV_PCI_H_
+
+#include <vppinfra/clib.h>
+#include <vlib/pci/pci.h>
+#include <vnet/dev/dev.h>
+
+typedef void (vnet_dev_pci_intx_handler_fn_t) (vlib_main_t *vm,
+ vnet_dev_t *dev);
+typedef void (vnet_dev_pci_msix_handler_fn_t) (vlib_main_t *vm,
+ vnet_dev_t *dev, u16 line);
+
+typedef struct
+{
+ vlib_pci_addr_t addr;
+ u16 vendor_id;
+ u16 device_id;
+ u8 revision;
+} vnet_dev_bus_pci_device_info_t;
+
+typedef struct
+{
+ u8 pci_handle_valid : 1;
+ u16 n_msix_int;
+ vlib_pci_addr_t addr;
+ vlib_pci_dev_handle_t handle;
+ vnet_dev_pci_intx_handler_fn_t *intx_handler;
+ vnet_dev_pci_msix_handler_fn_t **msix_handlers;
+} vnet_dev_bus_pci_device_data_t;
+
+static_always_inline vnet_dev_bus_pci_device_data_t *
+vnet_dev_get_bus_pci_device_data (vnet_dev_t *dev)
+{
+ return (void *) dev->bus_data;
+}
+
+static_always_inline vlib_pci_dev_handle_t
+vnet_dev_get_pci_handle (vnet_dev_t *dev)
+{
+ return ((vnet_dev_bus_pci_device_data_t *) (dev->bus_data))->handle;
+}
+
+static_always_inline vlib_pci_addr_t
+vnet_dev_get_pci_addr (vnet_dev_t *dev)
+{
+ return ((vnet_dev_bus_pci_device_data_t *) (dev->bus_data))->addr;
+}
+
+static_always_inline u16
+vnet_dev_get_pci_n_msix_interrupts (vnet_dev_t *dev)
+{
+ return vnet_dev_get_bus_pci_device_data (dev)->n_msix_int;
+}
+
+vnet_dev_rv_t vnet_dev_pci_read_config_header (vlib_main_t *, vnet_dev_t *,
+ vlib_pci_config_hdr_t *);
+
+vnet_dev_rv_t vnet_dev_pci_map_region (vlib_main_t *, vnet_dev_t *, u8,
+ void **);
+vnet_dev_rv_t vnet_dev_pci_function_level_reset (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_pci_bus_master_enable (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_pci_bus_master_disable (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_pci_intx_add_handler (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_pci_intx_handler_fn_t *);
+vnet_dev_rv_t vnet_dev_pci_intx_remove_handler (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_pci_msix_add_handler (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_pci_msix_handler_fn_t *,
+ u16, u16);
+vnet_dev_rv_t vnet_dev_pci_msix_remove_handler (vlib_main_t *, vnet_dev_t *,
+ u16, u16);
+vnet_dev_rv_t vnet_dev_pci_msix_enable (vlib_main_t *, vnet_dev_t *, u16, u16);
+vnet_dev_rv_t vnet_dev_pci_msix_disable (vlib_main_t *, vnet_dev_t *, u16,
+ u16);
+void vnet_dev_pci_msix_set_polling_thread (vlib_main_t *, vnet_dev_t *, u16,
+ u16);
+
+#endif /* _VNET_DEV_PCI_H_ */
diff --git a/src/vnet/dev/port.c b/src/vnet/dev/port.c
new file mode 100644
index 00000000000..8a6df54cbc8
--- /dev/null
+++ b/src/vnet/dev/port.c
@@ -0,0 +1,748 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "port",
+};
+
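+/* "port-rx-eth" anchors the rx next nodes and the feature arc below; its
+ * dummy function is never expected to run (hence the ASSERT) */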
+static uword
+dummy_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ ASSERT (0);
+ return 0;
+}
+
+VLIB_REGISTER_NODE (port_rx_eth_node) = {
+ .function = dummy_input_fn,
+ .name = "port-rx-eth",
+ .runtime_data_bytes = sizeof (vnet_dev_rx_node_runtime_t),
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_DISABLED,
+ .n_next_nodes = VNET_DEV_ETH_RX_PORT_N_NEXTS,
+ .next_nodes = {
+#define _(n, s) [VNET_DEV_ETH_RX_PORT_NEXT_##n] = s,
+ foreach_vnet_dev_port_rx_next
+#undef _
+ },
+};
+
+u16 vnet_dev_default_next_index_by_port_type[] = {
+ [VNET_DEV_PORT_TYPE_ETHERNET] = VNET_DEV_ETH_RX_PORT_NEXT_ETH_INPUT,
+};
+
+VNET_FEATURE_ARC_INIT (eth_port_rx, static) = {
+ .arc_name = "port-rx-eth",
+ .start_nodes = VNET_FEATURES ("port-rx-eth"),
+ .last_in_arc = "ethernet-input",
+ .arc_index_ptr = &vnet_dev_main.eth_port_rx_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (l2_patch, static) = {
+ .arc_name = "port-rx-eth",
+ .node_name = "l2-patch",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+VNET_FEATURE_INIT (worker_handoff, static) = {
+ .arc_name = "port-rx-eth",
+ .node_name = "worker-handoff",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+VNET_FEATURE_INIT (span_input, static) = {
+ .arc_name = "port-rx-eth",
+ .node_name = "span-input",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+VNET_FEATURE_INIT (p2p_ethernet_node, static) = {
+ .arc_name = "port-rx-eth",
+ .node_name = "p2p-ethernet-input",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+VNET_FEATURE_INIT (ethernet_input, static) = {
+ .arc_name = "port-rx-eth",
+ .node_name = "ethernet-input",
+ .runs_before = 0, /* not before any other features */
+};
+
+void
+vnet_dev_port_free (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+
+ vnet_dev_port_validate (vm, port);
+
+ ASSERT (port->started == 0);
+
+ log_debug (dev, "port %u", port->port_id);
+
+ if (port->port_ops.free)
+ port->port_ops.free (vm, port);
+
+ pool_free (port->secondary_hw_addr);
+ pool_free (port->rx_queues);
+ pool_free (port->tx_queues);
+ vnet_dev_arg_free (&port->args);
+ pool_put_index (dev->ports, port->index);
+ clib_mem_free (port);
+}
+
+void
+vnet_dev_port_update_tx_node_runtime (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_port_validate (vm, port);
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ u32 ti;
+ clib_bitmap_foreach (ti, q->assigned_threads)
+ {
+ vlib_main_t *tvm = vlib_get_main_by_index (ti);
+ vlib_node_runtime_t *nr =
+ vlib_node_get_runtime (tvm, port->intf.tx_node_index);
+ vnet_dev_tx_node_runtime_t *tnr = vnet_dev_get_tx_node_runtime (nr);
+ tnr->hw_if_index = port->intf.hw_if_index;
+ tnr->tx_queue = q;
+ }
+ }
+}
+
+void
+vnet_dev_port_stop (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_rt_op_t *ops = 0;
+ u16 n_threads = vlib_get_n_threads ();
+
+ log_debug (dev, "stopping port %u", port->port_id);
+
+ for (u16 i = 0; i < n_threads; i++)
+ {
+ vnet_dev_rt_op_t op = { .thread_index = i, .port = port };
+ vec_add1 (ops, op);
+ }
+
+ vnet_dev_rt_exec_ops (vm, dev, ops, vec_len (ops));
+ vec_free (ops);
+
+ port->port_ops.stop (vm, port);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ q->started = 0;
+ log_debug (dev, "port %u rx queue %u stopped", port->port_id,
+ q->queue_id);
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ q->started = 0;
+ log_debug (dev, "port %u tx queue %u stopped", port->port_id,
+ q->queue_id);
+ }
+
+ log_debug (dev, "port %u stopped", port->port_id);
+ port->started = 0;
+}
+
+vnet_dev_rv_t
+vnet_dev_port_start_all_rx_queues (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ vnet_dev_port_validate (vm, port);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ rv = vnet_dev_rx_queue_start (vm, q);
+ if (rv != VNET_DEV_OK)
+ return rv;
+ }
+ return rv;
+}
+
+vnet_dev_rv_t
+vnet_dev_port_start_all_tx_queues (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ vnet_dev_port_validate (vm, port);
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ rv = vnet_dev_tx_queue_start (vm, q);
+ if (rv != VNET_DEV_OK)
+ return rv;
+ }
+ return rv;
+}
+
+vnet_dev_rv_t
+vnet_dev_port_start (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ u16 n_threads = vlib_get_n_threads ();
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_rt_op_t *ops = 0;
+ vnet_dev_rv_t rv;
+
+ vnet_dev_port_validate (vm, port);
+
+ log_debug (dev, "starting port %u", port->port_id);
+
+ vnet_dev_port_update_tx_node_runtime (vm, port);
+
+ if ((rv = port->port_ops.start (vm, port)) != VNET_DEV_OK)
+ {
+ vnet_dev_port_stop (vm, port);
+ return rv;
+ }
+
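+  /* build one runtime-update op per thread so every thread's rx node
+   * picks up this port's queues */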
+ for (u16 i = 0; i < n_threads; i++)
+ {
+ vnet_dev_rt_op_t op = { .thread_index = i, .port = port };
+ vec_add1 (ops, op);
+ }
+
+ vnet_dev_rt_exec_ops (vm, dev, ops, vec_len (ops));
+ vec_free (ops);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if (q->enabled)
+ {
+ log_debug (dev, "port %u rx queue %u started", port->port_id,
+ q->queue_id);
+ q->started = 1;
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ if (q->enabled)
+ {
+ log_debug (dev, "port %u tx queue %u started", port->port_id,
+ q->queue_id);
+ q->started = 1;
+ }
+
+ port->started = 1;
+ log_debug (dev, "port %u started", port->port_id);
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+vnet_dev_port_add (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_port_id_t id,
+ vnet_dev_port_add_args_t *args)
+{
+ vnet_dev_port_t **pp, *port;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ ASSERT (args->port.attr.type != VNET_DEV_PORT_TYPE_UNKNOWN);
+ ASSERT (args->port.attr.max_supported_rx_frame_size);
+
+ port =
+ vnet_dev_alloc_with_data (sizeof (vnet_dev_port_t), args->port.data_size);
+ pool_get (dev->ports, pp);
+ pp[0] = port;
+ clib_memcpy (vnet_dev_get_port_data (port), args->port.initial_data,
+ args->port.data_size);
+ port->port_id = id;
+ port->index = pp - dev->ports;
+ port->dev = dev;
+ port->attr = args->port.attr;
+ port->rx_queue_config = args->rx_queue.config;
+ port->tx_queue_config = args->tx_queue.config;
+ port->rx_queue_ops = args->rx_queue.ops;
+ port->tx_queue_ops = args->tx_queue.ops;
+ port->port_ops = args->port.ops;
+ port->rx_node = *args->rx_node;
+ port->tx_node = *args->tx_node;
+
+ if (args->port.args)
+ for (vnet_dev_arg_t *a = args->port.args; a->type != VNET_DEV_ARG_END; a++)
+ vec_add1 (port->args, *a);
+
+ /* defaults out of port attributes */
+ port->max_rx_frame_size = args->port.attr.max_supported_rx_frame_size;
+ port->primary_hw_addr = args->port.attr.hw_addr;
+
+ if (port->attr.type == VNET_DEV_PORT_TYPE_ETHERNET)
+ {
+ if (port->max_rx_frame_size > 1514 &&
+ port->attr.caps.change_max_rx_frame_size)
+ port->max_rx_frame_size = 1514;
+ }
+
+ if (port->port_ops.alloc)
+ rv = port->port_ops.alloc (vm, port);
+
+ if (rv == VNET_DEV_OK)
+ port->initialized = 1;
+
+ return rv;
+}
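+
+/*
+ * Usage sketch (hypothetical driver registration; values are
+ * illustrative, only fields referenced above are shown, and rx_node /
+ * tx_node must point at valid node descriptions since they are
+ * dereferenced unconditionally):
+ *
+ *   vnet_dev_port_add_args_t a = {
+ *     .port = {
+ *       .attr = { .type = VNET_DEV_PORT_TYPE_ETHERNET,
+ *                 .max_supported_rx_frame_size = 9216 },
+ *       .ops = { .alloc = my_port_alloc, .start = my_port_start,
+ *                .stop = my_port_stop },
+ *     },
+ *     .rx_queue = { .config = my_rxq_config, .ops = my_rxq_ops },
+ *     .tx_queue = { .config = my_txq_config, .ops = my_txq_ops },
+ *     .rx_node = &my_rx_node_desc,
+ *     .tx_node = &my_tx_node_desc,
+ *   };
+ *   rv = vnet_dev_port_add (vm, dev, 0, &a);
+ */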
+
+vnet_dev_rv_t
+vnet_dev_port_cfg_change_req_validate (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *req)
+{
+ vnet_dev_rv_t rv;
+ vnet_dev_hw_addr_t *addr;
+ int found;
+
+ if (req->validated)
+ return VNET_DEV_OK;
+
+ switch (req->type)
+ {
+ case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+ if (req->max_rx_frame_size > port->attr.max_supported_rx_frame_size)
+ return VNET_DEV_ERR_INVALID_VALUE;
+ if (req->max_rx_frame_size == port->max_rx_frame_size)
+ return VNET_DEV_ERR_NO_CHANGE;
+ break;
+
+ case VNET_DEV_PORT_CFG_PROMISC_MODE:
+ if (req->promisc == port->promisc)
+ return VNET_DEV_ERR_NO_CHANGE;
+ break;
+
+ case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+ if (clib_memcmp (&req->addr, &port->primary_hw_addr,
+ sizeof (vnet_dev_hw_addr_t)) == 0)
+ return VNET_DEV_ERR_NO_CHANGE;
+ break;
+
+ case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+ pool_foreach (addr, port->secondary_hw_addr)
+ if (clib_memcmp (addr, &req->addr, sizeof (*addr)) == 0)
+ return VNET_DEV_ERR_ALREADY_EXISTS;
+ break;
+
+ case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+ found = 0;
+ pool_foreach (addr, port->secondary_hw_addr)
+ if (clib_memcmp (addr, &req->addr, sizeof (*addr)) == 0)
+ found = 1;
+ if (!found)
+ return VNET_DEV_ERR_NO_SUCH_ENTRY;
+ break;
+
+ default:
+ break;
+ }
+
+ if (port->port_ops.config_change_validate)
+ {
+ rv = port->port_ops.config_change_validate (vm, port, req);
+ if (rv != VNET_DEV_OK)
+ return rv;
+ }
+ else
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+
+ req->validated = 1;
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+vnet_dev_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *req)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ vnet_dev_hw_addr_t *a;
+ vnet_dev_rx_queue_t *rxq = 0;
+ u8 enable = 0;
+
+ vnet_dev_port_validate (vm, port);
+
+ if (req->type == VNET_DEV_PORT_CFG_RXQ_INTR_MODE_ENABLE ||
+ req->type == VNET_DEV_PORT_CFG_RXQ_INTR_MODE_DISABLE)
+ {
+ if (req->all_queues == 0)
+ {
+ rxq = vnet_dev_port_get_rx_queue_by_id (port, req->queue_id);
+ if (rxq == 0)
+ return VNET_DEV_ERR_BUG;
+ }
+ }
+
+ if ((rv = vnet_dev_port_cfg_change_req_validate (vm, port, req)))
+ return rv;
+
+ if (port->port_ops.config_change)
+ rv = port->port_ops.config_change (vm, port, req);
+ else
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+
+ if (rv != VNET_DEV_OK)
+ return rv;
+
+ switch (req->type)
+ {
+ case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+ port->max_rx_frame_size = req->max_rx_frame_size;
+ break;
+
+ case VNET_DEV_PORT_CFG_PROMISC_MODE:
+ port->promisc = req->promisc;
+ break;
+
+ case VNET_DEV_PORT_CFG_RXQ_INTR_MODE_ENABLE:
+ enable = 1;
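+      /* fall through */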
+ case VNET_DEV_PORT_CFG_RXQ_INTR_MODE_DISABLE:
+ if (req->all_queues)
+ {
+ clib_bitmap_t *bmp = 0;
+ vnet_dev_rt_op_t *ops = 0;
+ u32 i;
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ q->interrupt_mode = enable;
+ bmp = clib_bitmap_set (bmp, q->rx_thread_index, 1);
+ }
+
+ clib_bitmap_foreach (i, bmp)
+ {
+ vnet_dev_rt_op_t op = { .port = port, .thread_index = i };
+ vec_add1 (ops, op);
+ }
+
+ vnet_dev_rt_exec_ops (vm, port->dev, ops, vec_len (ops));
+ clib_bitmap_free (bmp);
+ vec_free (ops);
+ }
+ else
+ {
+ rxq->interrupt_mode = enable;
+ vnet_dev_rt_exec_ops (vm, port->dev,
+ &(vnet_dev_rt_op_t){
+ .port = port,
+ .thread_index = rxq->rx_thread_index,
+ },
+ 1);
+ }
+ break;
+
+ case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+ clib_memcpy (&port->primary_hw_addr, &req->addr,
+ sizeof (vnet_dev_hw_addr_t));
+ break;
+
+ case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+ pool_get (port->secondary_hw_addr, a);
+ clib_memcpy (a, &req->addr, sizeof (vnet_dev_hw_addr_t));
+ break;
+
+ case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+ pool_foreach (a, port->secondary_hw_addr)
+ if (clib_memcmp (a, &req->addr, sizeof (vnet_dev_hw_addr_t)) == 0)
+ {
+ pool_put (port->secondary_hw_addr, a);
+ break;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return VNET_DEV_OK;
+}
+
+void
+vnet_dev_port_state_change (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_state_changes_t changes)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+
+ vnet_dev_port_validate (vm, port);
+
+ if (changes.change.link_speed)
+ {
+ port->speed = changes.link_speed;
+ if (port->interface_created)
+ vnet_hw_interface_set_link_speed (vnm, port->intf.hw_if_index,
+ changes.link_speed);
+ log_debug (port->dev, "port speed changed to %u", changes.link_speed);
+ }
+
+ if (changes.change.link_state)
+ {
+ port->link_up = changes.link_state;
+ if (port->interface_created)
+ vnet_hw_interface_set_flags (
+ vnm, port->intf.hw_if_index,
+ changes.link_state ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
+ log_debug (port->dev, "port link state changed to %s",
+ changes.link_state ? "up" : "down");
+ }
+}
+
+void
+vnet_dev_port_add_counters (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_counter_t *counters, u16 n_counters)
+{
+ vnet_dev_port_validate (vm, port);
+
+ port->counter_main =
+ vnet_dev_counters_alloc (vm, counters, n_counters, "%s port %u counters",
+ port->dev->device_id, port->port_id);
+}
+
+void
+vnet_dev_port_free_counters (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_port_validate (vm, port);
+
+ if (port->counter_main)
+ vnet_dev_counters_free (vm, port->counter_main);
+}
+
+vnet_dev_rv_t
+vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u16 n_threads = vlib_get_n_threads ();
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_port_t **pp;
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+ u16 ti = 0;
+
+ if (port->intf.name[0] == 0)
+ {
+ u8 *s;
+ s = format (0, "%s%u/%u",
+ dm->drivers[port->dev->driver_index].registration->name,
+ port->dev->index, port->index);
+ u32 n = vec_len (s);
+
+ if (n >= sizeof (port->intf.name))
+ {
+ vec_free (s);
+ return VNET_DEV_ERR_BUG;
+ }
+ clib_memcpy (port->intf.name, s, n);
+ port->intf.name[n] = 0;
+ vec_free (s);
+ }
+
+ log_debug (
+ dev, "allocating %u rx queues with size %u and %u tx queues with size %u",
+ port->intf.num_rx_queues, port->intf.rxq_sz, port->intf.num_tx_queues,
+ port->intf.txq_sz);
+
+ for (int i = 0; i < port->intf.num_rx_queues; i++)
+ if ((rv = vnet_dev_rx_queue_alloc (vm, port, port->intf.rxq_sz)) !=
+ VNET_DEV_OK)
+ goto error;
+
+ for (u32 i = 0; i < port->intf.num_tx_queues; i++)
+ if ((rv = vnet_dev_tx_queue_alloc (vm, port, port->intf.txq_sz)) !=
+ VNET_DEV_OK)
+ goto error;
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ q->assigned_threads = clib_bitmap_set (q->assigned_threads, ti, 1);
+ log_debug (dev, "port %u tx queue %u assigned to thread %u",
+ port->port_id, q->queue_id, ti);
+ if (++ti >= n_threads)
+ break;
+ }
+
+  /* pool of port pointers helps us assign a unique dev_instance */
+ pool_get (dm->ports_by_dev_instance, pp);
+ port->intf.dev_instance = pp - dm->ports_by_dev_instance;
+ pp[0] = port;
+
+ if (port->attr.type == VNET_DEV_PORT_TYPE_ETHERNET)
+ {
+ vnet_device_class_t *dev_class;
+ vnet_dev_driver_t *driver;
+ vnet_sw_interface_t *sw;
+ vnet_hw_interface_t *hw;
+ vnet_hw_if_caps_t caps = 0;
+ u32 rx_node_index;
+
+ driver = pool_elt_at_index (dm->drivers, dev->driver_index);
+
+ /* hack to provide per-port tx node function */
+ dev_class = vnet_get_device_class (vnm, driver->dev_class_index);
+ dev_class->tx_fn_registrations = port->tx_node.registrations;
+ dev_class->format_tx_trace = port->tx_node.format_trace;
+ dev_class->tx_function_error_counters = port->tx_node.error_counters;
+ dev_class->tx_function_n_errors = port->tx_node.n_error_counters;
+
+ /* create new interface including tx and output nodes */
+ port->intf.hw_if_index = vnet_eth_register_interface (
+ vnm, &(vnet_eth_interface_registration_t){
+ .address = port->primary_hw_addr.eth_mac,
+ .max_frame_size = port->max_rx_frame_size,
+ .dev_class_index = driver->dev_class_index,
+ .dev_instance = port->intf.dev_instance,
+ .cb.set_max_frame_size = vnet_dev_port_set_max_frame_size,
+ .cb.flag_change = vnet_dev_port_eth_flag_change,
+ });
+
+ sw = vnet_get_hw_sw_interface (vnm, port->intf.hw_if_index);
+ hw = vnet_get_hw_interface (vnm, port->intf.hw_if_index);
+ port->intf.sw_if_index = sw->sw_if_index;
+ vnet_hw_interface_set_flags (
+ vnm, port->intf.hw_if_index,
+ port->link_up ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
+ if (port->speed)
+ vnet_hw_interface_set_link_speed (vnm, port->intf.hw_if_index,
+ port->speed);
+
+ port->intf.tx_node_index = hw->tx_node_index;
+
+ caps |= port->attr.caps.interrupt_mode ? VNET_HW_IF_CAP_INT_MODE : 0;
+ caps |= port->attr.caps.mac_filter ? VNET_HW_IF_CAP_MAC_FILTER : 0;
+ caps |= port->attr.tx_offloads.tcp_gso ? VNET_HW_IF_CAP_TCP_GSO : 0;
+ caps |= port->attr.tx_offloads.ip4_cksum ? VNET_HW_IF_CAP_TX_CKSUM : 0;
+
+ if (caps)
+ vnet_hw_if_set_caps (vnm, port->intf.hw_if_index, caps);
+
+ /* create / reuse rx node */
+ if (vec_len (dm->free_rx_node_indices))
+ {
+ vlib_node_t *n;
+ rx_node_index = vec_pop (dm->free_rx_node_indices);
+ vlib_node_rename (vm, rx_node_index, "%s-rx", port->intf.name);
+ n = vlib_get_node (vm, rx_node_index);
+ n->function = vlib_node_get_preferred_node_fn_variant (
+ vm, port->rx_node.registrations);
+ n->format_trace = port->rx_node.format_trace;
+ vlib_register_errors (vm, rx_node_index,
+ port->rx_node.n_error_counters, 0,
+ port->rx_node.error_counters);
+ }
+ else
+ {
+ vlib_node_registration_t rx_node_reg = {
+ .sibling_of = "port-rx-eth",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_DISABLED,
+ .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
+ .node_fn_registrations = port->rx_node.registrations,
+ .format_trace = port->rx_node.format_trace,
+ .error_counters = port->rx_node.error_counters,
+ .n_errors = port->rx_node.n_error_counters,
+ };
+ rx_node_index =
+ vlib_register_node (vm, &rx_node_reg, "%s-rx", port->intf.name);
+ }
+ port->rx_node_assigned = 1;
+ port->intf.rx_node_index = rx_node_index;
+ port->intf.rx_next_index =
+ vnet_dev_default_next_index_by_port_type[port->attr.type];
+
+ vlib_worker_thread_node_runtime_update ();
+ log_debug (dev,
+ "ethernet interface created, hw_if_index %u sw_if_index %u "
+ "rx_node_index %u tx_node_index %u",
+ port->intf.hw_if_index, port->intf.sw_if_index,
+ port->intf.rx_node_index, port->intf.tx_node_index);
+ }
+
+ port->interface_created = 1;
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ vnet_buffer (&q->buffer_template)->sw_if_index[VLIB_RX] =
+ port->intf.sw_if_index;
+ /* poison to catch node not calling runtime update function */
+ q->next_index = ~0;
+ q->interrupt_mode = port->intf.default_is_intr_mode;
+ vnet_dev_rx_queue_rt_request (
+ vm, q, (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1 });
+ }
+
+ vnet_dev_port_update_tx_node_runtime (vm, port);
+
+ if (port->port_ops.init)
+ rv = port->port_ops.init (vm, port);
+
+error:
+ if (rv != VNET_DEV_OK)
+ vnet_dev_port_if_remove (vm, port);
+ return rv;
+}
+
+vnet_dev_rv_t
+vnet_dev_port_if_remove (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ vnet_dev_port_validate (vm, port);
+
+ if (port->started)
+ vnet_dev_port_stop (vm, port);
+
+ if (port->rx_node_assigned)
+ {
+ vlib_node_rename (vm, port->intf.rx_node_index, "deleted-%u",
+ port->intf.rx_node_index);
+ vec_add1 (dm->free_rx_node_indices, port->intf.rx_node_index);
+ port->rx_node_assigned = 0;
+ }
+
+ if (port->interface_created)
+ {
+ vlib_worker_thread_barrier_sync (vm);
+ vnet_delete_hw_interface (vnm, port->intf.hw_if_index);
+ vlib_worker_thread_barrier_release (vm);
+ pool_put_index (dm->ports_by_dev_instance, port->intf.dev_instance);
+ port->interface_created = 0;
+ }
+
+ port->intf = (typeof (port->intf)){};
+
+ if (port->port_ops.deinit)
+ port->port_ops.deinit (vm, port);
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ vnet_dev_tx_queue_free (vm, q);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ vnet_dev_rx_queue_free (vm, q);
+
+ vnet_dev_port_free_counters (vm, port);
+
+ foreach_vnet_dev_port_args (v, port)
+ vnet_dev_arg_clear_value (v);
+
+ return VNET_DEV_OK;
+}
+void
+vnet_dev_port_clear_counters (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ if (port->counter_main)
+ vnet_dev_counters_clear (vm, port->counter_main);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if (q->counter_main)
+ vnet_dev_counters_clear (vm, q->counter_main);
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ if (q->counter_main)
+ vnet_dev_counters_clear (vm, q->counter_main);
+
+ log_notice (port->dev, "counters cleared on port %u", port->port_id);
+}
diff --git a/src/vnet/dev/process.c b/src/vnet/dev/process.c
new file mode 100644
index 00000000000..3c1f0b8d2d8
--- /dev/null
+++ b/src/vnet/dev/process.c
@@ -0,0 +1,474 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/error.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/log.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "process",
+};
+
+typedef enum
+{
+ VNET_DEV_EVENT_PERIODIC_STOP,
+ VNET_DEV_EVENT_PERIODIC_START,
+ VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ,
+ VNET_DEV_EVENT_PROCESS_QUIT,
+ VNET_DEV_EVENT_CALL_OP,
+ VNET_DEV_EVENT_CALL_OP_NO_RV,
+ VNET_DEV_EVENT_CALL_OP_NO_WAIT,
+ VNET_DEV_EVENT_CALL_PORT_OP,
+ VNET_DEV_EVENT_CALL_PORT_OP_NO_RV,
+ VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT,
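+  /* ~0 matches the event type vlib_process_get_events () reports on a
+   * clock (timeout) wakeup */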
+ VNET_DEV_EVENT_CLOCK = ~0
+} __clib_packed vnet_dev_event_t;
+
+typedef struct
+{
+ vnet_dev_event_t event;
+ u8 reply_needed : 1;
+ u32 calling_process_index;
+ union
+ {
+ struct
+ {
+ vnet_dev_port_t *port;
+ vnet_dev_port_cfg_change_req_t *change_req;
+ } port_cfg_change;
+ struct
+ {
+ vnet_dev_op_t *op;
+ } call_op;
+ struct
+ {
+ vnet_dev_op_no_rv_t *op;
+ } call_op_no_rv;
+ struct
+ {
+ vnet_dev_op_no_rv_t *op;
+ } call_op_no_wait;
+ struct
+ {
+ vnet_dev_port_op_t *op;
+ vnet_dev_port_t *port;
+ } call_port_op;
+ struct
+ {
+ vnet_dev_port_op_no_rv_t *op;
+ vnet_dev_port_t *port;
+ } call_port_op_no_rv;
+ struct
+ {
+ vnet_dev_port_op_no_rv_t *op;
+ vnet_dev_port_t *port;
+ } call_port_op_no_wait;
+ };
+} vnet_dev_event_data_t;
+
+static vnet_dev_rv_t
+vnet_dev_process_one_event (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_event_data_t *ed)
+{
+ vnet_dev_port_t *p;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ switch (ed->event)
+ {
+ case VNET_DEV_EVENT_CLOCK:
+ break;
+ case VNET_DEV_EVENT_PROCESS_QUIT:
+ log_debug (dev, "quit requested");
+ dev->process_node_quit = 1;
+ break;
+ case VNET_DEV_EVENT_PERIODIC_START:
+ log_debug (dev, "periodic start");
+ dev->process_node_periodic = 1;
+ break;
+ case VNET_DEV_EVENT_PERIODIC_STOP:
+ log_debug (dev, "periodic stop");
+ dev->process_node_periodic = 0;
+ break;
+ case VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ:
+ log_debug (dev, "port config change");
+ p = ed->port_cfg_change.port;
+ rv = vnet_dev_port_cfg_change (vm, p, ed->port_cfg_change.change_req);
+ break;
+ case VNET_DEV_EVENT_CALL_OP:
+ log_debug (dev, "call op");
+ rv = ed->call_op.op (vm, dev);
+ break;
+ case VNET_DEV_EVENT_CALL_OP_NO_RV:
+ log_debug (dev, "call op no rv");
+ ed->call_op_no_rv.op (vm, dev);
+ break;
+ case VNET_DEV_EVENT_CALL_OP_NO_WAIT:
+ log_debug (dev, "call op no wait");
+ ed->call_op_no_wait.op (vm, dev);
+ break;
+ case VNET_DEV_EVENT_CALL_PORT_OP:
+ log_debug (dev, "call port op");
+ rv = ed->call_port_op.op (vm, ed->call_port_op.port);
+ break;
+ case VNET_DEV_EVENT_CALL_PORT_OP_NO_RV:
+ log_debug (dev, "call port op no rv");
+ ed->call_port_op_no_rv.op (vm, ed->call_port_op_no_rv.port);
+ break;
+ case VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT:
+ log_debug (dev, "call port op no wait");
+ ed->call_port_op_no_wait.op (vm, ed->call_port_op_no_wait.port);
+ break;
+ default:
+ ASSERT (0);
+ }
+ return rv;
+}
+
+static uword
+vnet_dev_process (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_periodic_op_t *pop, *pops = 0;
+ f64 next = CLIB_F64_MAX;
+ vnet_dev_event_data_t *event_data = 0, *new_event_data, *ed;
+
+ vnet_dev_t *dev =
+ *((vnet_dev_t **) vlib_node_get_runtime_data (vm, rt->node_index));
+
+ log_debug (dev, "process '%U' started", format_vlib_node_name, vm,
+ rt->node_index);
+
+ while (dev->process_node_quit == 0)
+ {
+ uword event_type;
+ f64 now = vlib_time_now (vm);
+
+ if (dev->process_node_periodic)
+ vlib_process_wait_for_event_or_clock (vm, next > now ? next - now : 0);
+ else
+ vlib_process_wait_for_event (vm);
+
+ new_event_data = vlib_process_get_event_data (vm, &event_type);
+
+ if (new_event_data)
+ {
+ vec_append (event_data, new_event_data);
+ vlib_process_put_event_data (vm, new_event_data);
+
+ ASSERT (event_type == 0);
+
+ vec_foreach (ed, event_data)
+ {
+ vnet_dev_rv_t rv;
+ rv = vnet_dev_process_one_event (vm, dev, ed);
+ if (ed->reply_needed)
+ vlib_process_signal_event (vm, ed->calling_process_index,
+ ed->event, rv);
+ }
+ vec_reset_length (event_data);
+ }
+
+ next = CLIB_F64_MAX;
+ pool_foreach (pop, dev->periodic_ops)
+ {
+ if (pop->last_run + pop->interval < now)
+ {
+ vec_add1 (pops, *pop);
+ pop->last_run = now;
+ }
+ if (pop->last_run + pop->interval < next)
+ next = pop->last_run + pop->interval;
+ }
+
+ vec_foreach (pop, pops)
+ {
+ switch (pop->type)
+ {
+ case VNET_DEV_PERIODIC_OP_TYPE_DEV:
+ pop->dev_op (vm, pop->dev);
+ break;
+ case VNET_DEV_PERIODIC_OP_TYPE_PORT:
+ pop->port_op (vm, pop->port);
+ break;
+ default:
+ ASSERT (0);
+ }
+ }
+ vec_reset_length (pops);
+ }
+
+ log_debug (dev, "process '%U' quit", format_vlib_node_name, vm,
+ rt->node_index);
+ vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
+ vlib_node_rename (vm, rt->node_index, "deleted-%u", rt->node_index);
+
+ /* add node index to the freelist */
+ vec_add1 (dm->free_process_node_indices, rt->node_index);
+ vec_free (pops);
+ vec_free (event_data);
+ return 0;
+}
+
+vnet_dev_rv_t
+vnet_dev_process_create (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vlib_node_t *n;
+ uword l;
+
+ l = vec_len (dm->free_process_node_indices);
+ if (l > 0)
+ {
+ n = vlib_get_node (vm, dm->free_process_node_indices[l - 1]);
+ if (n->function != vnet_dev_process)
+ {
+ vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, n->index);
+ n->function = vnet_dev_process;
+ rt->function = vnet_dev_process;
+ }
+ vlib_node_rename (vm, n->index, "%s-process", dev->device_id);
+ vlib_node_set_state (vm, n->index, VLIB_NODE_STATE_POLLING);
+ vec_set_len (dm->free_process_node_indices, l - 1);
+ log_debug (dev, "process node '%U' (%u) reused", format_vlib_node_name,
+ vm, n->index, n->index);
+ }
+ else
+ {
+ vlib_node_registration_t r = {
+ .function = vnet_dev_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .process_log2_n_stack_bytes = 16,
+ .runtime_data_bytes = sizeof (void *),
+ };
+
+ vlib_register_node (vm, &r, "%s-process", dev->device_id);
+
+ n = vlib_get_node (vm, r.index);
+ log_debug (dev, "process node '%U' (%u) created", format_vlib_node_name,
+ vm, r.index, r.index);
+ }
+
+ dev->process_node_index = n->index;
+ *(vnet_dev_t **) vlib_node_get_runtime_data (vm, n->index) = dev;
+ vlib_start_process (vm, n->runtime_index);
+
+ return VNET_DEV_OK;
+}
+
+static void
+vnet_dev_process_event_send (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_event_data_t ed)
+{
+ vnet_dev_event_data_t *edp = vlib_process_signal_event_data (
+ vm, dev->process_node_index, 0, 1, sizeof (ed));
+ *edp = ed;
+}
+
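+/* send an event to the device process and block the calling process
+ * until the reply arrives (5 second timeout); calls made from the
+ * device process itself are executed inline to avoid self-deadlock */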
+static vnet_dev_rv_t
+vnet_dev_process_event_send_and_wait (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_event_data_t ed)
+{
+ uword event, *event_data = 0;
+ vnet_dev_rv_t rv;
+
+ ed.calling_process_index = vlib_get_current_process_node_index (vm);
+
+ if (ed.calling_process_index == dev->process_node_index)
+ return vnet_dev_process_one_event (vm, dev, &ed);
+
+ ed.reply_needed = 1;
+ vnet_dev_process_event_send (vm, dev, ed);
+ vlib_process_wait_for_event_or_clock (vm, 5.0);
+ event = vlib_process_get_events (vm, &event_data);
+ if (event != ed.event)
+ {
+ log_err (dev, "%s",
+ event == VNET_DEV_EVENT_CLOCK ?
+ "timeout waiting for process node to respond" :
+ "unexpected event received");
+ rv = VNET_DEV_ERR_PROCESS_REPLY;
+ }
+ else
+ rv = event_data[0];
+ vec_free (event_data);
+ return rv;
+}
+
+void
+vnet_dev_process_quit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vnet_dev_event_data_t ed = { .event = VNET_DEV_EVENT_PROCESS_QUIT };
+ vnet_dev_process_event_send_and_wait (vm, dev, ed);
+}
+
+static int
+_vnet_dev_poll_add (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_periodic_op_t pop)
+{
+ const vnet_dev_event_data_t ed = { .event = VNET_DEV_EVENT_PERIODIC_START };
+ vnet_dev_periodic_op_t *p;
+
+ pool_foreach (p, dev->periodic_ops)
+ if (p->op == pop.op && p->arg == pop.arg)
+ return 0;
+
+ pool_get_zero (dev->periodic_ops, p);
+ *p = pop;
+ if (pool_elts (dev->periodic_ops) == 1)
+ vnet_dev_process_event_send (vm, dev, ed);
+ return 1;
+}
+
+static int
+_vnet_dev_poll_remove (vlib_main_t *vm, vnet_dev_t *dev, void *op, void *arg)
+{
+ const vnet_dev_event_data_t ed = { .event = VNET_DEV_EVENT_PERIODIC_STOP };
+ vnet_dev_periodic_op_t *pop;
+
+ pool_foreach (pop, dev->periodic_ops)
+ if (pop->op == op && pop->arg == arg)
+ {
+ pool_put (dev->periodic_ops, pop);
+ if (pool_elts (dev->periodic_ops) == 0)
+ vnet_dev_process_event_send (vm, dev, ed);
+ return 1;
+ }
+ return 0;
+}
+
+void
+vnet_dev_poll_dev_add (vlib_main_t *vm, vnet_dev_t *dev, f64 interval,
+ vnet_dev_op_no_rv_t *dev_op)
+{
+ vnet_dev_periodic_op_t pop = {
+ .interval = interval,
+ .type = VNET_DEV_PERIODIC_OP_TYPE_DEV,
+ .dev_op = dev_op,
+ .dev = dev,
+ };
+
+ if (_vnet_dev_poll_add (vm, dev, pop) == 0)
+ log_warn (dev, "poll_dev_add: op already exists, not added");
+}
+
+void
+vnet_dev_poll_dev_remove (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_op_no_rv_t *dev_op)
+{
+ if (_vnet_dev_poll_remove (vm, dev, (void *) dev_op, (void *) dev) == 0)
+ log_warn (dev, "poll_dev_remove: op not found, not removed");
+}
+
+void
+vnet_dev_poll_port_add (vlib_main_t *vm, vnet_dev_port_t *port, f64 interval,
+ vnet_dev_port_op_no_rv_t *port_op)
+{
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_periodic_op_t pop = {
+ .interval = interval,
+ .type = VNET_DEV_PERIODIC_OP_TYPE_PORT,
+ .port_op = port_op,
+ .port = port,
+ };
+
+ if (_vnet_dev_poll_add (vm, dev, pop) == 0)
+ log_warn (dev, "poll_port_add: op already exists, not added");
+}
+
+void
+vnet_dev_poll_port_remove (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_op_no_rv_t *port_op)
+{
+ vnet_dev_t *dev = port->dev;
+ if (_vnet_dev_poll_remove (vm, dev, (void *) port_op, (void *) port) == 0)
+ log_warn (dev, "poll_port_remove: op not found, not removed");
+}
+
+vnet_dev_rv_t
+vnet_dev_process_port_cfg_change_req (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *pccr)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ,
+ .port_cfg_change = {
+ .port = port,
+ .change_req = pccr,
+ },
+ };
+
+ return vnet_dev_process_event_send_and_wait (vm, port->dev, ed);
+}
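+
+/*
+ * Usage sketch (hypothetical caller; request fields as defined by
+ * vnet_dev_port_cfg_change_req_t and used in port.c above):
+ *
+ *   vnet_dev_port_cfg_change_req_t req = {
+ *     .type = VNET_DEV_PORT_CFG_PROMISC_MODE,
+ *     .promisc = 1,
+ *   };
+ *   if (vnet_dev_process_port_cfg_change_req (vm, port, &req) !=
+ *       VNET_DEV_OK)
+ *     ... handle error ...
+ */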
+
+vnet_dev_rv_t
+vnet_dev_process_call_op (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_op_t *op)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_CALL_OP,
+ .call_op.op = op,
+ };
+
+ return vnet_dev_process_event_send_and_wait (vm, dev, ed);
+}
+
+vnet_dev_rv_t
+vnet_dev_process_call_op_no_rv (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_op_no_rv_t *op)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_CALL_OP_NO_RV,
+ .call_op_no_rv.op = op,
+ };
+
+ return vnet_dev_process_event_send_and_wait (vm, dev, ed);
+}
+
+void
+vnet_dev_process_call_op_no_wait (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_op_no_rv_t *op)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_CALL_OP_NO_WAIT,
+ .call_op_no_rv.op = op,
+ };
+
+ vnet_dev_process_event_send (vm, dev, ed);
+}
+
+vnet_dev_rv_t
+vnet_dev_process_call_port_op (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_op_t *op)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_CALL_PORT_OP,
+ .call_port_op = { .op = op, .port = port },
+ };
+
+ return vnet_dev_process_event_send_and_wait (vm, port->dev, ed);
+}
+
+vnet_dev_rv_t
+vnet_dev_process_call_port_op_no_rv (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_op_no_rv_t *op)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_CALL_PORT_OP_NO_RV,
+ .call_port_op_no_rv = { .op = op, .port = port },
+ };
+
+ return vnet_dev_process_event_send_and_wait (vm, port->dev, ed);
+}
+
+void
+vnet_dev_process_call_port_op_no_wait (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_op_no_rv_t *op)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT,
+ .call_port_op_no_wait = { .op = op, .port = port },
+ };
+
+ vnet_dev_process_event_send (vm, port->dev, ed);
+}
diff --git a/src/vnet/dev/process.h b/src/vnet/dev/process.h
new file mode 100644
index 00000000000..9223973dffc
--- /dev/null
+++ b/src/vnet/dev/process.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_PROCESS_H_
+#define _VNET_DEV_PROCESS_H_
+
+#include <vppinfra/clib.h>
+
+#endif /* _VNET_DEV_PROCESS_H_ */
diff --git a/src/vnet/dev/queue.c b/src/vnet/dev/queue.c
new file mode 100644
index 00000000000..9a016a626fb
--- /dev/null
+++ b/src/vnet/dev/queue.c
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+  .subclass_name = "queue",
+};
+
+void
+vnet_dev_rx_queue_free (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_port_t *port = rxq->port;
+ vnet_dev_t *dev = port->dev;
+ log_debug (dev, "queue %u", rxq->queue_id);
+ if (port->rx_queue_ops.free)
+ port->rx_queue_ops.free (vm, rxq);
+
+ vnet_dev_rx_queue_free_counters (vm, rxq);
+ pool_put_index (port->rx_queues, rxq->index);
+ clib_mem_free (rxq);
+}
+
+vnet_dev_rv_t
+vnet_dev_rx_queue_alloc (vlib_main_t *vm, vnet_dev_port_t *port,
+ u16 queue_size)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_rx_queue_t *rxq, **qp;
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ u16 n_threads = vlib_get_n_threads ();
+ u8 buffer_pool_index;
+
+ vnet_dev_port_validate (vm, port);
+
+ log_debug (dev, "port %u queue_size %u", port->port_id, queue_size);
+
+ if (pool_elts (port->rx_queues) == port->attr.max_rx_queues)
+ return VNET_DEV_ERR_NO_AVAIL_QUEUES;
+
+  rxq = vnet_dev_alloc_with_data (sizeof (vnet_dev_rx_queue_t),
+ port->rx_queue_config.data_size);
+ pool_get (port->rx_queues, qp);
+ qp[0] = rxq;
+ rxq->enabled = 1;
+ rxq->port = port;
+ rxq->size = queue_size;
+ rxq->index = qp - port->rx_queues;
+
+ /* default queue id - can be changed by driver */
+ rxq->queue_id = qp - port->rx_queues;
+ ASSERT (rxq->queue_id < port->attr.max_rx_queues);
+
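+  /* spread rx queues across threads round-robin; the counter wraps back
+   * to thread 1, so subsequent queues land on worker threads */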
+ if (n_threads > 1)
+ {
+ rxq->rx_thread_index = dm->next_rx_queue_thread++;
+ if (dm->next_rx_queue_thread >= n_threads)
+ dm->next_rx_queue_thread = 1;
+ }
+
+ buffer_pool_index =
+ vlib_buffer_pool_get_default_for_numa (vm, dev->numa_node);
+ vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index);
+
+ rxq->buffer_template = bp->buffer_template;
+ vnet_buffer (&rxq->buffer_template)->sw_if_index[VLIB_TX] = ~0;
+
+ rxq->next_index = vnet_dev_default_next_index_by_port_type[port->attr.type];
+
+ if (port->rx_queue_ops.alloc)
+ rv = port->rx_queue_ops.alloc (vm, rxq);
+
+ if (rv != VNET_DEV_OK)
+ {
+      log_err (dev, "driver rejected rx queue alloc with rv %d", rv);
+ vnet_dev_rx_queue_free (vm, rxq);
+ }
+ else
+ log_debug (dev, "queue %u added, assigned to thread %u", rxq->queue_id,
+ rxq->rx_thread_index);
+
+ return rv;
+}
+
+vnet_dev_rv_t
+vnet_dev_rx_queue_start (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ if (rxq->port->rx_queue_ops.start)
+ rv = rxq->port->rx_queue_ops.start (vm, rxq);
+
+ if (rv == VNET_DEV_OK)
+ rxq->started = 1;
+
+ return rv;
+}
+
+void
+vnet_dev_rx_queue_stop (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ if (rxq->port->rx_queue_ops.stop)
+ rxq->port->rx_queue_ops.stop (vm, rxq);
+ vlib_node_set_state (vm, rxq->port->intf.rx_node_index,
+ VLIB_NODE_STATE_DISABLED);
+ rxq->started = 0;
+}
+
+void
+vnet_dev_tx_queue_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ vnet_dev_port_t *port = txq->port;
+ vnet_dev_t *dev = port->dev;
+
+ vnet_dev_port_validate (vm, port);
+
+ log_debug (dev, "queue %u", txq->queue_id);
+ if (port->tx_queue_ops.free)
+ port->tx_queue_ops.free (vm, txq);
+
+ clib_bitmap_free (txq->assigned_threads);
+ vnet_dev_tx_queue_free_counters (vm, txq);
+ pool_put_index (port->tx_queues, txq->index);
+ clib_mem_free (txq);
+}
+
+vnet_dev_rv_t
+vnet_dev_tx_queue_alloc (vlib_main_t *vm, vnet_dev_port_t *port,
+ u16 queue_size)
+{
+ vnet_dev_tx_queue_t *txq, **qp;
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ log_debug (dev, "port %u size %u", port->port_id, queue_size);
+
+ if (pool_elts (port->tx_queues) == port->attr.max_tx_queues)
+ return VNET_DEV_ERR_NO_AVAIL_QUEUES;
+
+  txq = vnet_dev_alloc_with_data (sizeof (vnet_dev_tx_queue_t),
+ port->tx_queue_config.data_size);
+ pool_get (port->tx_queues, qp);
+ qp[0] = txq;
+ txq->enabled = 1;
+ txq->port = port;
+ txq->size = queue_size;
+ txq->index = qp - port->tx_queues;
+
+ /* default queue id - can be changed by driver */
+ txq->queue_id = qp - port->tx_queues;
+ ASSERT (txq->queue_id < port->attr.max_tx_queues);
+
+ if (port->tx_queue_ops.alloc)
+ rv = port->tx_queue_ops.alloc (vm, txq);
+
+ if (rv != VNET_DEV_OK)
+ {
+ log_err (dev, "driver rejected tx queue alloc with rv %d", rv);
+ vnet_dev_tx_queue_free (vm, txq);
+ }
+ else
+ log_debug (dev, "queue %u added", txq->queue_id);
+
+ return rv;
+}
+
+vnet_dev_rv_t
+vnet_dev_tx_queue_start (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ if (txq->port->tx_queue_ops.start)
+ rv = txq->port->tx_queue_ops.start (vm, txq);
+
+ if (rv == VNET_DEV_OK)
+ txq->started = 1;
+
+ return rv;
+}
+
+void
+vnet_dev_tx_queue_stop (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ if (txq->port->tx_queue_ops.stop)
+ txq->port->tx_queue_ops.stop (vm, txq);
+ txq->started = 0;
+}
+
+void
+vnet_dev_rx_queue_add_counters (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq,
+ vnet_dev_counter_t *counters, u16 n_counters)
+{
+ rxq->counter_main = vnet_dev_counters_alloc (
+ vm, counters, n_counters, "%s port %u rx-queue %u counters",
+ rxq->port->dev->device_id, rxq->port->port_id, rxq->queue_id);
+}
+
+void
+vnet_dev_rx_queue_free_counters (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ if (rxq->counter_main)
+ vnet_dev_counters_free (vm, rxq->counter_main);
+}
+
+void
+vnet_dev_tx_queue_add_counters (vlib_main_t *vm, vnet_dev_tx_queue_t *txq,
+ vnet_dev_counter_t *counters, u16 n_counters)
+{
+ txq->counter_main = vnet_dev_counters_alloc (
+ vm, counters, n_counters, "%s port %u tx-queue %u counters",
+ txq->port->dev->device_id, txq->port->port_id, txq->queue_id);
+}
+
+void
+vnet_dev_tx_queue_free_counters (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ if (!txq->counter_main)
+ return;
+
+ log_debug (txq->port->dev, "free");
+ vnet_dev_counters_free (vm, txq->counter_main);
+}
diff --git a/src/vnet/dev/runtime.c b/src/vnet/dev/runtime.c
new file mode 100644
index 00000000000..79c55cfbd53
--- /dev/null
+++ b/src/vnet/dev/runtime.c
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/bitmap.h"
+#include "vppinfra/lock.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/log.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "runtime",
+};
+
+static vnet_dev_rt_op_t *rt_ops;
+
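+/* rebuild the calling thread's rx node runtime for a port: re-link the
+ * list of rx queues assigned to this thread and derive the node state
+ * (polling if any queue polls, otherwise interrupt, otherwise disabled) */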
+static void
+_vnet_dev_rt_exec_op (vlib_main_t *vm, vnet_dev_rt_op_t *op)
+{
+ vnet_dev_port_t *port = op->port;
+ vnet_dev_rx_queue_t *previous = 0, *first = 0;
+ vnet_dev_rx_node_runtime_t *rtd;
+ vlib_node_state_t state = VLIB_NODE_STATE_DISABLED;
+ u32 node_index = port->intf.rx_node_index;
+
+ rtd = vlib_node_get_runtime_data (vm, node_index);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ if (q->rx_thread_index != vm->thread_index)
+ continue;
+
+ if (q->interrupt_mode == 0)
+ state = VLIB_NODE_STATE_POLLING;
+ else if (state != VLIB_NODE_STATE_POLLING)
+ state = VLIB_NODE_STATE_INTERRUPT;
+
+ q->next_on_thread = 0;
+ if (previous == 0)
+ first = q;
+ else
+ previous->next_on_thread = q;
+
+ previous = q;
+ }
+
+ rtd->first_rx_queue = first;
+ vlib_node_set_state (vm, port->intf.rx_node_index, state);
+ __atomic_store_n (&op->completed, 1, __ATOMIC_RELEASE);
+}
+
+static uword
+vnet_dev_rt_mgmt_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ u16 thread_index = vm->thread_index;
+ vnet_dev_rt_op_t *op, *ops = __atomic_load_n (&rt_ops, __ATOMIC_ACQUIRE);
+ u32 n_pending = 0;
+ uword rv = 0;
+
+ vec_foreach (op, ops)
+ {
+ if (!op->completed && op->thread_index == thread_index)
+ {
+ if (op->in_order == 1 && n_pending)
+ {
+ vlib_node_set_interrupt_pending (vm, node->node_index);
+ return rv;
+ }
+ _vnet_dev_rt_exec_op (vm, op);
+ rv++;
+ }
+
+ if (op->completed == 0)
+ n_pending++;
+ }
+
+ return rv;
+}
+
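+/* pre-input node, normally idle; vnet_dev_rt_exec_ops wakes it with an
+ * interrupt so pending runtime updates get applied on this thread */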
+VLIB_REGISTER_NODE (vnet_dev_rt_mgmt_node, static) = {
+ .function = vnet_dev_rt_mgmt_node_fn,
+ .name = "dev-rt-mgmt",
+ .type = VLIB_NODE_TYPE_PRE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+};
+
+vnet_dev_rv_t
+vnet_dev_rt_exec_ops (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_rt_op_t *ops,
+ u32 n_ops)
+{
+ vnet_dev_rt_op_t *op = ops;
+ vnet_dev_rt_op_t *remote_ops = 0;
+ clib_bitmap_t *remote_bmp = 0;
+ u32 i;
+
+ ASSERT (rt_ops == 0);
+
+ if (vlib_worker_thread_barrier_held ())
+ {
+ for (op = ops; op < (ops + n_ops); op++)
+ {
+ vlib_main_t *tvm = vlib_get_main_by_index (op->thread_index);
+ _vnet_dev_rt_exec_op (tvm, op);
+ log_debug (
+ dev,
+ "port %u rx node runtime update on thread %u executed locally",
+ op->port->port_id, op->thread_index);
+ }
+ return VNET_DEV_OK;
+ }
+
+ while (n_ops)
+ {
+ if (op->thread_index != vm->thread_index)
+ break;
+
+ _vnet_dev_rt_exec_op (vm, op);
+ log_debug (
+ dev, "port %u rx node runtime update on thread %u executed locally",
+ op->port->port_id, op->thread_index);
+ op++;
+ n_ops--;
+ }
+
+ if (n_ops == 0)
+ return VNET_DEV_OK;
+
+  /* remaining ops begin at 'op'; rebase 'ops' so the tail of the array
+   * is not skipped by the loop below */
+  ops = op;
+  for (op = ops; op < (ops + n_ops); op++)
+ {
+ if (op->thread_index == vm->thread_index &&
+ (op->in_order == 0 || vec_len (remote_ops) == 0))
+ {
+ _vnet_dev_rt_exec_op (vm, op);
+ log_debug (dev,
+ "port %u rx node runtime update on thread "
+ "%u executed locally",
+ op->port->port_id, op->thread_index);
+ }
+ else
+ {
+ vec_add1 (remote_ops, *op);
+ log_debug (dev,
+ "port %u rx node runtime update on thread %u "
+ "enqueued for remote execution",
+ op->port->port_id, op->thread_index);
+ remote_bmp = clib_bitmap_set (remote_bmp, op->thread_index, 1);
+ }
+ }
+
+ if (remote_ops == 0)
+ return VNET_DEV_OK;
+
+ __atomic_store_n (&rt_ops, remote_ops, __ATOMIC_RELEASE);
+
+ clib_bitmap_foreach (i, remote_bmp)
+ {
+ vlib_node_set_interrupt_pending (vlib_get_main_by_index (i),
+ vnet_dev_rt_mgmt_node.index);
+ log_debug (dev, "interrupt sent to %s node on thread %u",
+ vnet_dev_rt_mgmt_node.name, i);
+ }
+
+ vec_foreach (op, remote_ops)
+ {
+ while (op->completed == 0)
+ vlib_process_suspend (vm, 5e-5);
+
+      log_debug (
+	dev, "port %u rx node runtime update on thread %u completed",
+	op->port->port_id, op->thread_index);
+ }
+
+ __atomic_store_n (&rt_ops, 0, __ATOMIC_RELAXED);
+ vec_free (remote_ops);
+ clib_bitmap_free (remote_bmp);
+ return VNET_DEV_OK;
+}
diff --git a/src/vnet/dev/types.h b/src/vnet/dev/types.h
new file mode 100644
index 00000000000..006d18e5bc5
--- /dev/null
+++ b/src/vnet/dev/types.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_TYPES_H_
+#define _VNET_DEV_TYPES_H_
+
+#include <vppinfra/types.h>
+#include <vnet/dev/errors.h>
+
+typedef char vnet_dev_device_id_t[48];
+typedef char vnet_dev_if_name_t[32];
+typedef char vnet_dev_driver_name_t[16];
+typedef char vnet_dev_bus_name_t[8];
+typedef u16 vnet_dev_port_id_t;
+typedef struct vnet_dev vnet_dev_t;
+typedef struct vnet_dev_port vnet_dev_port_t;
+typedef struct vnet_dev_rx_queue vnet_dev_rx_queue_t;
+typedef struct vnet_dev_tx_queue vnet_dev_tx_queue_t;
+
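+/* error codes are generated twice: the first enum assigns each error a
+ * positive ordinal, the second negates it, so VNET_DEV_OK == 0 and all
+ * error codes are negative */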
+typedef enum
+{
+ VNET_DEV_MINUS_OK = 0,
+#define _(n, d) VNET_DEV_ERR_MINUS_##n,
+ foreach_vnet_dev_rv_type
+#undef _
+} vnet_dev_minus_rv_t;
+
+typedef enum
+{
+ VNET_DEV_OK = 0,
+#define _(n, d) VNET_DEV_ERR_##n = -(VNET_DEV_ERR_MINUS_##n),
+ foreach_vnet_dev_rv_type
+#undef _
+} vnet_dev_rv_t;
+
+/* do not change bit assignments - API dependency */
+#define foreach_vnet_dev_flag _ (0, NO_STATS, "don't poll device stats")
+
+typedef union
+{
+ enum
+ {
+#define _(b, n, d) VNET_DEV_F_##n = 1ull << (b),
+ foreach_vnet_dev_flag
+#undef _
+ } e;
+ u32 n;
+} vnet_dev_flags_t;
+
+/* do not change bit assignments - API dependency */
+#define foreach_vnet_dev_port_flag \
+ _ (0, INTERRUPT_MODE, "enable interrupt mode")
+
+typedef union
+{
+ enum
+ {
+#define _(b, n, d) VNET_DEV_PORT_F_##n = 1ull << (b),
+ foreach_vnet_dev_port_flag
+#undef _
+ } e;
+ u32 n;
+} vnet_dev_port_flags_t;
+
+#endif /* _VNET_DEV_TYPES_H_ */
diff --git a/src/vnet/devices/af_packet/FEATURE.yaml b/src/vnet/devices/af_packet/FEATURE.yaml
deleted file mode 100644
index 25d8b2b5964..00000000000
--- a/src/vnet/devices/af_packet/FEATURE.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
----
-name: host-interface Device AF_PACKET
-maintainer: Damjan Marion <damarion@cisco.com>
-features:
- - L4 checksum offload
-description: "Create a host interface that will attach to a linux AF_PACKET
- interface, one side of a veth pair. The veth pair must
- already exist. Once created, a new host interface will
- exist in VPP with the name 'host-<ifname>', where '<ifname>'
- is the name of the specified veth pair. Use the 'show interface'
- command to display host interface details."
-missing:
- - API dump details beyond sw_if_index and name
-state: production
-properties: [API, CLI, STATS, MULTITHREAD]
diff --git a/src/vnet/devices/af_packet/af_packet.api b/src/vnet/devices/af_packet/af_packet.api
deleted file mode 100644
index c7a81c58f65..00000000000
--- a/src/vnet/devices/af_packet/af_packet.api
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2015-2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-option version = "2.0.0";
-
-import "vnet/interface_types.api";
-import "vnet/ethernet/ethernet_types.api";
-
-/** \brief Create host-interface
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param hw_addr - interface MAC
- @param use_random_hw_addr - use random generated MAC
- @param host_if_name - interface name
-*/
-define af_packet_create
-{
- u32 client_index;
- u32 context;
-
- vl_api_mac_address_t hw_addr;
- bool use_random_hw_addr;
- string host_if_name[64];
-};
-
-/** \brief Create host-interface response
- @param context - sender context, to match reply w/ request
- @param retval - return value for request
-*/
-define af_packet_create_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-
-/** \brief Create host-interface
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param hw_addr - interface MAC
- @param use_random_hw_addr - use random generated MAC
- @param host_if_name - interface name
- @param rx_frame_size - frame size for RX
- @param tx_frame_size - frame size for TX
- @param rx_frames_per_block - frames per block for RX
- @param tx_frames_per_block - frames per block for TX
- @param flags - flags for the af_packet interface creation
- @param num_rx_queues - number of rx queues
-*/
-define af_packet_create_v2
-{
- u32 client_index;
- u32 context;
-
- vl_api_mac_address_t hw_addr;
- bool use_random_hw_addr;
- string host_if_name[64];
- u32 rx_frame_size;
- u32 tx_frame_size;
- u32 rx_frames_per_block;
- u32 tx_frames_per_block;
- u32 flags;
- u16 num_rx_queues [default=1];
-};
-
-/** \brief Create host-interface response
- @param context - sender context, to match reply w/ request
- @param retval - return value for request
-*/
-define af_packet_create_v2_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-
-/** \brief Delete host-interface
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param host_if_name - interface name
-*/
-autoreply define af_packet_delete
-{
- u32 client_index;
- u32 context;
-
- string host_if_name[64];
-};
-
-/** \brief Set l4 offload checksum calculation
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
-*/
-autoreply define af_packet_set_l4_cksum_offload
-{
- u32 client_index;
- u32 context;
-
- vl_api_interface_index_t sw_if_index;
- bool set;
-};
-
-/** \brief Dump af_packet interfaces request */
-define af_packet_dump
-{
- u32 client_index;
- u32 context;
-};
-
-/** \brief Reply for af_packet dump request
- @param sw_if_index - software index of af_packet interface
- @param host_if_name - interface name
-*/
-define af_packet_details
-{
- u32 context;
- vl_api_interface_index_t sw_if_index;
- string host_if_name[64];
-};
-
-/*
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c
deleted file mode 100644
index 69e3c871412..00000000000
--- a/src/vnet/devices/af_packet/af_packet.c
+++ /dev/null
@@ -1,607 +0,0 @@
-/*
- *------------------------------------------------------------------
- * af_packet.c - linux kernel packet interface
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <sys/ioctl.h>
-#include <net/if.h>
-#include <dirent.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <fcntl.h>
-
-#include <vppinfra/linux/sysfs.h>
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vnet/ip/ip.h>
-#include <vnet/devices/netlink.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/interface/rx_queue_funcs.h>
-
-#include <vnet/devices/af_packet/af_packet.h>
-
-af_packet_main_t af_packet_main;
-
-VNET_HW_INTERFACE_CLASS (af_packet_ip_device_hw_interface_class, static) = {
- .name = "af-packet-ip-device",
- .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
-};
-
-#define AF_PACKET_DEFAULT_TX_FRAMES_PER_BLOCK 1024
-#define AF_PACKET_DEFAULT_TX_FRAME_SIZE (2048 * 5)
-#define AF_PACKET_TX_BLOCK_NR 1
-
-#define AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK 1024
-#define AF_PACKET_DEFAULT_RX_FRAME_SIZE (2048 * 5)
-#define AF_PACKET_RX_BLOCK_NR 1
-
-/*defined in net/if.h but clashes with dpdk headers */
-unsigned int if_nametoindex (const char *ifname);
-
-typedef struct tpacket_req tpacket_req_t;
-
-static u32
-af_packet_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi,
- u32 flags)
-{
- clib_error_t *error;
- af_packet_main_t *apm = &af_packet_main;
- af_packet_if_t *apif =
- pool_elt_at_index (apm->interfaces, hi->dev_instance);
-
- if (flags == ETHERNET_INTERFACE_FLAG_MTU)
- {
- error =
- vnet_netlink_set_link_mtu (apif->host_if_index, hi->max_packet_bytes);
-
- if (error)
- {
- vlib_log_err (apm->log_class, "netlink failed to change MTU: %U",
- format_clib_error, error);
- clib_error_free (error);
- return VNET_API_ERROR_SYSCALL_ERROR_1;
- }
- else
- apif->host_mtu = hi->max_packet_bytes;
- }
-
- return 0;
-}
-
-static int
-af_packet_read_mtu (af_packet_if_t *apif)
-{
- af_packet_main_t *apm = &af_packet_main;
- clib_error_t *error;
- error = vnet_netlink_get_link_mtu (apif->host_if_index, &apif->host_mtu);
- if (error)
- {
- vlib_log_err (apm->log_class, "netlink failed to get MTU: %U",
- format_clib_error, error);
- clib_error_free (error);
- return VNET_API_ERROR_SYSCALL_ERROR_1;
- }
- return 0;
-}
-
-static clib_error_t *
-af_packet_fd_read_ready (clib_file_t * uf)
-{
- af_packet_main_t *apm = &af_packet_main;
- vnet_main_t *vnm = vnet_get_main ();
- u32 idx = uf->private_data;
- af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, idx);
-
- apm->pending_input_bitmap =
- clib_bitmap_set (apm->pending_input_bitmap, idx, 1);
-
- /* Schedule the rx node */
- vnet_hw_if_rx_queue_set_int_pending (vnm, apif->queue_index);
- return 0;
-}
-
-static int
-is_bridge (const u8 * host_if_name)
-{
- u8 *s;
- DIR *dir = NULL;
-
- s = format (0, "/sys/class/net/%s/bridge%c", host_if_name, 0);
- dir = opendir ((char *) s);
- vec_free (s);
-
- if (dir)
- {
- closedir (dir);
- return 0;
- }
-
- return -1;
-}
-
-static int
-create_packet_v2_sock (int host_if_index, tpacket_req_t * rx_req,
- tpacket_req_t * tx_req, int *fd, u8 ** ring)
-{
- af_packet_main_t *apm = &af_packet_main;
- int ret;
- struct sockaddr_ll sll;
- int ver = TPACKET_V2;
- socklen_t req_sz = sizeof (struct tpacket_req);
- u32 ring_sz = rx_req->tp_block_size * rx_req->tp_block_nr +
- tx_req->tp_block_size * tx_req->tp_block_nr;
-
- if ((*fd = socket (AF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to create AF_PACKET socket: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- /* bind before rx ring is cfged so we don't receive packets from other interfaces */
- clib_memset (&sll, 0, sizeof (sll));
- sll.sll_family = PF_PACKET;
- sll.sll_protocol = htons (ETH_P_ALL);
- sll.sll_ifindex = host_if_index;
- if (bind (*fd, (struct sockaddr *) &sll, sizeof (sll)) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to bind rx packet socket: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- if (setsockopt (*fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof (ver)) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to set rx packet interface version: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- int opt = 1;
- if (setsockopt (*fd, SOL_PACKET, PACKET_LOSS, &opt, sizeof (opt)) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to set packet tx ring error handling option: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- if (setsockopt (*fd, SOL_PACKET, PACKET_QDISC_BYPASS, &opt, sizeof (opt)) <
- 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to set qdisc bypass error "
- "handling option: %s (errno %d)",
- strerror (errno), errno);
- }
-
- if (setsockopt (*fd, SOL_PACKET, PACKET_RX_RING, rx_req, req_sz) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to set packet rx ring options: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- if (setsockopt (*fd, SOL_PACKET, PACKET_TX_RING, tx_req, req_sz) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to set packet tx ring options: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- *ring =
- mmap (NULL, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, *fd,
- 0);
- if (*ring == MAP_FAILED)
- {
- vlib_log_debug (apm->log_class, "mmap failure: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- return 0;
-error:
- if (*fd >= 0)
- {
- close (*fd);
- *fd = -1;
- }
- return ret;
-}
-
-int
-af_packet_create_if (af_packet_create_if_arg_t *arg)
-{
- af_packet_main_t *apm = &af_packet_main;
- vlib_main_t *vm = vlib_get_main ();
- int ret, fd = -1, fd2 = -1;
- struct tpacket_req *rx_req = 0;
- struct tpacket_req *tx_req = 0;
- struct ifreq ifr;
- u8 *ring = 0;
- af_packet_if_t *apif = 0;
- u8 hw_addr[6];
- clib_error_t *error;
- vnet_sw_interface_t *sw;
- vnet_hw_interface_t *hw;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- vnet_main_t *vnm = vnet_get_main ();
- uword *p;
- uword if_index;
- u8 *host_if_name_dup = 0;
- int host_if_index = -1;
- u32 rx_frames_per_block, tx_frames_per_block;
- u32 rx_frame_size, tx_frame_size;
-
- p = mhash_get (&apm->if_index_by_host_if_name, arg->host_if_name);
- if (p)
- {
- apif = vec_elt_at_index (apm->interfaces, p[0]);
- arg->sw_if_index = apif->sw_if_index;
- return VNET_API_ERROR_IF_ALREADY_EXISTS;
- }
-
- host_if_name_dup = vec_dup (arg->host_if_name);
-
- rx_frames_per_block = arg->rx_frames_per_block ?
- arg->rx_frames_per_block :
- AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK;
- tx_frames_per_block = arg->tx_frames_per_block ?
- arg->tx_frames_per_block :
- AF_PACKET_DEFAULT_TX_FRAMES_PER_BLOCK;
- rx_frame_size =
- arg->rx_frame_size ? arg->rx_frame_size : AF_PACKET_DEFAULT_RX_FRAME_SIZE;
- tx_frame_size =
- arg->tx_frame_size ? arg->tx_frame_size : AF_PACKET_DEFAULT_TX_FRAME_SIZE;
-
- vec_validate (rx_req, 0);
- rx_req->tp_block_size = rx_frame_size * rx_frames_per_block;
- rx_req->tp_frame_size = rx_frame_size;
- rx_req->tp_block_nr = AF_PACKET_RX_BLOCK_NR;
- rx_req->tp_frame_nr = AF_PACKET_RX_BLOCK_NR * rx_frames_per_block;
-
- vec_validate (tx_req, 0);
- tx_req->tp_block_size = tx_frame_size * tx_frames_per_block;
- tx_req->tp_frame_size = tx_frame_size;
- tx_req->tp_block_nr = AF_PACKET_TX_BLOCK_NR;
- tx_req->tp_frame_nr = AF_PACKET_TX_BLOCK_NR * tx_frames_per_block;
-
-  /*
-   * make sure the host side of the interface is 'UP' before binding an
-   * AF_PACKET socket to it.
-   */
- if ((fd2 = socket (AF_UNIX, SOCK_DGRAM, 0)) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to create AF_UNIX socket: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- clib_memcpy (ifr.ifr_name, (const char *) arg->host_if_name,
- vec_len (arg->host_if_name));
- if (ioctl (fd2, SIOCGIFINDEX, &ifr) < 0)
- {
- vlib_log_debug (
- apm->log_class,
- "Failed to retrieve the interface (%s) index: %s (errno %d)",
- arg->host_if_name, strerror (errno), errno);
- ret = VNET_API_ERROR_INVALID_INTERFACE;
- goto error;
- }
-
- host_if_index = ifr.ifr_ifindex;
- if (ioctl (fd2, SIOCGIFFLAGS, &ifr) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to get the active flag: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- if (!(ifr.ifr_flags & IFF_UP))
- {
- ifr.ifr_flags |= IFF_UP;
- if (ioctl (fd2, SIOCSIFFLAGS, &ifr) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to set the active flag: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
- }
-
- if (fd2 > -1)
- {
- close (fd2);
- fd2 = -1;
- }
-
- ret = create_packet_v2_sock (host_if_index, rx_req, tx_req, &fd, &ring);
-
- if (ret != 0)
- goto error;
-
- ret = is_bridge (arg->host_if_name);
-
- if (ret == 0) /* is a bridge, ignore state */
- host_if_index = -1;
-
-  /* So far everything looks good, let's create the interface */
- pool_get (apm->interfaces, apif);
- if_index = apif - apm->interfaces;
-
- apif->host_if_index = host_if_index;
- apif->fd = fd;
- apif->rx_ring = ring;
- apif->tx_ring = ring + rx_req->tp_block_size * rx_req->tp_block_nr;
- apif->rx_req = rx_req;
- apif->tx_req = tx_req;
- apif->host_if_name = host_if_name_dup;
- apif->per_interface_next_index = ~0;
- apif->next_tx_frame = 0;
- apif->next_rx_frame = 0;
- apif->mode = arg->mode;
-
- ret = af_packet_read_mtu (apif);
- if (ret != 0)
- goto error;
-
- if (tm->n_vlib_mains > 1)
- clib_spinlock_init (&apif->lockp);
-
- if (apif->mode == AF_PACKET_IF_MODE_ETHERNET)
- {
-      /* use the configured MAC address or generate a random one */
- if (arg->hw_addr)
- clib_memcpy (hw_addr, arg->hw_addr, 6);
- else
- {
- f64 now = vlib_time_now (vm);
- u32 rnd;
- rnd = (u32) (now * 1e6);
- rnd = random_u32 (&rnd);
-
- clib_memcpy (hw_addr + 2, &rnd, sizeof (rnd));
- hw_addr[0] = 2;
- hw_addr[1] = 0xfe;
- }
-
- error = ethernet_register_interface (
- vnm, af_packet_device_class.index, if_index, hw_addr,
- &apif->hw_if_index, af_packet_eth_flag_change);
-
- if (error)
- {
- clib_memset (apif, 0, sizeof (*apif));
- pool_put (apm->interfaces, apif);
- vlib_log_err (apm->log_class, "Unable to register interface: %U",
- format_clib_error, error);
- clib_error_free (error);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
- }
- else
- {
- apif->hw_if_index = vnet_register_interface (
- vnm, af_packet_device_class.index, if_index,
- af_packet_ip_device_hw_interface_class.index, if_index);
- }
- sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index);
- hw = vnet_get_hw_interface (vnm, apif->hw_if_index);
- apif->sw_if_index = sw->sw_if_index;
- vnet_hw_if_set_input_node (vnm, apif->hw_if_index,
- af_packet_input_node.index);
- apif->queue_index = vnet_hw_if_register_rx_queue (vnm, apif->hw_if_index, 0,
- VNET_HW_IF_RXQ_THREAD_ANY);
-
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
- vnet_hw_interface_set_flags (vnm, apif->hw_if_index,
- VNET_HW_INTERFACE_FLAG_LINK_UP);
-
- vnet_hw_if_set_rx_queue_mode (vnm, apif->queue_index,
- VNET_HW_IF_RX_MODE_INTERRUPT);
- vnet_hw_if_update_runtime_data (vnm, apif->hw_if_index);
- {
- clib_file_t template = { 0 };
- template.read_function = af_packet_fd_read_ready;
- template.file_descriptor = fd;
- template.private_data = if_index;
- template.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED;
- template.description =
- format (0, "%U", format_af_packet_device_name, if_index);
- apif->clib_file_index = clib_file_add (&file_main, &template);
- }
- vnet_hw_if_set_rx_queue_file_index (vnm, apif->queue_index,
- apif->clib_file_index);
-
- mhash_set_mem (&apm->if_index_by_host_if_name, host_if_name_dup, &if_index,
- 0);
- arg->sw_if_index = apif->sw_if_index;
-
- return 0;
-
-error:
- if (fd2 > -1)
- {
- close (fd2);
- fd2 = -1;
- }
- vec_free (host_if_name_dup);
- vec_free (rx_req);
- vec_free (tx_req);
- return ret;
-}
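One detail worth calling out in the function above: the rx and tx rings share a single mmap region, with all rx blocks laid out before the tx blocks, which is why apif->tx_ring is derived by offsetting past the rx portion. Schematically, reusing the request structures built above:

/* layout of the single mmap created by create_packet_v2_sock():
 *
 *   ring ──► [ rx block 0 | ... | rx block N-1 | tx block 0 | ... ]
 */
u32 rx_part = rx_req->tp_block_size * rx_req->tp_block_nr;
u32 tx_part = tx_req->tp_block_size * tx_req->tp_block_nr;
/* apif->rx_ring = ring;            rx frames start at the base      */
/* apif->tx_ring = ring + rx_part;  tx frames follow the rx blocks   */
/* munmap length at delete time must therefore be rx_part + tx_part  */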
-
-int
-af_packet_delete_if (u8 *host_if_name)
-{
- vnet_main_t *vnm = vnet_get_main ();
- af_packet_main_t *apm = &af_packet_main;
- af_packet_if_t *apif;
- uword *p;
- uword if_index;
- u32 ring_sz;
-
- p = mhash_get (&apm->if_index_by_host_if_name, host_if_name);
- if (p == NULL)
- {
- vlib_log_warn (apm->log_class, "Host interface %s does not exist",
- host_if_name);
- return VNET_API_ERROR_SYSCALL_ERROR_1;
- }
- apif = pool_elt_at_index (apm->interfaces, p[0]);
- if_index = apif - apm->interfaces;
-
- /* bring down the interface */
- vnet_hw_interface_set_flags (vnm, apif->hw_if_index, 0);
-
- /* clean up */
- if (apif->clib_file_index != ~0)
- {
- clib_file_del (&file_main, file_main.file_pool + apif->clib_file_index);
- apif->clib_file_index = ~0;
- }
- else
- close (apif->fd);
-
- ring_sz = apif->rx_req->tp_block_size * apif->rx_req->tp_block_nr +
- apif->tx_req->tp_block_size * apif->tx_req->tp_block_nr;
- if (munmap (apif->rx_ring, ring_sz))
- vlib_log_warn (apm->log_class,
- "Host interface %s could not free rx/tx ring",
- host_if_name);
- apif->rx_ring = NULL;
- apif->tx_ring = NULL;
- apif->fd = -1;
-
- vec_free (apif->rx_req);
- apif->rx_req = NULL;
- vec_free (apif->tx_req);
- apif->tx_req = NULL;
-
- vec_free (apif->host_if_name);
- apif->host_if_name = NULL;
- apif->host_if_index = -1;
-
- mhash_unset (&apm->if_index_by_host_if_name, host_if_name, &if_index);
-
- if (apif->mode == AF_PACKET_IF_MODE_ETHERNET)
- ethernet_delete_interface (vnm, apif->hw_if_index);
- else
- vnet_delete_hw_interface (vnm, apif->hw_if_index);
-
- pool_put (apm->interfaces, apif);
-
- return 0;
-}
-
-int
-af_packet_set_l4_cksum_offload (u32 sw_if_index, u8 set)
-{
- vnet_main_t *vnm = vnet_get_main ();
- vnet_hw_interface_t *hw;
-
- hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
-
- if (hw->dev_class_index != af_packet_device_class.index)
- return VNET_API_ERROR_INVALID_INTERFACE;
-
- if (set)
- {
- hw->caps &= ~(VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM);
- }
- else
- {
- hw->caps |= (VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM);
- }
- return 0;
-}
-
-int
-af_packet_dump_ifs (af_packet_if_detail_t ** out_af_packet_ifs)
-{
- af_packet_main_t *apm = &af_packet_main;
- af_packet_if_t *apif;
- af_packet_if_detail_t *r_af_packet_ifs = NULL;
- af_packet_if_detail_t *af_packet_if = NULL;
-
- pool_foreach (apif, apm->interfaces)
- {
- vec_add2 (r_af_packet_ifs, af_packet_if, 1);
- af_packet_if->sw_if_index = apif->sw_if_index;
- if (apif->host_if_name)
- {
- clib_memcpy (af_packet_if->host_if_name, apif->host_if_name,
- MIN (ARRAY_LEN (af_packet_if->host_if_name) - 1,
- strlen ((const char *) apif->host_if_name)));
- }
- }
-
- *out_af_packet_ifs = r_af_packet_ifs;
-
- return 0;
-}
-
-static clib_error_t *
-af_packet_init (vlib_main_t * vm)
-{
- af_packet_main_t *apm = &af_packet_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
-
- clib_memset (apm, 0, sizeof (af_packet_main_t));
-
- mhash_init_vec_string (&apm->if_index_by_host_if_name, sizeof (uword));
-
- vec_validate_aligned (apm->rx_buffers, tm->n_vlib_mains - 1,
- CLIB_CACHE_LINE_BYTES);
-
- apm->log_class = vlib_log_register_class ("af_packet", 0);
- vlib_log_debug (apm->log_class, "initialized");
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (af_packet_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h
deleted file mode 100644
index 652e173fd2a..00000000000
--- a/src/vnet/devices/af_packet/af_packet.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- *------------------------------------------------------------------
- * af_packet.h - linux kernel packet interface header file
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vppinfra/lock.h>
-#include <vlib/log.h>
-
-typedef enum
-{
- AF_PACKET_IF_MODE_ETHERNET = 1,
- AF_PACKET_IF_MODE_IP = 2
-} af_packet_if_mode_t;
-
-typedef struct
-{
- u32 sw_if_index;
- u8 host_if_name[64];
-} af_packet_if_detail_t;
-
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- clib_spinlock_t lockp;
- u8 *host_if_name;
- int host_if_index;
- int fd;
- struct tpacket_req *rx_req;
- struct tpacket_req *tx_req;
- u8 *rx_ring;
- u8 *tx_ring;
- u32 hw_if_index;
- u32 sw_if_index;
- u32 clib_file_index;
-
- u32 next_rx_frame;
- u32 next_tx_frame;
-
- u32 per_interface_next_index;
- u8 is_admin_up;
- u32 queue_index;
- u32 host_mtu;
- af_packet_if_mode_t mode;
-} af_packet_if_t;
-
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- af_packet_if_t *interfaces;
-
- /* bitmap of pending rx interfaces */
- uword *pending_input_bitmap;
-
- /* rx buffer cache */
- u32 **rx_buffers;
-
- /* hash of host interface names */
- mhash_t if_index_by_host_if_name;
-
- /** log class */
- vlib_log_class_t log_class;
-} af_packet_main_t;
-
-typedef struct
-{
- u8 *host_if_name;
- u8 *hw_addr;
- u32 rx_frame_size;
- u32 tx_frame_size;
- u32 rx_frames_per_block;
- u32 tx_frames_per_block;
- af_packet_if_mode_t mode;
-
- /* return */
- u32 sw_if_index;
-} af_packet_create_if_arg_t;
-
-extern af_packet_main_t af_packet_main;
-extern vnet_device_class_t af_packet_device_class;
-extern vlib_node_registration_t af_packet_input_node;
-
-int af_packet_create_if (af_packet_create_if_arg_t *arg);
-int af_packet_delete_if (u8 *host_if_name);
-int af_packet_set_l4_cksum_offload (u32 sw_if_index, u8 set);
-int af_packet_dump_ifs (af_packet_if_detail_t ** out_af_packet_ifs);
-
-format_function_t format_af_packet_device_name;
-
-#define MIN(x,y) (((x)<(y))?(x):(y))
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
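For orientation, a minimal caller of the API declared in this header might look like the following sketch; the interface name "veth0" and the frame size are assumptions for illustration only:

/* sketch: programmatic create/delete via the declarations above */
af_packet_create_if_arg_t _arg = { 0 }, *arg = &_arg;
int rv;

arg->host_if_name = format (0, "veth0%c", 0); /* NUL-terminated vector */
arg->mode = AF_PACKET_IF_MODE_ETHERNET;
arg->rx_frame_size = 2048;	/* 0 selects the built-in default */

rv = af_packet_create_if (arg);
if (rv == 0)
  {
    /* arg->sw_if_index now refers to the new VPP interface */
    af_packet_delete_if (arg->host_if_name);
  }
vec_free (arg->host_if_name);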
diff --git a/src/vnet/devices/af_packet/af_packet_api.c b/src/vnet/devices/af_packet/af_packet_api.c
deleted file mode 100644
index 80a2d9222c6..00000000000
--- a/src/vnet/devices/af_packet/af_packet_api.c
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- *------------------------------------------------------------------
- * af_packet_api.c - af-packet api
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vnet/vnet.h>
-#include <vlibmemory/api.h>
-
-#include <vnet/interface.h>
-#include <vnet/api_errno.h>
-#include <vnet/devices/af_packet/af_packet.h>
-
-#include <vnet/format_fns.h>
-#include <vnet/devices/af_packet/af_packet.api_enum.h>
-#include <vnet/devices/af_packet/af_packet.api_types.h>
-
-#define REPLY_MSG_ID_BASE msg_id_base
-#include <vlibapi/api_helper_macros.h>
-
-static u16 msg_id_base;
-
-static void
-vl_api_af_packet_create_t_handler (vl_api_af_packet_create_t * mp)
-{
- af_packet_create_if_arg_t _arg, *arg = &_arg;
- vl_api_af_packet_create_reply_t *rmp;
- int rv = 0;
-
- clib_memset (arg, 0, sizeof (*arg));
-
- arg->host_if_name = format (0, "%s", mp->host_if_name);
- vec_add1 (arg->host_if_name, 0);
-
- arg->hw_addr = mp->use_random_hw_addr ? 0 : mp->hw_addr;
- rv = af_packet_create_if (arg);
-
- vec_free (arg->host_if_name);
-
- REPLY_MACRO2 (VL_API_AF_PACKET_CREATE_REPLY, ({
- rmp->sw_if_index = clib_host_to_net_u32 (arg->sw_if_index);
- }));
-}
-
-static void
-vl_api_af_packet_create_v2_t_handler (vl_api_af_packet_create_v2_t *mp)
-{
- af_packet_create_if_arg_t _arg, *arg = &_arg;
- vl_api_af_packet_create_v2_reply_t *rmp;
- int rv = 0;
-
- clib_memset (arg, 0, sizeof (*arg));
-
- arg->host_if_name = format (0, "%s", mp->host_if_name);
- vec_add1 (arg->host_if_name, 0);
-
- arg->rx_frame_size = clib_net_to_host_u32 (mp->rx_frame_size);
- arg->tx_frame_size = clib_net_to_host_u32 (mp->tx_frame_size);
- arg->rx_frames_per_block = clib_net_to_host_u32 (mp->rx_frames_per_block);
- arg->tx_frames_per_block = clib_net_to_host_u32 (mp->tx_frames_per_block);
- arg->hw_addr = mp->use_random_hw_addr ? 0 : mp->hw_addr;
-
- if (mp->num_rx_queues > 1)
- {
- rv = VNET_API_ERROR_INVALID_VALUE;
- goto out;
- }
-
- rv = af_packet_create_if (arg);
-
-out:
- vec_free (arg->host_if_name);
- REPLY_MACRO2 (VL_API_AF_PACKET_CREATE_V2_REPLY, ({
- rmp->sw_if_index = clib_host_to_net_u32 (arg->sw_if_index);
- }));
-}
-
-static void
-vl_api_af_packet_delete_t_handler (vl_api_af_packet_delete_t * mp)
-{
- vl_api_af_packet_delete_reply_t *rmp;
- int rv = 0;
- u8 *host_if_name = NULL;
-
- host_if_name = format (0, "%s", mp->host_if_name);
- vec_add1 (host_if_name, 0);
-
- rv = af_packet_delete_if (host_if_name);
-
- vec_free (host_if_name);
-
- REPLY_MACRO (VL_API_AF_PACKET_DELETE_REPLY);
-}
-
-static void
- vl_api_af_packet_set_l4_cksum_offload_t_handler
- (vl_api_af_packet_set_l4_cksum_offload_t * mp)
-{
- vl_api_af_packet_delete_reply_t *rmp;
- int rv = 0;
-
- rv = af_packet_set_l4_cksum_offload (ntohl (mp->sw_if_index), mp->set);
- REPLY_MACRO (VL_API_AF_PACKET_SET_L4_CKSUM_OFFLOAD_REPLY);
-}
-
-static void
-af_packet_send_details (vpe_api_main_t * am,
- vl_api_registration_t * reg,
- af_packet_if_detail_t * af_packet_if, u32 context)
-{
- vl_api_af_packet_details_t *mp;
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = htons (REPLY_MSG_ID_BASE + VL_API_AF_PACKET_DETAILS);
- mp->sw_if_index = htonl (af_packet_if->sw_if_index);
- clib_memcpy (mp->host_if_name, af_packet_if->host_if_name,
- MIN (ARRAY_LEN (mp->host_if_name) - 1,
- strlen ((const char *) af_packet_if->host_if_name)));
-
- mp->context = context;
- vl_api_send_msg (reg, (u8 *) mp);
-}
-
-
-static void
-vl_api_af_packet_dump_t_handler (vl_api_af_packet_dump_t * mp)
-{
- int rv;
- vpe_api_main_t *am = &vpe_api_main;
- vl_api_registration_t *reg;
- af_packet_if_detail_t *out_af_packet_ifs = NULL;
- af_packet_if_detail_t *af_packet_if = NULL;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- rv = af_packet_dump_ifs (&out_af_packet_ifs);
- if (rv)
- return;
-
- vec_foreach (af_packet_if, out_af_packet_ifs)
- {
- af_packet_send_details (am, reg, af_packet_if, mp->context);
- }
-
- vec_free (out_af_packet_ifs);
-}
-
-#include <vnet/devices/af_packet/af_packet.api.c>
-static clib_error_t *
-af_packet_api_hookup (vlib_main_t * vm)
-{
- /*
- * Set up the (msg_name, crc, message-id) table
- */
- REPLY_MSG_ID_BASE = setup_message_id_table ();
-
- return 0;
-}
-
-VLIB_API_INIT_FUNCTION (af_packet_api_hookup);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/af_packet/cli.c b/src/vnet/devices/af_packet/cli.c
deleted file mode 100644
index 3dd3c8ee848..00000000000
--- a/src/vnet/devices/af_packet/cli.c
+++ /dev/null
@@ -1,277 +0,0 @@
-/*
- *------------------------------------------------------------------
- * af_packet.c - linux kernel packet interface
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <fcntl.h> /* for open */
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/uio.h> /* for iovec */
-#include <netinet/in.h>
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
-
-#include <vnet/devices/af_packet/af_packet.h>
-
-/**
- * @file
- * @brief CLI for Host Interface Device Driver.
- *
- * This file contains the source code for the CLI for the host interface.
- */
-
-static clib_error_t *
-af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- af_packet_create_if_arg_t _arg, *arg = &_arg;
- clib_error_t *error = NULL;
- u8 hwaddr[6];
- int r;
-
- clib_memset (arg, 0, sizeof (*arg));
-
- // Default mode
- arg->mode = AF_PACKET_IF_MODE_ETHERNET;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "name %s", &arg->host_if_name))
- ;
- else if (unformat (line_input, "rx-size %u", &arg->rx_frame_size))
- ;
- else if (unformat (line_input, "tx-size %u", &arg->tx_frame_size))
- ;
- else if (unformat (line_input, "rx-per-block %u",
- &arg->rx_frames_per_block))
- ;
- else if (unformat (line_input, "tx-per-block %u",
- &arg->tx_frames_per_block))
- ;
- else if (unformat (line_input, "mode ip"))
- arg->mode = AF_PACKET_IF_MODE_IP;
- else if (unformat (line_input, "hw-addr %U", unformat_ethernet_address,
- hwaddr))
- arg->hw_addr = hwaddr;
- else
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- goto done;
- }
- }
-
- if (arg->host_if_name == NULL)
- {
- error = clib_error_return (0, "missing host interface name");
- goto done;
- }
-
- r = af_packet_create_if (arg);
-
- if (r == VNET_API_ERROR_SYSCALL_ERROR_1)
- {
- error = clib_error_return (0, "%s (errno %d)", strerror (errno), errno);
- goto done;
- }
-
- if (r == VNET_API_ERROR_INVALID_INTERFACE)
- {
- error = clib_error_return (0, "Invalid interface name");
- goto done;
- }
-
-  if (r == VNET_API_ERROR_IF_ALREADY_EXISTS)
- {
- error = clib_error_return (0, "Interface already exists");
- goto done;
- }
-
- vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (),
- arg->sw_if_index);
-
-done:
- vec_free (arg->host_if_name);
- unformat_free (line_input);
-
- return error;
-}
-
-/*?
- * Create a host interface that will attach to a Linux AF_PACKET
- * interface, one side of a veth pair. The veth pair must already
- * exist. Once created, a new host interface will exist in VPP
- * with the name '<em>host-<ifname></em>', where '<em><ifname></em>'
- * is the name of the specified veth pair. Use the
- * '<em>show interface</em>' command to display host interface details.
- *
- * This command has the following optional parameters:
- *
- * - <b>hw-addr <mac-addr></b> - Optional ethernet address, can be in either
- * X:X:X:X:X:X unix or X.X.X cisco format.
- *
- * @cliexpar
- * Example of how to create a host interface tied to one side of an
- * existing linux veth pair named vpp1:
- * @cliexstart{create host-interface name vpp1}
- * host-vpp1
- * @cliexend
- * Once the host interface is created, enable the interface using:
- * @cliexcmd{set interface state host-vpp1 up}
-?*/
-VLIB_CLI_COMMAND (af_packet_create_command, static) = {
- .path = "create host-interface",
-  .short_help = "create host-interface name <ifname> [hw-addr <mac-addr>] "
-		"[rx-size <n>] [tx-size <n>] [rx-per-block <n>] "
-		"[tx-per-block <n>] [mode ip]",
- .function = af_packet_create_command_fn,
-};
-
-static clib_error_t *
-af_packet_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- u8 *host_if_name = NULL;
- clib_error_t *error = NULL;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "name %s", &host_if_name))
- ;
- else
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- goto done;
- }
- }
-
- if (host_if_name == NULL)
- {
- error = clib_error_return (0, "missing host interface name");
- goto done;
- }
-
- af_packet_delete_if (host_if_name);
-
-done:
- vec_free (host_if_name);
- unformat_free (line_input);
-
- return error;
-}
-
-/*?
- * Delete a host interface. Use the Linux interface name to identify
- * the host interface to be deleted. In VPP, host interfaces are
- * named '<em>host-<ifname></em>', where '<em><ifname></em>'
- * is the name of the Linux interface.
- *
- * @cliexpar
- * Example of how to delete a host interface named host-vpp1:
- * @cliexcmd{delete host-interface name vpp1}
-?*/
-VLIB_CLI_COMMAND (af_packet_delete_command, static) = {
- .path = "delete host-interface",
- .short_help = "delete host-interface name <ifname>",
- .function = af_packet_delete_command_fn,
-};
-
-static clib_error_t *
-af_packet_set_l4_cksum_offload_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- u8 set = 0;
- clib_error_t *error = NULL;
- vnet_main_t *vnm = vnet_get_main ();
- u32 sw_if_index;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
- &sw_if_index))
- ;
- else if (unformat (line_input, "on"))
- set = 1;
- else if (unformat (line_input, "off"))
- set = 0;
- else
- {
- error = clib_error_return (0, "unknown input '%U'",
- format_unformat_error, line_input);
- goto done;
- }
- }
-
- if (af_packet_set_l4_cksum_offload (sw_if_index, set) < 0)
- error = clib_error_return (0, "not an af_packet interface");
-
-done:
- unformat_free (line_input);
- return error;
-}
-
-/*?
- * Enable or disable TCP/UDP checksum offload calculation. Use the
- * interface name to identify the interface on which the setting
- * should be changed.
- *
- * @cliexpar
- * Example of how to set TCP/UDP offload checksum calculation on host-vpp0:
- * @cliexcmd{set host-interface l4-cksum-offload host-vpp0 off}
- * @cliexcmd{set host-interface l4-cksum-offload host-vpp0 on}
-?*/
-VLIB_CLI_COMMAND (af_packet_set_l4_cksum_offload_command, static) = {
- .path = "set host-interface l4-cksum-offload",
- .short_help = "set host-interface l4-cksum-offload <host-if-name> <on|off>",
- .function = af_packet_set_l4_cksum_offload_command_fn,
-};
-
-clib_error_t *
-af_packet_cli_init (vlib_main_t * vm)
-{
- return 0;
-}
-
-VLIB_INIT_FUNCTION (af_packet_cli_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c
deleted file mode 100644
index c8e59c3566c..00000000000
--- a/src/vnet/devices/af_packet/device.c
+++ /dev/null
@@ -1,412 +0,0 @@
-/*
- *------------------------------------------------------------------
- * af_packet.c - linux kernel packet interface
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <linux/if_packet.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <net/if.h>
-#include <net/if_arp.h>
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
-
-#include <vnet/devices/af_packet/af_packet.h>
-
-#define foreach_af_packet_tx_func_error \
-_(FRAME_NOT_READY, "tx frame not ready") \
-_(TXRING_EAGAIN, "tx sendto temporary failure") \
-_(TXRING_FATAL, "tx sendto fatal failure") \
-_(TXRING_OVERRUN, "tx ring overrun")
-
-typedef enum
-{
-#define _(f,s) AF_PACKET_TX_ERROR_##f,
- foreach_af_packet_tx_func_error
-#undef _
- AF_PACKET_TX_N_ERROR,
-} af_packet_tx_func_error_t;
-
-static char *af_packet_tx_func_error_strings[] = {
-#define _(n,s) s,
- foreach_af_packet_tx_func_error
-#undef _
-};
-
-
-#ifndef CLIB_MARCH_VARIANT
-u8 *
-format_af_packet_device_name (u8 * s, va_list * args)
-{
- u32 i = va_arg (*args, u32);
- af_packet_main_t *apm = &af_packet_main;
- af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, i);
-
- s = format (s, "host-%s", apif->host_if_name);
- return s;
-}
-#endif /* CLIB_MARCH_VARIANT */
-
-static u8 *
-format_af_packet_device (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- u32 indent = format_get_indent (s);
- int __clib_unused verbose = va_arg (*args, int);
-
- af_packet_main_t *apm = &af_packet_main;
- af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, dev_instance);
- clib_spinlock_lock_if_init (&apif->lockp);
- u32 tx_block_sz = apif->tx_req->tp_block_size;
- u32 tx_frame_sz = apif->tx_req->tp_frame_size;
- u32 tx_frame_nr = apif->tx_req->tp_frame_nr;
- u32 tx_block_nr = apif->tx_req->tp_block_nr;
- u32 rx_block_size = apif->rx_req->tp_block_size;
- u32 rx_frame_size = apif->rx_req->tp_frame_size;
- u32 rx_frame_nr = apif->rx_req->tp_frame_nr;
- u32 rx_block_nr = apif->rx_req->tp_block_nr;
- int block = 0;
- u8 *tx_block_start = apif->tx_ring + block * tx_block_sz;
- u32 tx_frame = apif->next_tx_frame;
- struct tpacket2_hdr *tph;
-
- s = format (s, "Linux PACKET socket interface\n");
- s = format (s, "%UTX block size:%d nr:%d TX frame size:%d nr:%d\n",
- format_white_space, indent, tx_block_sz, tx_block_nr,
- tx_frame_sz, tx_frame_nr);
- s = format (s, "%URX block size:%d nr:%d RX frame size:%d nr:%d\n",
- format_white_space, indent, rx_block_size, rx_block_nr,
- rx_frame_size, rx_frame_nr);
- s = format (s, "%Unext frame:%d\n", format_white_space, indent,
- apif->next_tx_frame);
-
- int n_send_req = 0, n_avail = 0, n_sending = 0, n_tot = 0, n_wrong = 0;
- do
- {
- tph = (struct tpacket2_hdr *) (tx_block_start + tx_frame * tx_frame_sz);
- tx_frame = (tx_frame + 1) % tx_frame_nr;
- if (tph->tp_status == 0)
- n_avail++;
- else if (tph->tp_status & TP_STATUS_SEND_REQUEST)
- n_send_req++;
- else if (tph->tp_status & TP_STATUS_SENDING)
- n_sending++;
- else
- n_wrong++;
- n_tot++;
- }
- while (tx_frame != apif->next_tx_frame);
- s = format (s, "%Uavailable:%d request:%d sending:%d wrong:%d total:%d\n",
- format_white_space, indent, n_avail, n_send_req, n_sending,
- n_wrong, n_tot);
-
- clib_spinlock_unlock_if_init (&apif->lockp);
- return s;
-}
-
-static u8 *
-format_af_packet_tx_trace (u8 * s, va_list * args)
-{
- s = format (s, "Unimplemented...");
- return s;
-}
-
-VNET_DEVICE_CLASS_TX_FN (af_packet_device_class) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- af_packet_main_t *apm = &af_packet_main;
- u32 *buffers = vlib_frame_vector_args (frame);
- u32 n_left = frame->n_vectors;
- u32 n_sent = 0;
- vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
- af_packet_if_t *apif =
- pool_elt_at_index (apm->interfaces, rd->dev_instance);
- clib_spinlock_lock_if_init (&apif->lockp);
- int block = 0;
- u32 block_size = apif->tx_req->tp_block_size;
- u32 frame_size = apif->tx_req->tp_frame_size;
- u32 frame_num = apif->tx_req->tp_frame_nr;
- u8 *block_start = apif->tx_ring + block * block_size;
- u32 tx_frame = apif->next_tx_frame;
- struct tpacket2_hdr *tph;
- u32 frame_not_ready = 0;
-
- while (n_left)
- {
- u32 len;
- u32 offset = 0;
- vlib_buffer_t *b0;
- n_left--;
- u32 bi = buffers[0];
- buffers++;
-
- tph = (struct tpacket2_hdr *) (block_start + tx_frame * frame_size);
- if (PREDICT_FALSE (tph->tp_status &
- (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)))
- {
- frame_not_ready++;
- goto next;
- }
-
- do
- {
- b0 = vlib_get_buffer (vm, bi);
- len = b0->current_length;
- clib_memcpy_fast ((u8 *) tph +
- TPACKET_ALIGN (sizeof (struct tpacket2_hdr)) +
- offset, vlib_buffer_get_current (b0), len);
- offset += len;
- }
- while ((bi =
- (b0->flags & VLIB_BUFFER_NEXT_PRESENT) ? b0->next_buffer : 0));
-
- tph->tp_len = tph->tp_snaplen = offset;
- tph->tp_status = TP_STATUS_SEND_REQUEST;
- n_sent++;
-
- tx_frame = (tx_frame + 1) % frame_num;
-
- next:
- /* check if we've exhausted the ring */
- if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num))
- break;
- }
-
- CLIB_MEMORY_BARRIER ();
-
- if (PREDICT_TRUE (n_sent))
- {
- apif->next_tx_frame = tx_frame;
-
- if (PREDICT_FALSE (sendto (apif->fd, NULL, 0, MSG_DONTWAIT, NULL, 0) ==
- -1))
- {
- /* Uh-oh, drop & move on, but count whether it was fatal or not.
- * Note that we have no reliable way to properly determine the
- * disposition of the packets we just enqueued for delivery.
- */
- vlib_error_count (vm, node->node_index,
- unix_error_is_fatal (errno) ?
- AF_PACKET_TX_ERROR_TXRING_FATAL :
- AF_PACKET_TX_ERROR_TXRING_EAGAIN,
- n_sent);
- }
- }
-
- clib_spinlock_unlock_if_init (&apif->lockp);
-
- if (PREDICT_FALSE (frame_not_ready))
- vlib_error_count (vm, node->node_index,
- AF_PACKET_TX_ERROR_FRAME_NOT_READY, frame_not_ready);
-
- if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num))
- vlib_error_count (vm, node->node_index, AF_PACKET_TX_ERROR_TXRING_OVERRUN,
- n_left);
-
- vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
- return frame->n_vectors;
-}
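The tx function above is driven entirely by the per-frame tp_status handshake with the kernel. Stripped of the VPP plumbing (and assuming this file's includes), the contract is roughly this sketch:

/* sketch: hand one pre-copied frame of 'len' bytes to the kernel */
static void
tx_publish_frame_sketch (int fd, struct tpacket2_hdr *tph, unsigned len)
{
  /* only frames with tp_status == 0 may be (re)used by user space */
  tph->tp_len = tph->tp_snaplen = len;
  /* publish the frame; the kernel moves it through TP_STATUS_SENDING
   * and back to 0 once transmission completes */
  tph->tp_status = TP_STATUS_SEND_REQUEST;
  /* kick transmission of all queued SEND_REQUEST frames, non-blocking */
  sendto (fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
}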
-
-static void
-af_packet_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
- u32 node_index)
-{
- af_packet_main_t *apm = &af_packet_main;
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
- af_packet_if_t *apif =
- pool_elt_at_index (apm->interfaces, hw->dev_instance);
-
- /* Shut off redirection */
- if (node_index == ~0)
- {
- apif->per_interface_next_index = node_index;
- return;
- }
-
- apif->per_interface_next_index =
- vlib_node_add_next (vlib_get_main (), af_packet_input_node.index,
- node_index);
-}
-
-static void
-af_packet_clear_hw_interface_counters (u32 instance)
-{
- /* Nothing for now */
-}
-
-static clib_error_t *
-af_packet_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
- u32 flags)
-{
- af_packet_main_t *apm = &af_packet_main;
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
- af_packet_if_t *apif =
- pool_elt_at_index (apm->interfaces, hw->dev_instance);
- u32 hw_flags;
- int rv, fd = socket (AF_UNIX, SOCK_DGRAM, 0);
- struct ifreq ifr;
-
- if (0 > fd)
- {
- vlib_log_warn (apm->log_class, "af_packet_%s could not open socket",
- apif->host_if_name);
- return 0;
- }
-
-  /* if the interface is a bridge, ignore it */
- if (apif->host_if_index < 0)
- goto error; /* no error */
-
- /* use host_if_index in case host name has changed */
- ifr.ifr_ifindex = apif->host_if_index;
- if ((rv = ioctl (fd, SIOCGIFNAME, &ifr)) < 0)
- {
- vlib_log_warn (apm->log_class,
- "af_packet_%s ioctl could not retrieve eth name",
- apif->host_if_name);
- goto error;
- }
-
- apif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
-
- if ((rv = ioctl (fd, SIOCGIFFLAGS, &ifr)) < 0)
- {
- vlib_log_warn (apm->log_class, "af_packet_%s error: %d",
- apif->is_admin_up ? "up" : "down", rv);
- goto error;
- }
-
- if (apif->is_admin_up)
- {
- hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP;
- ifr.ifr_flags |= IFF_UP;
- }
- else
- {
- hw_flags = 0;
- ifr.ifr_flags &= ~IFF_UP;
- }
-
- if ((rv = ioctl (fd, SIOCSIFFLAGS, &ifr)) < 0)
- {
- vlib_log_warn (apm->log_class, "af_packet_%s error: %d",
- apif->is_admin_up ? "up" : "down", rv);
- goto error;
- }
-
- vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
-
-error:
- if (0 <= fd)
- close (fd);
-
- return 0; /* no error */
-}
-
-static clib_error_t *
-af_packet_subif_add_del_function (vnet_main_t * vnm,
- u32 hw_if_index,
- struct vnet_sw_interface_t *st, int is_add)
-{
- /* Nothing for now */
- return 0;
-}
-
-static clib_error_t *af_packet_set_mac_address_function
- (struct vnet_hw_interface_t *hi, const u8 * old_address, const u8 * address)
-{
- af_packet_main_t *apm = &af_packet_main;
- af_packet_if_t *apif =
- pool_elt_at_index (apm->interfaces, hi->dev_instance);
- int rv, fd;
- struct ifreq ifr;
-
- if (apif->mode == AF_PACKET_IF_MODE_IP)
- {
- vlib_log_warn (apm->log_class, "af_packet_%s interface is in IP mode",
- apif->host_if_name);
-      return clib_error_return (0,
-				"MAC update failed, interface is in IP mode");
- }
-
- fd = socket (AF_UNIX, SOCK_DGRAM, 0);
- if (0 > fd)
- {
- vlib_log_warn (apm->log_class, "af_packet_%s could not open socket",
- apif->host_if_name);
- return 0;
- }
-
-  /* if the interface is a bridge, ignore it */
- if (apif->host_if_index < 0)
- goto error; /* no error */
-
- /* use host_if_index in case host name has changed */
- ifr.ifr_ifindex = apif->host_if_index;
- if ((rv = ioctl (fd, SIOCGIFNAME, &ifr)) < 0)
- {
- vlib_log_warn
- (apm->log_class,
- "af_packet_%s ioctl could not retrieve eth name, error: %d",
- apif->host_if_name, rv);
- goto error;
- }
-
- clib_memcpy (ifr.ifr_hwaddr.sa_data, address, 6);
- ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
-
- if ((rv = ioctl (fd, SIOCSIFHWADDR, &ifr)) < 0)
- {
- vlib_log_warn (apm->log_class,
- "af_packet_%s ioctl could not set mac, error: %d",
- apif->host_if_name, rv);
- goto error;
- }
-
-error:
-
- if (0 <= fd)
- close (fd);
-
- return 0; /* no error */
-}
-
-VNET_DEVICE_CLASS (af_packet_device_class) = {
- .name = "af-packet",
- .format_device_name = format_af_packet_device_name,
- .format_device = format_af_packet_device,
- .format_tx_trace = format_af_packet_tx_trace,
- .tx_function_n_errors = AF_PACKET_TX_N_ERROR,
- .tx_function_error_strings = af_packet_tx_func_error_strings,
- .rx_redirect_to_node = af_packet_set_interface_next_node,
- .clear_counters = af_packet_clear_hw_interface_counters,
- .admin_up_down_function = af_packet_interface_admin_up_down,
- .subif_add_del_function = af_packet_subif_add_del_function,
- .mac_addr_change_function = af_packet_set_mac_address_function,
-};
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/af_packet/dir.dox b/src/vnet/devices/af_packet/dir.dox
deleted file mode 100644
index 78991c6d97f..00000000000
--- a/src/vnet/devices/af_packet/dir.dox
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Doxygen directory documentation */
-
-/**
-@dir
-@brief Host Interface Implementation.
-
-This directory contains the source code for the Host Interface driver.
-The Host Interface driver is built on the Linux AF_PACKET socket
-interface.
-
-
-*/
-/*? %%clicmd:group_label Host Interface %% ?*/
-/*? %%syscfg:group_label Host Interface %% ?*/
diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c
deleted file mode 100644
index 0fdae5c3039..00000000000
--- a/src/vnet/devices/af_packet/node.c
+++ /dev/null
@@ -1,435 +0,0 @@
-/*
- *------------------------------------------------------------------
- * af_packet.c - linux kernel packet interface
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <linux/if_packet.h>
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/interface/rx_queue_funcs.h>
-#include <vnet/feature/feature.h>
-#include <vnet/ethernet/packet.h>
-
-#include <vnet/devices/af_packet/af_packet.h>
-
-#define foreach_af_packet_input_error \
- _(PARTIAL_PKT, "partial packet")
-
-typedef enum
-{
-#define _(f,s) AF_PACKET_INPUT_ERROR_##f,
- foreach_af_packet_input_error
-#undef _
- AF_PACKET_INPUT_N_ERROR,
-} af_packet_input_error_t;
-
-static char *af_packet_input_error_strings[] = {
-#define _(n,s) s,
- foreach_af_packet_input_error
-#undef _
-};
-
-typedef struct
-{
- u32 next_index;
- u32 hw_if_index;
- int block;
- struct tpacket2_hdr tph;
-} af_packet_input_trace_t;
-
-static u8 *
-format_af_packet_input_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- af_packet_input_trace_t *t = va_arg (*args, af_packet_input_trace_t *);
- u32 indent = format_get_indent (s);
-
- s = format (s, "af_packet: hw_if_index %d next-index %d",
- t->hw_if_index, t->next_index);
-
- s =
- format (s,
- "\n%Utpacket2_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
- "\n%Usec 0x%x nsec 0x%x vlan %U"
-#ifdef TP_STATUS_VLAN_TPID_VALID
- " vlan_tpid %u"
-#endif
- ,
- format_white_space, indent + 2,
- format_white_space, indent + 4,
- t->tph.tp_status,
- t->tph.tp_len,
- t->tph.tp_snaplen,
- t->tph.tp_mac,
- t->tph.tp_net,
- format_white_space, indent + 4,
- t->tph.tp_sec,
- t->tph.tp_nsec, format_ethernet_vlan_tci, t->tph.tp_vlan_tci
-#ifdef TP_STATUS_VLAN_TPID_VALID
- , t->tph.tp_vlan_tpid
-#endif
- );
- return s;
-}
-
-always_inline void
-buffer_add_to_chain (vlib_main_t * vm, u32 bi, u32 first_bi, u32 prev_bi)
-{
- vlib_buffer_t *b = vlib_get_buffer (vm, bi);
- vlib_buffer_t *first_b = vlib_get_buffer (vm, first_bi);
- vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_bi);
-
- /* update first buffer */
- first_b->total_length_not_including_first_buffer += b->current_length;
-
- /* update previous buffer */
- prev_b->next_buffer = bi;
- prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
-
- /* update current buffer */
- b->next_buffer = 0;
-}
-
-static_always_inline void
-fill_gso_buffer_flags (vlib_buffer_t *b, u32 gso_size, u8 l4_hdr_sz)
-{
- b->flags |= VNET_BUFFER_F_GSO;
- vnet_buffer2 (b)->gso_size = gso_size;
- vnet_buffer2 (b)->gso_l4_hdr_sz = l4_hdr_sz;
-}
-
-static_always_inline void
-mark_tcp_udp_cksum_calc (vlib_buffer_t *b, u8 *l4_hdr_sz)
-{
- ethernet_header_t *eth = vlib_buffer_get_current (b);
- vnet_buffer_oflags_t oflags = 0;
- if (clib_net_to_host_u16 (eth->type) == ETHERNET_TYPE_IP4)
- {
- ip4_header_t *ip4 =
- (vlib_buffer_get_current (b) + sizeof (ethernet_header_t));
- b->flags |= VNET_BUFFER_F_IS_IP4;
- if (ip4->protocol == IP_PROTOCOL_TCP)
- {
- oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
- tcp_header_t *tcp = (tcp_header_t *) (vlib_buffer_get_current (b) +
- sizeof (ethernet_header_t) +
- ip4_header_bytes (ip4));
- *l4_hdr_sz = tcp_header_bytes (tcp);
- }
- else if (ip4->protocol == IP_PROTOCOL_UDP)
- {
- oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
- udp_header_t *udp = (udp_header_t *) (vlib_buffer_get_current (b) +
- sizeof (ethernet_header_t) +
- ip4_header_bytes (ip4));
- *l4_hdr_sz = sizeof (*udp);
- }
- vnet_buffer (b)->l3_hdr_offset = sizeof (ethernet_header_t);
- vnet_buffer (b)->l4_hdr_offset =
- sizeof (ethernet_header_t) + ip4_header_bytes (ip4);
- if (oflags)
- vnet_buffer_offload_flags_set (b, oflags);
- }
- else if (clib_net_to_host_u16 (eth->type) == ETHERNET_TYPE_IP6)
- {
- ip6_header_t *ip6 =
- (vlib_buffer_get_current (b) + sizeof (ethernet_header_t));
- b->flags |= VNET_BUFFER_F_IS_IP6;
- u16 ip6_hdr_len = sizeof (ip6_header_t);
- if (ip6_ext_hdr (ip6->protocol))
- {
- ip6_ext_header_t *p = (void *) (ip6 + 1);
- ip6_hdr_len += ip6_ext_header_len (p);
- while (ip6_ext_hdr (p->next_hdr))
- {
- ip6_hdr_len += ip6_ext_header_len (p);
- p = ip6_ext_next_header (p);
- }
- }
- if (ip6->protocol == IP_PROTOCOL_TCP)
- {
- oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
- tcp_header_t *tcp =
- (tcp_header_t *) (vlib_buffer_get_current (b) +
- sizeof (ethernet_header_t) + ip6_hdr_len);
- *l4_hdr_sz = tcp_header_bytes (tcp);
- }
- else if (ip6->protocol == IP_PROTOCOL_UDP)
- {
- oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
- udp_header_t *udp =
- (udp_header_t *) (vlib_buffer_get_current (b) +
- sizeof (ethernet_header_t) + ip6_hdr_len);
- *l4_hdr_sz = sizeof (*udp);
- }
- vnet_buffer (b)->l3_hdr_offset = sizeof (ethernet_header_t);
- vnet_buffer (b)->l4_hdr_offset =
- sizeof (ethernet_header_t) + ip6_hdr_len;
- if (oflags)
- vnet_buffer_offload_flags_set (b, oflags);
- }
-}
-
-always_inline uword
-af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, af_packet_if_t * apif)
-{
- af_packet_main_t *apm = &af_packet_main;
- struct tpacket2_hdr *tph;
- u32 next_index;
- u32 block = 0;
- u32 rx_frame;
- u32 n_free_bufs;
- u32 n_rx_packets = 0;
- u32 n_rx_bytes = 0;
- u32 *to_next = 0;
- u32 block_size = apif->rx_req->tp_block_size;
- u32 frame_size = apif->rx_req->tp_frame_size;
- u32 frame_num = apif->rx_req->tp_frame_nr;
- u8 *block_start = apif->rx_ring + block * block_size;
- uword n_trace = vlib_get_trace_count (vm, node);
- u32 thread_index = vm->thread_index;
- u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm);
- u32 min_bufs = apif->rx_req->tp_frame_size / n_buffer_bytes;
- vlib_buffer_t bt;
-
- if (apif->mode == AF_PACKET_IF_MODE_IP)
- {
- next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
- }
- else
- {
- next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
- next_index = apif->per_interface_next_index;
-
- /* redirect if feature path enabled */
- vnet_feature_start_device_input_x1 (apif->sw_if_index, &next_index, &bt);
- }
-
- n_free_bufs = vec_len (apm->rx_buffers[thread_index]);
- if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE))
- {
- vec_validate (apm->rx_buffers[thread_index],
- VLIB_FRAME_SIZE + n_free_bufs - 1);
- n_free_bufs +=
- vlib_buffer_alloc (vm, &apm->rx_buffers[thread_index][n_free_bufs],
- VLIB_FRAME_SIZE);
- _vec_len (apm->rx_buffers[thread_index]) = n_free_bufs;
- }
-
- rx_frame = apif->next_rx_frame;
- tph = (struct tpacket2_hdr *) (block_start + rx_frame * frame_size);
- while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs))
- {
- vlib_buffer_t *b0 = 0, *first_b0 = 0;
- u32 next0 = next_index;
-
- u32 n_left_to_next;
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
- while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs) &&
- n_left_to_next)
- {
- u32 data_len = tph->tp_snaplen;
- u32 offset = 0;
- u32 bi0 = 0, first_bi0 = 0, prev_bi0;
- u8 l4_hdr_sz = 0;
-
- while (data_len)
- {
- /* grab free buffer */
- u32 last_empty_buffer =
- vec_len (apm->rx_buffers[thread_index]) - 1;
- prev_bi0 = bi0;
- bi0 = apm->rx_buffers[thread_index][last_empty_buffer];
- b0 = vlib_get_buffer (vm, bi0);
- _vec_len (apm->rx_buffers[thread_index]) = last_empty_buffer;
- n_free_bufs--;
-
- /* copy data */
- u32 bytes_to_copy =
- data_len > n_buffer_bytes ? n_buffer_bytes : data_len;
- u32 vlan_len = 0;
- u32 bytes_copied = 0;
- b0->current_data = 0;
-		  /* the kernel strips the VLAN header, so reconstruct it */
- if (PREDICT_FALSE (tph->tp_status & TP_STATUS_VLAN_VALID))
- {
- if (PREDICT_TRUE (offset == 0))
- {
- clib_memcpy_fast (vlib_buffer_get_current (b0),
- (u8 *) tph + tph->tp_mac,
- sizeof (ethernet_header_t));
- ethernet_header_t *eth = vlib_buffer_get_current (b0);
- ethernet_vlan_header_t *vlan =
- (ethernet_vlan_header_t *) (eth + 1);
- vlan->priority_cfi_and_id =
- clib_host_to_net_u16 (tph->tp_vlan_tci);
- vlan->type = eth->type;
- eth->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
- vlan_len = sizeof (ethernet_vlan_header_t);
- bytes_copied = sizeof (ethernet_header_t);
- }
- }
- clib_memcpy_fast (((u8 *) vlib_buffer_get_current (b0)) +
- bytes_copied + vlan_len,
- (u8 *) tph + tph->tp_mac + offset +
- bytes_copied, (bytes_to_copy - bytes_copied));
-
- /* fill buffer header */
- b0->current_length = bytes_to_copy + vlan_len;
-
- if (offset == 0)
- {
- b0->total_length_not_including_first_buffer = 0;
- b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = apif->sw_if_index;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- first_bi0 = bi0;
- first_b0 = vlib_get_buffer (vm, first_bi0);
- if (tph->tp_status & TP_STATUS_CSUMNOTREADY)
- mark_tcp_udp_cksum_calc (first_b0, &l4_hdr_sz);
- if (tph->tp_snaplen > apif->host_mtu)
- fill_gso_buffer_flags (first_b0, apif->host_mtu,
- l4_hdr_sz);
- }
- else
- buffer_add_to_chain (vm, bi0, first_bi0, prev_bi0);
-
- offset += bytes_to_copy;
- data_len -= bytes_to_copy;
- }
- n_rx_packets++;
- n_rx_bytes += tph->tp_snaplen;
- to_next[0] = first_bi0;
- to_next += 1;
- n_left_to_next--;
-
- /* drop partial packets */
- if (PREDICT_FALSE (tph->tp_len != tph->tp_snaplen))
- {
- next0 = VNET_DEVICE_INPUT_NEXT_DROP;
- first_b0->error =
- node->errors[AF_PACKET_INPUT_ERROR_PARTIAL_PKT];
- }
- else
- {
- if (PREDICT_FALSE (apif->mode == AF_PACKET_IF_MODE_IP))
- {
- switch (first_b0->data[0] & 0xf0)
- {
- case 0x40:
- next0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
- break;
- case 0x60:
- next0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
- break;
- default:
- next0 = VNET_DEVICE_INPUT_NEXT_DROP;
- break;
- }
- if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
- next0 = apif->per_interface_next_index;
- }
- else
- {
- /* copy feature arc data from template */
- first_b0->current_config_index = bt.current_config_index;
- vnet_buffer (first_b0)->feature_arc_index =
- vnet_buffer (&bt)->feature_arc_index;
- }
- }
-
- /* trace */
- if (PREDICT_FALSE
- (n_trace > 0 && vlib_trace_buffer (vm, node, next0, first_b0,
- /* follow_chain */ 0)))
- {
- af_packet_input_trace_t *tr;
- vlib_set_trace_count (vm, node, --n_trace);
- tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr));
- tr->next_index = next0;
- tr->hw_if_index = apif->hw_if_index;
- clib_memcpy_fast (&tr->tph, tph, sizeof (struct tpacket2_hdr));
- }
-
-	  /* enqueue and take the next packet */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, first_bi0, next0);
-
- /* next packet */
- tph->tp_status = TP_STATUS_KERNEL;
- rx_frame = (rx_frame + 1) % frame_num;
- tph = (struct tpacket2_hdr *) (block_start + rx_frame * frame_size);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- apif->next_rx_frame = rx_frame;
-
- vlib_increment_combined_counter
- (vnet_get_main ()->interface_main.combined_sw_if_counters
- + VNET_INTERFACE_COUNTER_RX,
- vlib_get_thread_index (), apif->hw_if_index, n_rx_packets, n_rx_bytes);
-
- vnet_device_increment_rx_packets (thread_index, n_rx_packets);
- return n_rx_packets;
-}
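The rx loop above mirrors the tx handshake: the kernel publishes a frame by setting TP_STATUS_USER, and the driver returns it by writing TP_STATUS_KERNEL. Reduced to its essentials, with a hypothetical process_packet() standing in for the copy-and-enqueue logic:

/* sketch: the rx side of the tp_status handshake */
while (tph->tp_status & TP_STATUS_USER)
  {
    /* the payload begins tp_mac bytes past the frame header */
    process_packet ((u8 *) tph + tph->tp_mac, tph->tp_snaplen);
    tph->tp_status = TP_STATUS_KERNEL; /* return the frame to the kernel */
    rx_frame = (rx_frame + 1) % frame_num;
    tph = (struct tpacket2_hdr *) (block_start + rx_frame * frame_size);
  }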
-
-VLIB_NODE_FN (af_packet_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- u32 n_rx_packets = 0;
- af_packet_main_t *apm = &af_packet_main;
- vnet_hw_if_rxq_poll_vector_t *pv;
- pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
- for (int i = 0; i < vec_len (pv); i++)
- {
- af_packet_if_t *apif;
- apif = vec_elt_at_index (apm->interfaces, pv[i].dev_instance);
- if (apif->is_admin_up)
- n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif);
- }
-
- return n_rx_packets;
-}
-
-VLIB_REGISTER_NODE (af_packet_input_node) = {
- .name = "af-packet-input",
- .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
- .sibling_of = "device-input",
- .format_trace = format_af_packet_input_trace,
- .type = VLIB_NODE_TYPE_INPUT,
- .state = VLIB_NODE_STATE_INTERRUPT,
- .n_errors = AF_PACKET_INPUT_N_ERROR,
- .error_strings = af_packet_input_error_strings,
-};
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c
index 5c28cadc03c..ee380bebbde 100644
--- a/src/vnet/devices/devices.c
+++ b/src/vnet/devices/devices.c
@@ -18,6 +18,7 @@
#include <vnet/feature/feature.h>
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
+#include <vlib/stats/stats.h>
vnet_device_main_t vnet_device_main;
@@ -28,7 +29,6 @@ device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (device_input_node) = {
.function = device_input_fn,
.name = "device-input",
@@ -39,29 +39,6 @@ VLIB_REGISTER_NODE (device_input_node) = {
.next_nodes = VNET_DEVICE_INPUT_NEXT_NODES,
};
-/* Table defines how much we need to advance current data pointer
- in the buffer if we shortcut to l3 nodes */
-
-const u32 __attribute__((aligned (CLIB_CACHE_LINE_BYTES)))
-device_input_next_node_advance[((VNET_DEVICE_INPUT_N_NEXT_NODES /
- CLIB_CACHE_LINE_BYTES) +1) * CLIB_CACHE_LINE_BYTES] =
-{
- [VNET_DEVICE_INPUT_NEXT_IP4_INPUT] = sizeof (ethernet_header_t),
- [VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT] = sizeof (ethernet_header_t),
- [VNET_DEVICE_INPUT_NEXT_IP6_INPUT] = sizeof (ethernet_header_t),
- [VNET_DEVICE_INPUT_NEXT_MPLS_INPUT] = sizeof (ethernet_header_t),
-};
-
-const u32 __attribute__((aligned (CLIB_CACHE_LINE_BYTES)))
-device_input_next_node_flags[((VNET_DEVICE_INPUT_N_NEXT_NODES /
- CLIB_CACHE_LINE_BYTES) +1) * CLIB_CACHE_LINE_BYTES] =
-{
- [VNET_DEVICE_INPUT_NEXT_IP4_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID,
- [VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID,
- [VNET_DEVICE_INPUT_NEXT_IP6_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID,
- [VNET_DEVICE_INPUT_NEXT_MPLS_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID,
-};
-
VNET_FEATURE_ARC_INIT (device_input, static) =
{
.arc_name = "device-input",
@@ -99,7 +76,23 @@ VNET_FEATURE_INIT (ethernet_input, static) = {
.node_name = "ethernet-input",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
+
+static void
+input_rate_collector_fn (vlib_stats_collector_data_t *d)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ vlib_stats_entry_t *e2 = sm->directory_vector + d->private_data;
+ static u64 last_input_packets = 0;
+ f64 dt, now;
+
+ now = vlib_time_now (vlib_get_main ());
+ u64 input_packets = vnet_get_aggregate_rx_packets ();
+
+ dt = now - e2->value;
+ d->entry->value = (f64) (input_packets - last_input_packets) / dt;
+ last_input_packets = input_packets;
+ e2->value = now;
+}
static clib_error_t *
vnet_device_init (vlib_main_t * vm)
@@ -107,6 +100,7 @@ vnet_device_init (vlib_main_t * vm)
vnet_device_main_t *vdm = &vnet_device_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
vlib_thread_registration_t *tr;
+ vlib_stats_collector_reg_t reg = {};
uword *p;
vec_validate_aligned (vdm->workers, tm->n_vlib_mains - 1,
@@ -120,6 +114,12 @@ vnet_device_init (vlib_main_t * vm)
vdm->next_worker_thread_index = tr->first_index;
vdm->last_worker_thread_index = tr->first_index + tr->count - 1;
}
+
+ reg.private_data = vlib_stats_add_timestamp ("/sys/last_update");
+ reg.entry_index = vlib_stats_add_gauge ("/sys/input_rate");
+ reg.collect_fn = input_rate_collector_fn;
+ vlib_stats_register_collector_fn (&reg);
+
return 0;
}
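The collector added above derives a packets-per-second gauge from successive samples of the aggregate rx counter, using the /sys/last_update timestamp entry for the elapsed time. In isolation, and with monotonic_time() and total_rx_packets() as hypothetical stand-ins for the stats-segment timestamp and vnet_get_aggregate_rx_packets(), the math is:

/* standalone sketch of the computation in input_rate_collector_fn */
static u64 last_pkts;
static f64 last_ts;

static f64
input_rate_sketch (void)
{
  f64 now = monotonic_time ();
  u64 pkts = total_rx_packets ();
  f64 pps = (f64) (pkts - last_pkts) / (now - last_ts);
  last_pkts = pkts;
  last_ts = now;
  return pps; /* packets per second since the previous sample */
}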
diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h
index e54c7a29130..cadf1f857a6 100644
--- a/src/vnet/devices/devices.h
+++ b/src/vnet/devices/devices.h
@@ -67,8 +67,6 @@ typedef struct
extern vnet_device_main_t vnet_device_main;
extern vlib_node_registration_t device_input_node;
-extern const u32 device_input_next_node_advance[];
-extern const u32 device_input_next_node_flags[];
static inline u64
vnet_get_aggregate_rx_packets (void)
diff --git a/src/vnet/devices/netlink.c b/src/vnet/devices/netlink.c
index 9aae205c54f..3fd3e13bf77 100644
--- a/src/vnet/devices/netlink.c
+++ b/src/vnet/devices/netlink.c
@@ -20,8 +20,13 @@
#include <fcntl.h>
#include <net/if.h>
+#ifdef __linux__
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
+#elif __FreeBSD__
+#include <netlink/netlink.h>
+#include <netlink/netlink_route.h>
+#endif
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
@@ -273,7 +278,6 @@ vnet_netlink_get_link_mtu (int ifindex, u32 *mtu)
*mtu = clib_net_to_host_u32 (msg_mtu);
else
*mtu = msg_mtu;
- clib_warning ("mtu: %d", *mtu);
goto done;
}
offset = NLA_ALIGN (attr->nla_len);
@@ -409,6 +413,50 @@ vnet_netlink_add_ip6_route (void *dst, u8 dst_len, void *gw)
return err;
}
+clib_error_t *
+vnet_netlink_del_ip4_addr (int ifindex, void *addr, int pfx_len)
+{
+ vnet_netlink_msg_t m;
+ struct ifaddrmsg ifa = { 0 };
+ clib_error_t *err = 0;
+
+ ifa.ifa_family = AF_INET;
+ ifa.ifa_prefixlen = pfx_len;
+ ifa.ifa_index = ifindex;
+
+ vnet_netlink_msg_init (&m, RTM_DELADDR, NLM_F_REQUEST, &ifa,
+ sizeof (struct ifaddrmsg));
+
+ vnet_netlink_msg_add_rtattr (&m, IFA_LOCAL, addr, 4);
+ vnet_netlink_msg_add_rtattr (&m, IFA_ADDRESS, addr, 4);
+ err = vnet_netlink_msg_send (&m, NULL);
+ if (err)
+ err = clib_error_return (0, "del ip4 addr %U", format_clib_error, err);
+ return err;
+}
+
+clib_error_t *
+vnet_netlink_del_ip6_addr (int ifindex, void *addr, int pfx_len)
+{
+ vnet_netlink_msg_t m;
+ struct ifaddrmsg ifa = { 0 };
+ clib_error_t *err = 0;
+
+ ifa.ifa_family = AF_INET6;
+ ifa.ifa_prefixlen = pfx_len;
+ ifa.ifa_index = ifindex;
+
+ vnet_netlink_msg_init (&m, RTM_DELADDR, NLM_F_REQUEST, &ifa,
+ sizeof (struct ifaddrmsg));
+
+ vnet_netlink_msg_add_rtattr (&m, IFA_LOCAL, addr, 16);
+ vnet_netlink_msg_add_rtattr (&m, IFA_ADDRESS, addr, 16);
+ err = vnet_netlink_msg_send (&m, NULL);
+ if (err)
+ err = clib_error_return (0, "del ip6 addr %U", format_clib_error, err);
+ return err;
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/devices/netlink.h b/src/vnet/devices/netlink.h
index f1c42609cbf..086781fdbff 100644
--- a/src/vnet/devices/netlink.h
+++ b/src/vnet/devices/netlink.h
@@ -26,8 +26,10 @@ clib_error_t *vnet_netlink_get_link_mtu (int ifindex, u32 *mtu);
clib_error_t *vnet_netlink_set_link_mtu (int ifindex, int mtu);
clib_error_t *vnet_netlink_add_ip4_addr (int ifindex, void *addr,
int pfx_len);
+clib_error_t *vnet_netlink_del_ip4_addr (int ifindex, void *addr, int pfx_len);
clib_error_t *vnet_netlink_add_ip6_addr (int ifindex, void *addr,
int pfx_len);
+clib_error_t *vnet_netlink_del_ip6_addr (int ifindex, void *addr, int pfx_len);
clib_error_t *vnet_netlink_add_ip4_route (void *dst, u8 dst_len, void *gw);
clib_error_t *vnet_netlink_add_ip6_route (void *dst, u8 dst_len, void *gw);
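
The new deletion helpers mirror the existing add helpers declared above. A
minimal usage sketch (the address is raw bytes in network byte order and
ifindex is the kernel interface index; error handling reduced to reporting):

    u8 addr4[4] = { 192, 0, 2, 1 };   /* 192.0.2.1/24 */
    clib_error_t *err = vnet_netlink_del_ip4_addr (ifindex, addr4, 24);
    if (err)
      clib_error_report (err);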
diff --git a/src/vnet/devices/pipe/pipe.c b/src/vnet/devices/pipe/pipe.c
index eb92b3c788a..9caee2a55cb 100644
--- a/src/vnet/devices/pipe/pipe.c
+++ b/src/vnet/devices/pipe/pipe.c
@@ -83,13 +83,11 @@ pipe_build_rewrite (vnet_main_t * vnm,
return (rewrite);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (pipe_hw_interface_class) = {
.name = "Pipe",
.build_rewrite = pipe_build_rewrite,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
pipe_t *
pipe_get (u32 sw_if_index)
@@ -131,7 +129,7 @@ pipe_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
u32 n_left_from, n_left_to_next, n_copy, *from, *to_next;
u32 next_index = VNET_PIPE_TX_NEXT_ETHERNET_INPUT;
- u32 i, sw_if_index = 0, n_pkts = 0, n_bytes = 0;
+ u32 i, sw_if_index = 0;
vlib_buffer_t *b;
pipe_t *pipe;
@@ -159,8 +157,6 @@ pipe_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
i++;
- n_pkts++;
- n_bytes += vlib_buffer_length_in_chain (vm, b);
}
from += n_copy;
@@ -187,25 +183,21 @@ pipe_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
- /* *INDENT-OFF* */
hi = vnet_get_hw_interface (vnm, hw_if_index);
hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
({
vnet_sw_interface_set_flags (vnm, sw_if_index, flags);
}));
- /* *INDENT-ON* */
return (NULL);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (pipe_device_class) = {
.name = "Pipe",
.format_device_name = format_pipe_name,
.tx_function = pipe_tx,
.admin_up_down_function = pipe_admin_up_down,
};
-/* *INDENT-ON* */
#define foreach_pipe_rx_next \
_ (DROP, "error-drop")
@@ -434,7 +426,6 @@ pipe_rx (vlib_main_t * vm,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (pipe_rx_node) = {
.function = pipe_rx,
.name = "pipe-rx",
@@ -444,7 +435,6 @@ VLIB_REGISTER_NODE (pipe_rx_node) = {
.sibling_of = "ethernet-input",
};
-/* *INDENT-ON* */
/*
* Maintain a bitmap of allocated pipe instance numbers.
@@ -534,6 +524,7 @@ vnet_create_pipe_interface (u8 is_specified,
{
vnet_main_t *vnm = vnet_get_main ();
vlib_main_t *vm = vlib_get_main ();
+ vnet_eth_interface_registration_t eir = {};
u8 address[6] = {
[0] = 0x22,
[1] = 0x22,
@@ -564,15 +555,10 @@ vnet_create_pipe_interface (u8 is_specified,
*/
address[5] = instance;
- error = ethernet_register_interface (vnm, pipe_device_class.index,
- instance, address, &hw_if_index,
- /* flag change */ 0);
-
- if (error)
- {
- rv = VNET_API_ERROR_INVALID_REGISTRATION;
- goto oops;
- }
+ eir.dev_class_index = pipe_device_class.index;
+ eir.dev_instance = instance;
+ eir.address = address;
+ hw_if_index = vnet_eth_register_interface (vnm, &eir);
hi = vnet_get_hw_interface (vnm, hw_if_index);
*parent_sw_if_index = hi->sw_if_index;
@@ -631,13 +617,11 @@ pipe_hw_walk (vnet_main_t * vnm, u32 hw_if_index, void *args)
{
u32 pipe_sw_if_index[2], id, sw_if_index;
- /* *INDENT-OFF* */
hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
({
ASSERT(id < 2);
pipe_sw_if_index[id] = sw_if_index;
}));
- /* *INDENT-ON* */
ctx->cb (hi->sw_if_index, pipe_sw_if_index, hi->dev_instance, ctx->ctx);
}
@@ -696,13 +680,11 @@ create_pipe_interfaces (vlib_main_t * vm,
* Example of how to create a pipe interface:
* @cliexcmd{pipe create}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (pipe_create_interface_command, static) = {
.path = "pipe create",
.short_help = "pipe create [instance <instance>]",
.function = create_pipe_interfaces,
};
-/* *INDENT-ON* */
int
vnet_delete_pipe_interface (u32 sw_if_index)
@@ -726,13 +708,11 @@ vnet_delete_pipe_interface (u32 sw_if_index)
return VNET_API_ERROR_INVALID_SW_IF_INDEX;
}
- /* *INDENT-OFF* */
hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
({
vnet_delete_sub_interface(sw_if_index);
pipe_main.pipes[sw_if_index] = PIPE_INVALID;
}));
- /* *INDENT-ON* */
ethernet_delete_interface (vnm, hw_if_index);
@@ -776,13 +756,11 @@ delete_pipe_interfaces (vlib_main_t * vm,
* Example of how to delete a pipe interface:
* @cliexcmd{pipe delete-interface intfc loop0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (pipe_delete_interface_command, static) = {
.path = "pipe delete",
.short_help = "pipe delete <interface>",
.function = delete_pipe_interfaces,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
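
Note the registration change in the pipe.c hunk above: the code moves from
ethernet_register_interface (), which returned a clib_error_t, to the
struct-based vnet_eth_register_interface (), which returns the hw_if_index
directly. The same pattern reappears in tap.c later in this patch:

    vnet_eth_interface_registration_t eir = {};

    eir.dev_class_index = pipe_device_class.index;
    eir.dev_instance = instance;
    eir.address = address;   /* 6-byte MAC */
    /* optional callbacks, as used by tap.c below:
       eir.cb.flag_change, eir.cb.set_max_frame_size */
    hw_if_index = vnet_eth_register_interface (vnm, &eir);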
diff --git a/src/vnet/devices/pipe/pipe_api.c b/src/vnet/devices/pipe/pipe_api.c
index 1f0faef7c1e..79a4377de83 100644
--- a/src/vnet/devices/pipe/pipe_api.c
+++ b/src/vnet/devices/pipe/pipe_api.c
@@ -42,14 +42,12 @@ vl_api_pipe_create_t_handler (vl_api_pipe_create_t * mp)
rv = vnet_create_pipe_interface (is_specified, user_instance,
&parent_sw_if_index, pipe_sw_if_index);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_PIPE_CREATE_REPLY,
({
rmp->sw_if_index = ntohl (parent_sw_if_index);
rmp->pipe_sw_if_index[0] = ntohl (pipe_sw_if_index[0]);
rmp->pipe_sw_if_index[1] = ntohl (pipe_sw_if_index[1]);
}));
- /* *INDENT-ON* */
}
static void
diff --git a/src/vnet/devices/tap/FEATURE.yaml b/src/vnet/devices/tap/FEATURE.yaml
index 35ee4885b02..1a774fb0e74 100644
--- a/src/vnet/devices/tap/FEATURE.yaml
+++ b/src/vnet/devices/tap/FEATURE.yaml
@@ -1,6 +1,6 @@
---
name: Tap Device
-maintainer: damarion@cisco.com sluong@cisco.com sykazmi@cisco.com
+maintainer: damarion@cisco.com sluong@cisco.com mohsin.kazmi14@gmail.com
features:
- Virtio
- Persistence
diff --git a/src/vnet/devices/tap/cli.c b/src/vnet/devices/tap/cli.c
index 10f4bb0ee2e..5c676d32d60 100644
--- a/src/vnet/devices/tap/cli.c
+++ b/src/vnet/devices/tap/cli.c
@@ -41,6 +41,7 @@ tap_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
args.tap_flags = 0;
args.rv = -1;
args.num_rx_queues = 1;
+ args.num_tx_queues = 1;
/* Get a line of input. */
if (unformat_user (input, unformat_line_input, line_input))
@@ -76,6 +77,8 @@ tap_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
args.host_ip6_gw_set = 1;
else if (unformat (line_input, "num-rx-queues %d", &tmp))
args.num_rx_queues = tmp;
+ else if (unformat (line_input, "num-tx-queues %d", &tmp))
+ args.num_tx_queues = tmp;
else if (unformat (line_input, "rx-ring-size %d", &tmp))
args.rx_ring_sz = tmp;
else if (unformat (line_input, "tx-ring-size %d", &tmp))
@@ -133,12 +136,12 @@ tap_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tap_create_command, static) = {
.path = "create tap",
- .short_help = "create tap {id <if-id>} [hw-addr <mac-address>] "
- "[num-rx-queues <n>] [rx-ring-size <size>] [tx-ring-size <size>] "
- "[host-ns <netns>] [host-bridge <bridge-name>] "
+ .short_help =
+ "create tap {id <if-id>} [hw-addr <mac-address>] "
+ "[num-rx-queues <n>] [num-tx-queues <n>] [rx-ring-size <size>] "
+ "[tx-ring-size <size>] [host-ns <netns>] [host-bridge <bridge-name>] "
"[host-ip4-addr <ip4addr/mask>] [host-ip6-addr <ip6-addr>] "
"[host-ip4-gw <ip4-addr>] [host-ip6-gw <ip6-addr>] "
"[host-mac-addr <host-mac-address>] [host-if-name <name>] "
@@ -146,7 +149,6 @@ VLIB_CLI_COMMAND (tap_create_command, static) = {
"[persist] [attach] [tun] [packed] [in-order]",
.function = tap_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
tap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -187,14 +189,12 @@ tap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tap_delete__command, static) =
{
.path = "delete tap",
.short_help = "delete tap {<interface> | sw_if_index <sw_idx>}",
.function = tap_delete_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
tap_offload_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -257,7 +257,6 @@ tap_offload_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tap_offload_command, static) =
{
.path = "set tap offload",
@@ -266,7 +265,6 @@ VLIB_CLI_COMMAND (tap_offload_command, static) =
"csum-offload-disable>",
.function = tap_offload_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
tap_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -296,10 +294,8 @@ tap_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (vec_len (hw_if_indices) == 0)
{
- /* *INDENT-OFF* */
pool_foreach (vif, mm->interfaces)
vec_add1 (hw_if_indices, vif->hw_if_index);
- /* *INDENT-ON* */
}
virtio_show (vm, hw_if_indices, show_descr, VIRTIO_IF_TYPE_TAP);
@@ -309,13 +305,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tap_show_command, static) = {
.path = "show tap",
.short_help = "show tap {<interface>] [descriptors]",
.function = tap_show_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
tun_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -345,10 +339,8 @@ tun_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (vec_len (hw_if_indices) == 0)
{
- /* *INDENT-OFF* */
pool_foreach (vif, mm->interfaces)
vec_add1 (hw_if_indices, vif->hw_if_index);
- /* *INDENT-ON* */
}
virtio_show (vm, hw_if_indices, show_descr, VIRTIO_IF_TYPE_TUN);
@@ -358,13 +350,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tun_show_command, static) = {
.path = "show tun",
.short_help = "show tun {<interface>] [descriptors]",
.function = tun_show_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
tap_cli_init (vlib_main_t * vm)
diff --git a/src/vnet/devices/tap/tap.c b/src/vnet/devices/tap/tap.c
index 2d075f9e0fc..1e2ee87041d 100644
--- a/src/vnet/devices/tap/tap.c
+++ b/src/vnet/devices/tap/tap.c
@@ -58,13 +58,11 @@ tap_main_t tap_main;
goto error; \
}
- /* *INDENT-OFF* */
-VNET_HW_INTERFACE_CLASS (tun_device_hw_interface_class, static) =
-{
+VNET_HW_INTERFACE_CLASS (tun_device_hw_interface_class, static) = {
.name = "tun-device",
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+ .tx_hash_fn_type = VNET_HASH_FN_TYPE_IP,
};
- /* *INDENT-ON* */
#define TUN_MAX_PACKET_BYTES 65355
#define TUN_MIN_PACKET_BYTES 64
@@ -79,6 +77,14 @@ virtio_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi,
return 0;
}
+static clib_error_t *
+virtio_eth_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hi,
+ u32 frame_size)
+{
+ /* nothing for now */
+ return 0;
+}
+
#define TAP_MAX_INSTANCE 1024
static void
@@ -89,14 +95,14 @@ tap_free (vlib_main_t * vm, virtio_if_t * vif)
clib_error_t *err = 0;
int i;
- /* *INDENT-OFF* */
+ virtio_pre_input_node_disable (vm, vif);
+
vec_foreach_index (i, vif->vhost_fds) if (vif->vhost_fds[i] != -1)
close (vif->vhost_fds[i]);
vec_foreach_index (i, vif->rxq_vrings)
virtio_vring_free_rx (vm, vif, RX_QUEUE (i));
vec_foreach_index (i, vif->txq_vrings)
virtio_vring_free_tx (vm, vif, TX_QUEUE (i));
- /* *INDENT-ON* */
if (vif->tap_fds)
{
@@ -130,6 +136,7 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
tap_main_t *tm = &tap_main;
vnet_sw_interface_t *sw;
vnet_hw_interface_t *hw;
+ vnet_hw_if_caps_change_t cc;
int i, num_vhost_queues;
int old_netns_fd = -1;
struct ifreq ifr = {.ifr_flags = IFF_NO_PI | IFF_VNET_HDR };
@@ -191,7 +198,7 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
vif->dev_instance = vif - vim->interfaces;
vif->id = args->id;
- vif->num_txqs = thm->n_vlib_mains;
+ vif->num_txqs = clib_max (args->num_tx_queues, thm->n_vlib_mains);
vif->num_rxqs = clib_max (args->num_rx_queues, 1);
if (args->tap_flags & TAP_FLAG_ATTACH)
@@ -263,7 +270,7 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
else
ifr.ifr_flags |= IFF_MULTI_QUEUE;
- hdrsz = sizeof (virtio_net_hdr_v1_t);
+ hdrsz = sizeof (vnet_virtio_net_hdr_v1_t);
if (args->tap_flags & TAP_FLAG_GSO)
{
offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
@@ -325,10 +332,10 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
args->error = clib_error_return_unix (0, "open '/dev/net/tun'");
goto error;
}
+ vec_add1 (vif->tap_fds, qfd);
_IOCTL (qfd, TUNSETIFF, (void *) &ifr);
tap_log_dbg (vif, "TUNSETIFF fd %d name %s flags 0x%x", qfd,
ifr.ifr_ifrn.ifrn_name, ifr.ifr_flags);
- vec_add1 (vif->tap_fds, qfd);
}
for (i = 0; i < vif->num_rxqs; i++)
@@ -568,7 +575,7 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
vhost_vring_addr_t addr = { 0 };
vhost_vring_state_t state = { 0 };
vhost_vring_file_t file = { 0 };
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
u16 qp = i >> 1;
int fd = vif->vhost_fds[qp];
@@ -586,7 +593,7 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
}
addr.index = state.index = file.index = vring->queue_id & 1;
- state.num = vring->size;
+ state.num = vring->queue_size;
virtio_log_debug (vif, "VHOST_SET_VRING_NUM fd %d index %u num %u", fd,
state.index, state.num);
_IOCTL (fd, VHOST_SET_VRING_NUM, &state);
@@ -643,17 +650,14 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
if (vif->type != VIRTIO_IF_TYPE_TUN)
{
- args->error =
- ethernet_register_interface (vnm, virtio_device_class.index,
- vif->dev_instance, vif->mac_addr,
- &vif->hw_if_index,
- virtio_eth_flag_change);
- if (args->error)
- {
- args->rv = VNET_API_ERROR_INVALID_REGISTRATION;
- goto error;
- }
-
+ vnet_eth_interface_registration_t eir = {};
+
+ eir.dev_class_index = virtio_device_class.index;
+ eir.dev_instance = vif->dev_instance;
+ eir.address = vif->mac_addr;
+ eir.cb.flag_change = virtio_eth_flag_change;
+ eir.cb.set_max_frame_size = virtio_eth_set_max_frame_size;
+ vif->hw_if_index = vnet_eth_register_interface (vnm, &eir);
}
else
{
@@ -669,18 +673,16 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
args->sw_if_index = vif->sw_if_index;
args->rv = 0;
hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
+ cc.mask = VNET_HW_IF_CAP_INT_MODE | VNET_HW_IF_CAP_TCP_GSO |
+ VNET_HW_IF_CAP_TX_TCP_CKSUM | VNET_HW_IF_CAP_TX_UDP_CKSUM;
+ cc.val = VNET_HW_IF_CAP_INT_MODE;
+
if (args->tap_flags & TAP_FLAG_GSO)
- {
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM;
- }
+ cc.val |= VNET_HW_IF_CAP_TCP_GSO | VNET_HW_IF_CAP_TX_TCP_CKSUM |
+ VNET_HW_IF_CAP_TX_UDP_CKSUM;
else if (args->tap_flags & TAP_FLAG_CSUM_OFFLOAD)
- {
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM;
- }
+ cc.val |= VNET_HW_IF_CAP_TX_TCP_CKSUM | VNET_HW_IF_CAP_TX_UDP_CKSUM;
+
if ((args->tap_flags & TAP_FLAG_GSO)
&& (args->tap_flags & TAP_FLAG_GRO_COALESCE))
{
@@ -688,18 +690,18 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
}
if (vif->type == VIRTIO_IF_TYPE_TUN)
{
- hw->max_supported_packet_bytes = TUN_MAX_PACKET_BYTES;
- hw->min_packet_bytes = hw->min_supported_packet_bytes =
- TUN_MIN_PACKET_BYTES;
- hw->max_packet_bytes =
- args->host_mtu_size ? args->host_mtu_size : TUN_DEFAULT_PACKET_BYTES;
- vnet_sw_interface_set_mtu (vnm, hw->sw_if_index, hw->max_packet_bytes);
+ hw->min_frame_size = TUN_MIN_PACKET_BYTES;
+ vnet_hw_interface_set_mtu (
+ vnm, hw->hw_if_index,
+ args->host_mtu_size ? args->host_mtu_size : TUN_DEFAULT_PACKET_BYTES);
}
+ vnet_hw_if_change_caps (vnm, vif->hw_if_index, &cc);
+ virtio_pre_input_node_enable (vm, vif);
virtio_vring_set_rx_queues (vm, vif);
+ virtio_vring_set_tx_queues (vm, vif);
vif->per_interface_next_index = ~0;
- vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
vnet_hw_interface_set_flags (vnm, vif->hw_if_index,
VNET_HW_INTERFACE_FLAG_LINK_UP);
/*
@@ -774,6 +776,7 @@ tap_csum_offload_enable_disable (vlib_main_t * vm, u32 sw_if_index,
virtio_main_t *mm = &virtio_main;
virtio_if_t *vif;
vnet_hw_interface_t *hw;
+ vnet_hw_if_caps_change_t cc;
clib_error_t *err = 0;
int i = 0;
@@ -791,21 +794,19 @@ tap_csum_offload_enable_disable (vlib_main_t * vm, u32 sw_if_index,
_IOCTL (vif->tap_fds[i], TUNSETOFFLOAD, offload);
vif->gso_enabled = 0;
vif->packet_coalesce = 0;
- vif->csum_offload_enabled = enable_disable ? 1 : 0;
-
- if ((hw->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO) != 0)
- {
- hw->caps &= ~VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO;
- }
+ cc.mask = VNET_HW_IF_CAP_TCP_GSO | VNET_HW_IF_CAP_L4_TX_CKSUM;
if (enable_disable)
{
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM;
+ cc.val = VNET_HW_IF_CAP_L4_TX_CKSUM;
+ vif->csum_offload_enabled = 1;
}
else
{
- hw->caps &= ~VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM;
+ cc.val = 0;
+ vif->csum_offload_enabled = 0;
}
+ vnet_hw_if_change_caps (vnm, vif->hw_if_index, &cc);
error:
if (err)
@@ -825,6 +826,7 @@ tap_gso_enable_disable (vlib_main_t * vm, u32 sw_if_index, int enable_disable,
virtio_main_t *mm = &virtio_main;
virtio_if_t *vif;
vnet_hw_interface_t *hw;
+ vnet_hw_if_caps_change_t cc;
clib_error_t *err = 0;
int i = 0;
@@ -840,29 +842,25 @@ tap_gso_enable_disable (vlib_main_t * vm, u32 sw_if_index, int enable_disable,
unsigned int offload = enable_disable ? gso_on : gso_off;
vec_foreach_index (i, vif->tap_fds)
_IOCTL (vif->tap_fds[i], TUNSETOFFLOAD, offload);
- vif->gso_enabled = enable_disable ? 1 : 0;
- vif->csum_offload_enabled = 0;
+
+ cc.mask = VNET_HW_IF_CAP_TCP_GSO | VNET_HW_IF_CAP_L4_TX_CKSUM;
+
if (enable_disable)
{
- if ((hw->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO) == 0)
- {
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM;
- }
+ cc.val = cc.mask;
+ vif->gso_enabled = 1;
+ vif->csum_offload_enabled = 1;
if (is_packet_coalesce)
- {
- virtio_set_packet_coalesce (vif);
- }
+ virtio_set_packet_coalesce (vif);
}
else
{
- if ((hw->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO) != 0)
- {
- hw->caps &= ~(VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM);
- }
+ cc.val = 0;
+ vif->gso_enabled = 0;
+ vif->csum_offload_enabled = 0;
vif->packet_coalesce = 0;
}
+ vnet_hw_if_change_caps (vnm, vif->hw_if_index, &cc);
error:
if (err)
@@ -880,12 +878,11 @@ tap_dump_ifs (tap_interface_details_t ** out_tapids)
vnet_main_t *vnm = vnet_get_main ();
virtio_main_t *mm = &virtio_main;
virtio_if_t *vif;
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
vnet_hw_interface_t *hi;
tap_interface_details_t *r_tapids = NULL;
tap_interface_details_t *tapid = NULL;
- /* *INDENT-OFF* */
pool_foreach (vif, mm->interfaces) {
if ((vif->type != VIRTIO_IF_TYPE_TAP)
&& (vif->type != VIRTIO_IF_TYPE_TUN))
@@ -898,9 +895,9 @@ tap_dump_ifs (tap_interface_details_t ** out_tapids)
clib_memcpy(tapid->dev_name, hi->name,
MIN (ARRAY_LEN (tapid->dev_name) - 1, vec_len (hi->name)));
vring = vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS(0));
- tapid->rx_ring_sz = vring->size;
+ tapid->rx_ring_sz = vring->queue_size;
vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS(0));
- tapid->tx_ring_sz = vring->size;
+ tapid->tx_ring_sz = vring->queue_size;
tapid->tap_flags = vif->tap_flags;
clib_memcpy(&tapid->host_mac_addr, vif->host_mac_addr, 6);
if (vif->host_if_name)
@@ -929,7 +926,6 @@ tap_dump_ifs (tap_interface_details_t ** out_tapids)
tapid->host_ip6_prefix_len = vif->host_ip6_prefix_len;
tapid->host_mtu_size = vif->host_mtu_size;
}
- /* *INDENT-ON* */
*out_tapids = r_tapids;
diff --git a/src/vnet/devices/tap/tap.h b/src/vnet/devices/tap/tap.h
index d89809862c0..6b88c34fe41 100644
--- a/src/vnet/devices/tap/tap.h
+++ b/src/vnet/devices/tap/tap.h
@@ -44,7 +44,8 @@ typedef struct
u32 id;
u8 mac_addr_set;
mac_address_t mac_addr;
- u8 num_rx_queues;
+ u16 num_rx_queues;
+ u16 num_tx_queues;
u16 rx_ring_sz;
u16 tx_ring_sz;
u32 tap_flags;
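
With num_tx_queues added (and num_rx_queues widened to u16), callers can now
size both directions when filling tap_create_if_args_t. A sketch using only
fields visible in this patch:

    tap_create_if_args_t args = { 0 };

    args.id = 0;
    args.num_rx_queues = 2;
    args.num_tx_queues = 2;   /* new; tap.c raises this to at least the
                                 number of VPP threads */
    args.rx_ring_sz = 256;    /* must be a power of 2 */
    args.tx_ring_sz = 256;
    args.sw_if_index = ~0;
    tap_create_if (vlib_get_main (), &args);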
diff --git a/src/vnet/devices/tap/tapv2.api b/src/vnet/devices/tap/tapv2.api
index 6b6618411a6..bf53d1bc6fe 100644
--- a/src/vnet/devices/tap/tapv2.api
+++ b/src/vnet/devices/tap/tapv2.api
@@ -43,6 +43,82 @@ enum tap_flags {
@param use_random_mac - let the system generate a unique mac address
@param mac_address - mac addr to assign to the interface if use_random not set
@param num_rx_queues - number of rx queues
+ @param num_tx_queues - number of tx queues
+  @param tx_ring_sz - number of entries in the TX ring; optional, default 256, must be a power of 2
+  @param rx_ring_sz - number of entries in the RX ring; optional, default 256, must be a power of 2
+ @param host_mtu_set - host MTU should be set
+ @param host_mtu_size - host MTU size
+ @param host_mac_addr_set - host side interface mac address should be set
+ @param host_mac_addr - host side interface mac address
+  @param host_ip4_prefix_set - host IPv4 address should be set
+  @param host_ip4_prefix - host IPv4 address and prefix length
+  @param host_ip6_prefix_set - host IPv6 address should be set
+  @param host_ip6_prefix - host IPv6 address and prefix length
+ @param host_ip4_gw_set - host IPv4 default gateway should be set
+ @param host_ip4_gw - host IPv4 default gateway
+ @param host_ip6_gw_set - host IPv6 default gateway should be set
+ @param host_ip6_gw - host IPv6 default gateway
+ @param tap_flags - flags for the TAP interface creation
+ @param host_if_name_set - host side interface name should be set
+ @param host_if_name - host side interface name
+ @param host_namespace_set - host namespace should be set
+ @param host_namespace - host namespace to attach interface to
+ @param host_bridge_set - host bridge should be set
+ @param host_bridge - host bridge to attach interface to
+ @param tag - tag
+*/
+autoendian define tap_create_v3
+{
+ u32 client_index;
+ u32 context;
+ u32 id [default=0xffffffff];
+ bool use_random_mac [default=true];
+ vl_api_mac_address_t mac_address;
+ u16 num_rx_queues [default=1];
+ u16 num_tx_queues [default=1];
+ u16 tx_ring_sz [default=256];
+ u16 rx_ring_sz [default=256];
+ bool host_mtu_set;
+ u32 host_mtu_size;
+ bool host_mac_addr_set;
+ vl_api_mac_address_t host_mac_addr;
+ bool host_ip4_prefix_set;
+ vl_api_ip4_address_with_prefix_t host_ip4_prefix;
+ bool host_ip6_prefix_set;
+ vl_api_ip6_address_with_prefix_t host_ip6_prefix;
+ bool host_ip4_gw_set;
+ vl_api_ip4_address_t host_ip4_gw;
+ bool host_ip6_gw_set;
+ vl_api_ip6_address_t host_ip6_gw;
+ vl_api_tap_flags_t tap_flags;
+ bool host_namespace_set;
+ string host_namespace[64];
+ bool host_if_name_set;
+ string host_if_name[64];
+ bool host_bridge_set;
+ string host_bridge[64];
+ string tag[];
+};
+
+/** \brief Reply for tap create request
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param sw_if_index - software index allocated for the new tap interface
+*/
+autoendian define tap_create_v3_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
+/** \brief Initialize a new tap interface with the given parameters
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param id - interface id, 0xffffffff means auto
+ @param use_random_mac - let the system generate a unique mac address
+ @param mac_address - mac addr to assign to the interface if use_random not set
+ @param num_rx_queues - number of rx queues
  @param tx_ring_sz - number of entries in the TX ring; optional, default 256, must be a power of 2
  @param rx_ring_sz - number of entries in the RX ring; optional, default 256, must be a power of 2
@param host_mtu_set - host MTU should be set
@@ -68,6 +144,8 @@ enum tap_flags {
*/
define tap_create_v2
{
+ option deprecated;
+
u32 client_index;
u32 context;
u32 id [default=0xffffffff];
@@ -105,6 +183,8 @@ define tap_create_v2
*/
define tap_create_v2_reply
{
+ option deprecated;
+
u32 context;
i32 retval;
vl_api_interface_index_t sw_if_index;
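
The equivalent debug CLI knob was added earlier in this patch, e.g.:

    vpp# create tap id 0 num-rx-queues 2 num-tx-queues 2 rx-ring-size 512 tx-ring-size 512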
diff --git a/src/vnet/devices/tap/tapv2_api.c b/src/vnet/devices/tap/tapv2_api.c
index 64a0088136b..ab4189ab607 100644
--- a/src/vnet/devices/tap/tapv2_api.c
+++ b/src/vnet/devices/tap/tapv2_api.c
@@ -36,6 +36,100 @@
#include <vlibapi/api_helper_macros.h>
static void
+vl_api_tap_create_v3_t_handler (vl_api_tap_create_v3_t *mp)
+{
+ vl_api_registration_t *reg;
+ int rv;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_tap_create_v3_reply_t *rmp;
+
+ tap_create_if_args_t _a, *ap = &_a;
+
+ clib_memset (ap, 0, sizeof (*ap));
+
+ ap->id = mp->id;
+ if (!mp->use_random_mac)
+ {
+ mac_address_decode (mp->mac_address, &ap->mac_addr);
+ ap->mac_addr_set = 1;
+ }
+ ap->rx_ring_sz = mp->rx_ring_sz;
+ ap->tx_ring_sz = mp->tx_ring_sz;
+ ap->sw_if_index = (u32) ~0;
+ ap->num_rx_queues = clib_max (1, mp->num_rx_queues);
+ ap->num_tx_queues = mp->num_tx_queues;
+
+ if (mp->host_if_name_set)
+ ap->host_if_name = format (0, "%s%c", mp->host_if_name, 0);
+
+ if (mp->host_mac_addr_set)
+ {
+ mac_address_decode (mp->host_mac_addr, &ap->host_mac_addr);
+ }
+
+ if (mp->host_namespace_set)
+ ap->host_namespace = format (0, "%s%c", mp->host_namespace, 0);
+
+ if (mp->host_bridge_set)
+ ap->host_bridge = format (0, "%s%c", mp->host_bridge, 0);
+
+ if (mp->host_ip4_prefix_set)
+ {
+ ip4_address_decode (mp->host_ip4_prefix.address, &ap->host_ip4_addr);
+ ap->host_ip4_prefix_len = mp->host_ip4_prefix.len;
+ }
+
+ if (mp->host_ip6_prefix_set)
+ {
+ ip6_address_decode (mp->host_ip6_prefix.address, &ap->host_ip6_addr);
+ ap->host_ip6_prefix_len = mp->host_ip6_prefix.len;
+ }
+
+ if (mp->host_ip4_gw_set)
+ {
+ ip4_address_decode (mp->host_ip4_gw, &ap->host_ip4_gw);
+ ap->host_ip4_gw_set = 1;
+ }
+
+ if (mp->host_ip6_gw_set)
+ {
+ ip6_address_decode (mp->host_ip6_gw, &ap->host_ip6_gw);
+ ap->host_ip6_gw_set = 1;
+ }
+
+ if (mp->host_mtu_set)
+ {
+ ap->host_mtu_size = mp->host_mtu_size;
+ ap->host_mtu_set = 1;
+ }
+
+ ap->tap_flags = mp->tap_flags;
+
+ tap_create_if (vm, ap);
+
+ /* If a tag was supplied... */
+ if (vl_api_string_len (&mp->tag))
+ {
+ u8 *tag = vl_api_from_api_to_new_vec (mp, &mp->tag);
+ vnet_set_sw_interface_tag (vnm, tag, ap->sw_if_index);
+ }
+
+ vec_free (ap->host_if_name);
+ vec_free (ap->host_namespace);
+ vec_free (ap->host_bridge);
+
+ rv = ap->rv;
+ REPLY_MACRO2_END (VL_API_TAP_CREATE_V3_REPLY,
+ ({ rmp->sw_if_index = ap->sw_if_index; }));
+}
+
+static void
vl_api_tap_create_v2_t_handler (vl_api_tap_create_v2_t * mp)
{
vl_api_registration_t *reg;
@@ -61,6 +155,7 @@ vl_api_tap_create_v2_t_handler (vl_api_tap_create_v2_t * mp)
ap->tx_ring_sz = ntohs (mp->tx_ring_sz);
ap->sw_if_index = (u32) ~ 0;
ap->num_rx_queues = 1;
+ ap->num_tx_queues = 1;
if (mp->num_rx_queues > 1)
ap->num_rx_queues = mp->num_rx_queues;
diff --git a/src/vnet/devices/virtio/FEATURE.yaml b/src/vnet/devices/virtio/FEATURE.yaml
index 7b2fb59e1ad..446a45b61a3 100644
--- a/src/vnet/devices/virtio/FEATURE.yaml
+++ b/src/vnet/devices/virtio/FEATURE.yaml
@@ -1,6 +1,6 @@
---
name: Virtio PCI Device
-maintainer: sykazmi@cisco.com sluong@cisco.com
+maintainer: mohsin.kazmi14@gmail.com sluong@cisco.com
features:
- Driver mode to emulate PCI interface presented to VPP from
the host interface.
@@ -11,6 +11,8 @@ features:
- Support multi-queue, GSO, checksum offload, indirect descriptor,
jumbo frame, and packed ring.
- Support virtio 1.1 packed ring in vhost
+  - Support for TX queue size configuration (tested on host kernel 5.15
+    and QEMU 6.2.0)
description: "Virtio implementation"
missing:
- API dump filtering by sw_if_index
diff --git a/src/vnet/devices/virtio/cli.c b/src/vnet/devices/virtio/cli.c
index a78336997e2..c1b6c8be065 100644
--- a/src/vnet/devices/virtio/cli.c
+++ b/src/vnet/devices/virtio/cli.c
@@ -31,6 +31,7 @@ virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
virtio_pci_create_if_args_t args;
u64 feature_mask = (u64) ~ (0ULL);
u32 buffering_size = 0;
+ u32 txq_size = 0;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
@@ -43,6 +44,8 @@ virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
;
else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask))
args.features = feature_mask;
+ else if (unformat (line_input, "tx-queue-size %u", &txq_size))
+ args.tx_queue_size = txq_size;
else if (unformat (line_input, "gso-enabled"))
args.gso_enabled = 1;
else if (unformat (line_input, "csum-enabled"))
@@ -55,6 +58,10 @@ virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
}
else if (unformat (line_input, "packed"))
args.virtio_flags |= VIRTIO_FLAG_PACKED;
+ else if (unformat (line_input, "bind force"))
+ args.bind = VIRTIO_BIND_FORCE;
+ else if (unformat (line_input, "bind"))
+ args.bind = VIRTIO_BIND_DEFAULT;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
@@ -66,15 +73,14 @@ virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (virtio_pci_create_command, static) = {
.path = "create interface virtio",
.short_help = "create interface virtio <pci-address> "
- "[feature-mask <hex-mask>] [gso-enabled] [csum-enabled] "
- "[buffering [size <buffering-szie>]] [packed]",
+ "[feature-mask <hex-mask>] [tx-queue-size <size>] "
+ "[gso-enabled] [csum-enabled] "
+    "[buffering [size <buffering-size>]] [packed] [bind [force]]",
.function = virtio_pci_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
virtio_pci_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -120,14 +126,12 @@ virtio_pci_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (virtio_pci_delete_command, static) = {
.path = "delete interface virtio",
.short_help = "delete interface virtio "
"{<interface> | sw_if_index <sw_idx>}",
.function = virtio_pci_delete_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
virtio_pci_enable_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -182,14 +186,12 @@ virtio_pci_enable_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (virtio_pci_enable_command, static) = {
.path = "set virtio pci",
.short_help = "set virtio pci {<interface> | sw_if_index <sw_idx>}"
" [gso-enabled | csum-offload-enabled | offloads-disabled]",
.function = virtio_pci_enable_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_virtio_pci_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -248,13 +250,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_virtio_pci_command, static) = {
.path = "show virtio pci",
.short_help = "show virtio pci [<interface>] [descriptors | desc] [debug-device]",
.function = show_virtio_pci_fn,
};
-/* *INDENT-ON* */
clib_error_t *
virtio_pci_cli_init (vlib_main_t * vm)
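
Putting the new options together, the extended create command accepts, for
example (PCI address hypothetical):

    vpp# create interface virtio 0000:00:04.0 tx-queue-size 512 gso-enabled bind force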
diff --git a/src/vnet/devices/virtio/device.c b/src/vnet/devices/virtio/device.c
index ac9be6b02ae..112f77e7065 100644
--- a/src/vnet/devices/virtio/device.c
+++ b/src/vnet/devices/virtio/device.c
@@ -63,13 +63,31 @@ format_virtio_device (u8 * s, va_list * args)
u32 dev_instance = va_arg (*args, u32);
int verbose = va_arg (*args, int);
u32 indent = format_get_indent (s);
+ virtio_main_t *vim = &virtio_main;
+ virtio_if_t *vif = vec_elt_at_index (vim->interfaces, dev_instance);
+ vnet_virtio_vring_t *vring = 0;
s = format (s, "VIRTIO interface");
if (verbose)
{
s = format (s, "\n%U instance %u", format_white_space, indent + 2,
dev_instance);
+ s = format (s, "\n%U RX QUEUE : Total Packets", format_white_space,
+ indent + 4);
+ vec_foreach (vring, vif->rxq_vrings)
+ {
+ s = format (s, "\n%U %8u : %llu", format_white_space, indent + 4,
+ RX_QUEUE_ACCESS (vring->queue_id), vring->total_packets);
+ }
+ s = format (s, "\n%U TX QUEUE : Total Packets", format_white_space,
+ indent + 4);
+ vec_foreach (vring, vif->txq_vrings)
+ {
+ s = format (s, "\n%U %8u : %llu", format_white_space, indent + 4,
+ TX_QUEUE_ACCESS (vring->queue_id), vring->total_packets);
+ }
}
+
return s;
}
@@ -109,6 +127,7 @@ virtio_tx_trace (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *b0,
t = vlib_add_trace (vm, node, b0, sizeof (t[0]));
t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
t->buffer_index = bi;
+ clib_memset (&t->gho, 0, sizeof (t->gho));
if (is_tun)
{
int is_ip4 = 0, is_ip6 = 0;
@@ -166,11 +185,12 @@ virtio_memset_ring_u32 (u32 *ring, u32 start, u32 ring_size, u32 n_buffers)
}
static void
-virtio_free_used_device_desc_split (vlib_main_t *vm, virtio_vring_t *vring,
+virtio_free_used_device_desc_split (vlib_main_t *vm,
+ vnet_virtio_vring_t *vring,
uword node_index)
{
u16 used = vring->desc_in_use;
- u16 sz = vring->size;
+ u16 sz = vring->queue_size;
u16 mask = sz - 1;
u16 last = vring->last_used_idx;
u16 n_left = vring->used->idx - last;
@@ -181,7 +201,7 @@ virtio_free_used_device_desc_split (vlib_main_t *vm, virtio_vring_t *vring,
while (n_left)
{
- vring_used_elem_t *e = &vring->used->ring[last & mask];
+ vnet_virtio_vring_used_elem_t *e = &vring->used->ring[last & mask];
u16 slot, n_buffers;
slot = n_buffers = e->id;
@@ -190,7 +210,7 @@ virtio_free_used_device_desc_split (vlib_main_t *vm, virtio_vring_t *vring,
n_left--;
last++;
n_buffers++;
- vring_desc_t *d = &vring->desc[e->id];
+ vnet_virtio_vring_desc_t *d = &vring->desc[e->id];
u16 next;
while (d->flags & VRING_DESC_F_NEXT)
{
@@ -232,11 +252,12 @@ virtio_free_used_device_desc_split (vlib_main_t *vm, virtio_vring_t *vring,
}
static void
-virtio_free_used_device_desc_packed (vlib_main_t *vm, virtio_vring_t *vring,
+virtio_free_used_device_desc_packed (vlib_main_t *vm,
+ vnet_virtio_vring_t *vring,
uword node_index)
{
- vring_packed_desc_t *d;
- u16 sz = vring->size;
+ vnet_virtio_vring_packed_desc_t *d;
+ u16 sz = vring->queue_size;
u16 last = vring->last_used_idx;
u16 n_buffers = 0, start;
u16 flags;
@@ -273,7 +294,7 @@ virtio_free_used_device_desc_packed (vlib_main_t *vm, virtio_vring_t *vring,
}
static void
-virtio_free_used_device_desc (vlib_main_t *vm, virtio_vring_t *vring,
+virtio_free_used_device_desc (vlib_main_t *vm, vnet_virtio_vring_t *vring,
uword node_index, int packed)
{
if (packed)
@@ -284,25 +305,22 @@ virtio_free_used_device_desc (vlib_main_t *vm, virtio_vring_t *vring,
}
static void
-set_checksum_offsets (vlib_buffer_t *b, virtio_net_hdr_v1_t *hdr,
+set_checksum_offsets (vlib_buffer_t *b, vnet_virtio_net_hdr_v1_t *hdr,
const int is_l2)
{
vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
-
+ i16 l4_hdr_offset = vnet_buffer (b)->l4_hdr_offset - b->current_data;
if (b->flags & VNET_BUFFER_F_IS_IP4)
{
ip4_header_t *ip4;
- generic_header_offset_t gho = { 0 };
- vnet_generic_header_offset_parser (b, &gho, is_l2, 1 /* ip4 */ ,
- 0 /* ip6 */ );
hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = gho.l4_hdr_offset; // 0x22;
+ hdr->csum_start = l4_hdr_offset; // 0x22;
/*
* virtio devices do not support IP4 checksum offload. So driver takes
* care of it while doing tx.
*/
- ip4 = (ip4_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
+ ip4 = (ip4_header_t *) (b->data + vnet_buffer (b)->l3_hdr_offset);
if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
ip4->checksum = ip4_header_checksum (ip4);
@@ -313,14 +331,14 @@ set_checksum_offsets (vlib_buffer_t *b, virtio_net_hdr_v1_t *hdr,
if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
{
tcp_header_t *tcp =
- (tcp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
+ (tcp_header_t *) (b->data + vnet_buffer (b)->l4_hdr_offset);
tcp->checksum = ip4_pseudo_header_cksum (ip4);
hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
}
else if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
{
udp_header_t *udp =
- (udp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
+ (udp_header_t *) (b->data + vnet_buffer (b)->l4_hdr_offset);
udp->checksum = ip4_pseudo_header_cksum (ip4);
hdr->csum_offset = STRUCT_OFFSET_OF (udp_header_t, checksum);
}
@@ -328,12 +346,9 @@ set_checksum_offsets (vlib_buffer_t *b, virtio_net_hdr_v1_t *hdr,
else if (b->flags & VNET_BUFFER_F_IS_IP6)
{
ip6_header_t *ip6;
- generic_header_offset_t gho = { 0 };
- vnet_generic_header_offset_parser (b, &gho, is_l2, 0 /* ip4 */ ,
- 1 /* ip6 */ );
hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = gho.l4_hdr_offset; // 0x36;
- ip6 = (ip6_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
+ hdr->csum_start = l4_hdr_offset; // 0x36;
+ ip6 = (ip6_header_t *) (b->data + vnet_buffer (b)->l3_hdr_offset);
/*
* virtio devices assume the l4 header is set to the checksum of the
@@ -342,14 +357,14 @@ set_checksum_offsets (vlib_buffer_t *b, virtio_net_hdr_v1_t *hdr,
if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
{
tcp_header_t *tcp =
- (tcp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
+ (tcp_header_t *) (b->data + vnet_buffer (b)->l4_hdr_offset);
tcp->checksum = ip6_pseudo_header_cksum (ip6);
hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
}
else if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
{
udp_header_t *udp =
- (udp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
+ (udp_header_t *) (b->data + vnet_buffer (b)->l4_hdr_offset);
udp->checksum = ip6_pseudo_header_cksum (ip6);
hdr->csum_offset = STRUCT_OFFSET_OF (udp_header_t, checksum);
}
@@ -357,24 +372,22 @@ set_checksum_offsets (vlib_buffer_t *b, virtio_net_hdr_v1_t *hdr,
}
static void
-set_gso_offsets (vlib_buffer_t *b, virtio_net_hdr_v1_t *hdr, const int is_l2)
+set_gso_offsets (vlib_buffer_t *b, vnet_virtio_net_hdr_v1_t *hdr,
+ const int is_l2)
{
vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
+ i16 l4_hdr_offset = vnet_buffer (b)->l4_hdr_offset - b->current_data;
if (b->flags & VNET_BUFFER_F_IS_IP4)
{
ip4_header_t *ip4;
- generic_header_offset_t gho = { 0 };
- vnet_generic_header_offset_parser (b, &gho, is_l2, 1 /* ip4 */ ,
- 0 /* ip6 */ );
hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
hdr->gso_size = vnet_buffer2 (b)->gso_size;
- hdr->hdr_len = gho.hdr_sz;
+ hdr->hdr_len = l4_hdr_offset + vnet_buffer2 (b)->gso_l4_hdr_sz;
hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = gho.l4_hdr_offset; // 0x22;
+ hdr->csum_start = l4_hdr_offset; // 0x22;
hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
- ip4 =
- (ip4_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
+ ip4 = (ip4_header_t *) (b->data + vnet_buffer (b)->l3_hdr_offset);
/*
* virtio devices do not support IP4 checksum offload. So driver takes care
* of it while doing tx.
@@ -384,35 +397,33 @@ set_gso_offsets (vlib_buffer_t *b, virtio_net_hdr_v1_t *hdr, const int is_l2)
}
else if (b->flags & VNET_BUFFER_F_IS_IP6)
{
- generic_header_offset_t gho = { 0 };
- vnet_generic_header_offset_parser (b, &gho, is_l2, 0 /* ip4 */ ,
- 1 /* ip6 */ );
hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
hdr->gso_size = vnet_buffer2 (b)->gso_size;
- hdr->hdr_len = gho.hdr_sz;
+ hdr->hdr_len = l4_hdr_offset + vnet_buffer2 (b)->gso_l4_hdr_sz;
hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = gho.l4_hdr_offset; // 0x36;
+ hdr->csum_start = l4_hdr_offset; // 0x36;
hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
}
}
static u16
add_buffer_to_slot (vlib_main_t *vm, vlib_node_runtime_t *node,
- virtio_if_t *vif, virtio_vring_t *vring, u32 bi,
+ virtio_if_t *vif, vnet_virtio_vring_t *vring, u32 bi,
u16 free_desc_count, u16 avail, u16 next, u16 mask,
int hdr_sz, int do_gso, int csum_offload, int is_pci,
int is_tun, int is_indirect, int is_any_layout)
{
u16 n_added = 0;
- vring_desc_t *d;
+ vnet_virtio_vring_desc_t *d;
int is_l2 = !is_tun;
d = &vring->desc[next];
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
- virtio_net_hdr_v1_t *hdr = vlib_buffer_get_current (b) - hdr_sz;
+ vnet_virtio_net_hdr_v1_t *hdr = vlib_buffer_get_current (b) - hdr_sz;
u32 drop_inline = ~0;
clib_memset_u8 (hdr, 0, hdr_sz);
+ vring->total_packets++;
if (b->flags & VNET_BUFFER_F_GSO)
{
if (do_gso)
@@ -469,8 +480,8 @@ add_buffer_to_slot (vlib_main_t *vm, vlib_node_runtime_t *node,
indirect_desc->next_buffer = bi;
bi = indirect_buffer;
- vring_desc_t *id =
- (vring_desc_t *) vlib_buffer_get_current (indirect_desc);
+ vnet_virtio_vring_desc_t *id =
+ (vnet_virtio_vring_desc_t *) vlib_buffer_get_current (indirect_desc);
u32 count = 1;
if (is_pci)
{
@@ -539,7 +550,7 @@ add_buffer_to_slot (vlib_main_t *vm, vlib_node_runtime_t *node,
}
id->flags = 0;
id->next = 0;
- d->len = count * sizeof (vring_desc_t);
+ d->len = count * sizeof (vnet_virtio_vring_desc_t);
d->flags = VRING_DESC_F_INDIRECT;
}
else if (is_pci)
@@ -605,20 +616,22 @@ done:
static u16
add_buffer_to_slot_packed (vlib_main_t *vm, vlib_node_runtime_t *node,
- virtio_if_t *vif, virtio_vring_t *vring, u32 bi,
- u16 next, int hdr_sz, int do_gso, int csum_offload,
- int is_pci, int is_tun, int is_indirect,
- int is_any_layout)
+ virtio_if_t *vif, vnet_virtio_vring_t *vring,
+ u32 bi, u16 next, int hdr_sz, int do_gso,
+ int csum_offload, int is_pci, int is_tun,
+ int is_indirect, int is_any_layout)
{
u16 n_added = 0, flags = 0;
int is_l2 = !is_tun;
- vring_packed_desc_t *d = &vring->packed_desc[next];
+ vnet_virtio_vring_packed_desc_t *d = &vring->packed_desc[next];
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
- virtio_net_hdr_v1_t *hdr = vlib_buffer_get_current (b) - hdr_sz;
+ vnet_virtio_net_hdr_v1_t *hdr = vlib_buffer_get_current (b) - hdr_sz;
u32 drop_inline = ~0;
clib_memset (hdr, 0, hdr_sz);
+ vring->total_packets++;
+
if (b->flags & VNET_BUFFER_F_GSO)
{
if (do_gso)
@@ -675,8 +688,9 @@ add_buffer_to_slot_packed (vlib_main_t *vm, vlib_node_runtime_t *node,
indirect_desc->next_buffer = bi;
bi = indirect_buffer;
- vring_packed_desc_t *id =
- (vring_packed_desc_t *) vlib_buffer_get_current (indirect_desc);
+ vnet_virtio_vring_packed_desc_t *id =
+ (vnet_virtio_vring_packed_desc_t *) vlib_buffer_get_current (
+ indirect_desc);
u32 count = 1;
if (is_pci)
{
@@ -720,7 +734,7 @@ add_buffer_to_slot_packed (vlib_main_t *vm, vlib_node_runtime_t *node,
}
id->flags = 0;
id->id = 0;
- d->len = count * sizeof (vring_packed_desc_t);
+ d->len = count * sizeof (vnet_virtio_vring_packed_desc_t);
flags = VRING_DESC_F_INDIRECT;
}
else
@@ -752,12 +766,10 @@ done:
}
static uword
-virtio_interface_tx_packed_gso_inline (vlib_main_t *vm,
- vlib_node_runtime_t *node,
- virtio_if_t *vif, virtio_if_type_t type,
- virtio_vring_t *vring, u32 *buffers,
- u16 n_left, const int do_gso,
- const int csum_offload)
+virtio_interface_tx_packed_gso_inline (
+ vlib_main_t *vm, vlib_node_runtime_t *node, virtio_if_t *vif,
+ virtio_if_type_t type, vnet_virtio_vring_t *vring, u32 *buffers, u16 n_left,
+ const int do_gso, const int csum_offload)
{
int is_pci = (type == VIRTIO_IF_TYPE_PCI);
int is_tun = (type == VIRTIO_IF_TYPE_TUN);
@@ -766,7 +778,7 @@ virtio_interface_tx_packed_gso_inline (vlib_main_t *vm,
int is_any_layout =
((vif->features & VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT)) != 0);
const int hdr_sz = vif->virtio_net_hdr_sz;
- u16 sz = vring->size;
+ u16 sz = vring->queue_size;
u16 used, next, n_buffers = 0, n_buffers_left = 0;
u16 n_vectors = n_left;
@@ -800,6 +812,7 @@ virtio_interface_tx_packed_gso_inline (vlib_main_t *vm,
vring->avail_wrap_counter ^= 1;
}
}
+ virtio_txq_clear_scheduled (vring);
}
while (n_left && used < sz)
@@ -837,7 +850,7 @@ virtio_interface_tx_packed_gso_inline (vlib_main_t *vm,
}
static void
-virtio_find_free_desc (virtio_vring_t *vring, u16 size, u16 mask, u16 req,
+virtio_find_free_desc (vnet_virtio_vring_t *vring, u16 size, u16 mask, u16 req,
u16 next, u32 *first_free_desc_index,
u16 *free_desc_count)
{
@@ -876,7 +889,7 @@ static u16
virtio_interface_tx_split_gso_inline (vlib_main_t *vm,
vlib_node_runtime_t *node,
virtio_if_t *vif, virtio_if_type_t type,
- virtio_vring_t *vring, u32 *buffers,
+ vnet_virtio_vring_t *vring, u32 *buffers,
u16 n_left, int do_gso, int csum_offload)
{
u16 used, next, avail, n_buffers = 0, n_buffers_left = 0;
@@ -886,7 +899,7 @@ virtio_interface_tx_split_gso_inline (vlib_main_t *vm,
((vif->features & VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC)) != 0);
int is_any_layout =
((vif->features & VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT)) != 0);
- u16 sz = vring->size;
+ u16 sz = vring->queue_size;
int hdr_sz = vif->virtio_net_hdr_sz;
u16 mask = sz - 1;
u16 n_vectors = n_left;
@@ -940,6 +953,7 @@ virtio_interface_tx_split_gso_inline (vlib_main_t *vm,
n_buffers_left--;
free_desc_count -= n_added;
}
+ virtio_txq_clear_scheduled (vring);
}
while (n_left && free_desc_count)
@@ -984,7 +998,7 @@ virtio_interface_tx_split_gso_inline (vlib_main_t *vm,
static u16
virtio_interface_tx_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
virtio_if_t *vif, virtio_if_type_t type,
- virtio_vring_t *vring, u32 *buffers,
+ vnet_virtio_vring_t *vring, u32 *buffers,
u16 n_left, int packed, int do_gso,
int csum_offload)
{
@@ -1000,19 +1014,19 @@ virtio_interface_tx_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
static u16
virtio_interface_tx_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
- virtio_if_t *vif, virtio_vring_t *vring,
+ virtio_if_t *vif, vnet_virtio_vring_t *vring,
virtio_if_type_t type, u32 *buffers, u16 n_left,
int packed)
{
vnet_main_t *vnm = vnet_get_main ();
vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
- if (hw->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO)
+ if (hw->caps & VNET_HW_IF_CAP_TCP_GSO)
return virtio_interface_tx_gso_inline (vm, node, vif, type, vring,
buffers, n_left, packed,
1 /* do_gso */ ,
1 /* checksum offload */ );
- else if (hw->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM)
+ else if (hw->caps & VNET_HW_IF_CAP_L4_TX_CKSUM)
return virtio_interface_tx_gso_inline (vm, node, vif, type, vring,
buffers, n_left, packed,
0 /* no do_gso */ ,
@@ -1031,21 +1045,24 @@ VNET_DEVICE_CLASS_TX_FN (virtio_device_class) (vlib_main_t * vm,
virtio_main_t *nm = &virtio_main;
vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
virtio_if_t *vif = pool_elt_at_index (nm->interfaces, rund->dev_instance);
- u16 qid = vm->thread_index % vif->num_txqs;
- virtio_vring_t *vring = vec_elt_at_index (vif->txq_vrings, qid);
+ vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
+ u16 qid = tf->queue_id;
+ vnet_virtio_vring_t *vring = vec_elt_at_index (vif->txq_vrings, qid);
u16 n_left = frame->n_vectors;
u32 *buffers = vlib_frame_vector_args (frame);
u32 to[GRO_TO_VECTOR_SIZE (n_left)];
int packed = vif->is_packed;
u16 n_vectors = frame->n_vectors;
- clib_spinlock_lock_if_init (&vring->lockp);
+ if (tf->shared_queue)
+ clib_spinlock_lock (&vring->lockp);
if (vif->packet_coalesce)
{
n_vectors = n_left =
vnet_gro_inline (vm, vring->flow_table, buffers, n_left, to);
buffers = to;
+ virtio_txq_clear_scheduled (vring);
}
u16 retry_count = 2;
@@ -1089,7 +1106,8 @@ retry:
&buffers[n_vectors - n_left], n_left,
VIRTIO_TX_ERROR_NO_FREE_SLOTS);
- clib_spinlock_unlock_if_init (&vring->lockp);
+ if (tf->shared_queue)
+ clib_spinlock_unlock (&vring->lockp);
return frame->n_vectors - n_left;
}
@@ -1121,7 +1139,7 @@ virtio_clear_hw_interface_counters (u32 instance)
}
static void
-virtio_set_rx_interrupt (virtio_if_t *vif, virtio_vring_t *vring)
+virtio_set_rx_interrupt (virtio_if_t *vif, vnet_virtio_vring_t *vring)
{
if (vif->is_packed)
vring->driver_event->flags &= ~VRING_EVENT_F_DISABLE;
@@ -1130,7 +1148,7 @@ virtio_set_rx_interrupt (virtio_if_t *vif, virtio_vring_t *vring)
}
static void
-virtio_set_rx_polling (virtio_if_t *vif, virtio_vring_t *vring)
+virtio_set_rx_polling (virtio_if_t *vif, vnet_virtio_vring_t *vring)
{
if (vif->is_packed)
vring->driver_event->flags |= VRING_EVENT_F_DISABLE;
@@ -1142,11 +1160,10 @@ static clib_error_t *
virtio_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
vnet_hw_if_rx_mode mode)
{
- vlib_main_t *vm = vnm->vlib_main;
virtio_main_t *mm = &virtio_main;
vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
virtio_if_t *vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
- virtio_vring_t *rx_vring = vec_elt_at_index (vif->rxq_vrings, qid);
+ vnet_virtio_vring_t *rx_vring = vec_elt_at_index (vif->rxq_vrings, qid);
if (vif->type == VIRTIO_IF_TYPE_PCI && !(vif->support_int_mode))
{
@@ -1155,30 +1172,9 @@ virtio_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
}
if (mode == VNET_HW_IF_RX_MODE_POLLING)
- {
- if (vif->packet_coalesce || vif->packet_buffering)
- {
- if (mm->interrupt_queues_count > 0)
- mm->interrupt_queues_count--;
- if (mm->interrupt_queues_count == 0)
- vlib_process_signal_event (vm,
- virtio_send_interrupt_node.index,
- VIRTIO_EVENT_STOP_TIMER, 0);
- }
virtio_set_rx_polling (vif, rx_vring);
- }
else
- {
- if (vif->packet_coalesce || vif->packet_buffering)
- {
- mm->interrupt_queues_count++;
- if (mm->interrupt_queues_count == 1)
- vlib_process_signal_event (vm,
- virtio_send_interrupt_node.index,
- VIRTIO_EVENT_START_TIMER, 0);
- }
virtio_set_rx_interrupt (vif, rx_vring);
- }
rx_vring->mode = mode;
@@ -1206,16 +1202,6 @@ virtio_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
return 0;
}
-static clib_error_t *
-virtio_subif_add_del_function (vnet_main_t * vnm,
- u32 hw_if_index,
- struct vnet_sw_interface_t *st, int is_add)
-{
- /* Nothing for now */
- return 0;
-}
-
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (virtio_device_class) = {
.name = "virtio",
.format_device_name = format_virtio_device_name,
@@ -1226,11 +1212,9 @@ VNET_DEVICE_CLASS (virtio_device_class) = {
.rx_redirect_to_node = virtio_set_interface_next_node,
.clear_counters = virtio_clear_hw_interface_counters,
.admin_up_down_function = virtio_interface_admin_up_down,
- .subif_add_del_function = virtio_subif_add_del_function,
.rx_mode_change_function = virtio_interface_rx_mode_change,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
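
For reference, the literal comments kept next to csum_start above (// 0x22
and // 0x36) describe the common untagged-Ethernet case, where the computed
l4_hdr_offset works out to:

    14 (Ethernet) + 20 (IPv4) = 34 = 0x22
    14 (Ethernet) + 40 (IPv6) = 54 = 0x36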
diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c
index c36c0807de0..027e1ed4e74 100644
--- a/src/vnet/devices/virtio/node.c
+++ b/src/vnet/devices/virtio/node.c
@@ -19,7 +19,11 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <net/if.h>
+#ifdef __linux__
#include <linux/if_tun.h>
+#elif __FreeBSD__
+#include <net/if_tun.h>
+#endif /* __linux__ */
#include <sys/ioctl.h>
#include <sys/eventfd.h>
@@ -27,11 +31,11 @@
#include <vlib/unix/unix.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/feature/feature.h>
-#include <vnet/gso/gro_func.h>
#include <vnet/interface/rx_queue_funcs.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
#include <vnet/udp/udp_packet.h>
+#include <vnet/tcp/tcp_packet.h>
#include <vnet/devices/virtio/virtio.h>
#include <vnet/devices/virtio/virtio_inline.h>
@@ -47,7 +51,7 @@ typedef struct
u32 hw_if_index;
u16 ring;
u16 len;
- virtio_net_hdr_v1_t hdr;
+ vnet_virtio_net_hdr_v1_t hdr;
} virtio_input_trace_t;
static u8 *
@@ -69,8 +73,8 @@ format_virtio_input_trace (u8 * s, va_list * args)
}
static_always_inline void
-virtio_needs_csum (vlib_buffer_t * b0, virtio_net_hdr_v1_t * hdr,
- u8 * l4_proto, u8 * l4_hdr_sz, virtio_if_type_t type)
+virtio_needs_csum (vlib_buffer_t *b0, vnet_virtio_net_hdr_v1_t *hdr,
+ u8 *l4_proto, u8 *l4_hdr_sz, virtio_if_type_t type)
{
if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
{
@@ -91,8 +95,7 @@ virtio_needs_csum (vlib_buffer_t * b0, virtio_net_hdr_v1_t * hdr,
}
else
{
- ethernet_header_t *eh =
- (ethernet_header_t *) vlib_buffer_get_current (b0);
+ ethernet_header_t *eh = (ethernet_header_t *) b0->data;
ethertype = clib_net_to_host_u16 (eh->type);
l2hdr_sz = sizeof (ethernet_header_t);
@@ -117,8 +120,7 @@ virtio_needs_csum (vlib_buffer_t * b0, virtio_net_hdr_v1_t * hdr,
if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP4))
{
- ip4_header_t *ip4 =
- (ip4_header_t *) (vlib_buffer_get_current (b0) + l2hdr_sz);
+ ip4_header_t *ip4 = (ip4_header_t *) (b0->data + l2hdr_sz);
vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + ip4_header_bytes (ip4);
*l4_proto = ip4->protocol;
oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
@@ -129,8 +131,7 @@ virtio_needs_csum (vlib_buffer_t * b0, virtio_net_hdr_v1_t * hdr,
}
else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6))
{
- ip6_header_t *ip6 =
- (ip6_header_t *) (vlib_buffer_get_current (b0) + l2hdr_sz);
+ ip6_header_t *ip6 = (ip6_header_t *) (b0->data + l2hdr_sz);
vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + sizeof (ip6_header_t);
/* FIXME IPv6 EH traversal */
*l4_proto = ip6->protocol;
@@ -142,18 +143,14 @@ virtio_needs_csum (vlib_buffer_t * b0, virtio_net_hdr_v1_t * hdr,
if (*l4_proto == IP_PROTOCOL_TCP)
{
oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
- tcp_header_t *tcp = (tcp_header_t *) (vlib_buffer_get_current (b0) +
- vnet_buffer
- (b0)->l4_hdr_offset);
+ tcp_header_t *tcp =
+ (tcp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
*l4_hdr_sz = tcp_header_bytes (tcp);
}
else if (*l4_proto == IP_PROTOCOL_UDP)
{
oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
- udp_header_t *udp = (udp_header_t *) (vlib_buffer_get_current (b0) +
- vnet_buffer
- (b0)->l4_hdr_offset);
- *l4_hdr_sz = sizeof (*udp);
+ *l4_hdr_sz = sizeof (udp_header_t);
}
if (oflags)
vnet_buffer_offload_flags_set (b0, oflags);
@@ -161,7 +158,7 @@ virtio_needs_csum (vlib_buffer_t * b0, virtio_net_hdr_v1_t * hdr,
}
static_always_inline void
-fill_gso_buffer_flags (vlib_buffer_t * b0, virtio_net_hdr_v1_t * hdr,
+fill_gso_buffer_flags (vlib_buffer_t *b0, vnet_virtio_net_hdr_v1_t *hdr,
u8 l4_proto, u8 l4_hdr_sz)
{
if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV4)
@@ -181,7 +178,7 @@ fill_gso_buffer_flags (vlib_buffer_t * b0, virtio_net_hdr_v1_t * hdr,
}
static_always_inline u16
-virtio_n_left_to_process (virtio_vring_t * vring, const int packed)
+virtio_n_left_to_process (vnet_virtio_vring_t *vring, const int packed)
{
if (packed)
return vring->desc_in_use;
@@ -190,7 +187,7 @@ virtio_n_left_to_process (virtio_vring_t * vring, const int packed)
}
static_always_inline u16
-virtio_get_slot_id (virtio_vring_t * vring, const int packed, u16 last,
+virtio_get_slot_id (vnet_virtio_vring_t *vring, const int packed, u16 last,
u16 mask)
{
if (packed)
@@ -200,7 +197,7 @@ virtio_get_slot_id (virtio_vring_t * vring, const int packed, u16 last,
}
static_always_inline u16
-virtio_get_len (virtio_vring_t * vring, const int packed, const int hdr_sz,
+virtio_get_len (vnet_virtio_vring_t *vring, const int packed, const int hdr_sz,
u16 last, u16 mask)
{
if (packed)
@@ -209,22 +206,60 @@ virtio_get_len (virtio_vring_t * vring, const int packed, const int hdr_sz,
return vring->used->ring[last & mask].len - hdr_sz;
}
-#define increment_last(last, packed, vring) \
- do { \
- last++; \
- if (packed && last >= vring->size) \
- { \
- last = 0; \
- vring->used_wrap_counter ^= 1; \
- } \
- } while (0)
+#define virtio_packed_check_n_left(vring, last) \
+ do \
+ { \
+ vnet_virtio_vring_packed_desc_t *d = &vring->packed_desc[last]; \
+ u16 flags = d->flags; \
+ if ((flags & VRING_DESC_F_AVAIL) != (vring->used_wrap_counter << 7) || \
+ (flags & VRING_DESC_F_USED) != (vring->used_wrap_counter << 15)) \
+ { \
+ n_left = 0; \
+ } \
+ } \
+ while (0)
+
+#define increment_last(last, packed, vring) \
+ do \
+ { \
+ last++; \
+ if (packed && last >= vring->queue_size) \
+ { \
+ last = 0; \
+ vring->used_wrap_counter ^= 1; \
+ } \
+ } \
+ while (0)
+
+static_always_inline void
+virtio_device_input_ethernet (vlib_main_t *vm, vlib_node_runtime_t *node,
+ const u32 next_index, const u32 sw_if_index,
+ const u32 hw_if_index)
+{
+ vlib_next_frame_t *nf;
+ vlib_frame_t *f;
+ ethernet_input_frame_t *ef;
+
+ if (PREDICT_FALSE (VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT != next_index))
+ return;
+
+ nf = vlib_node_runtime_get_next_frame (
+ vm, node, VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT);
+ f = vlib_get_frame (vm, nf->frame);
+ f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+ ef = vlib_frame_scalar_args (f);
+ ef->sw_if_index = sw_if_index;
+ ef->hw_if_index = hw_if_index;
+ vlib_frame_no_append (f);
+}
static_always_inline uword
-virtio_device_input_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, virtio_if_t * vif,
- virtio_vring_t * vring, virtio_if_type_t type,
- int gso_enabled, int checksum_offload_enabled,
- int packed)
+virtio_device_input_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, virtio_if_t *vif,
+ vnet_virtio_vring_t *vring,
+ virtio_if_type_t type, int gso_enabled,
+ int checksum_offload_enabled, int packed)
{
vnet_main_t *vnm = vnet_get_main ();
u32 thread_index = vm->thread_index;
@@ -234,14 +269,29 @@ virtio_device_input_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 *to_next = 0;
u32 n_rx_packets = 0;
u32 n_rx_bytes = 0;
- u16 mask = vring->size - 1;
+ u16 mask = vring->queue_size - 1;
u16 last = vring->last_used_idx;
u16 n_left = virtio_n_left_to_process (vring, packed);
- vlib_buffer_t bt;
+ vlib_buffer_t bt = {};
+
+ if (packed)
+ {
+ virtio_packed_check_n_left (vring, last);
+ }
if (n_left == 0)
return 0;
+ if (PREDICT_FALSE (n_left == vring->queue_size))
+ {
+ /*
+ * Informational error logging when VPP is not pulling packets fast
+ * enough.
+ */
+ vlib_error_count (vm, node->node_index, VIRTIO_INPUT_ERROR_FULL_RX_QUEUE,
+ 1);
+ }
+
if (type == VIRTIO_IF_TYPE_TUN)
{
next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
@@ -253,7 +303,7 @@ virtio_device_input_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
next_index = vif->per_interface_next_index;
/* only for l2, redirect if feature path enabled */
- vnet_feature_start_device_input_x1 (vif->sw_if_index, &next_index, &bt);
+ vnet_feature_start_device_input (vif->sw_if_index, &next_index, &bt);
}
while (n_left)
@@ -261,13 +311,13 @@ virtio_device_input_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 n_left_to_next;
u32 next0 = next_index;
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
while (n_left && n_left_to_next)
{
if (packed)
{
- vring_packed_desc_t *d = &vring->packed_desc[last];
+ vnet_virtio_vring_packed_desc_t *d = &vring->packed_desc[last];
u16 flags = d->flags;
if ((flags & VRING_DESC_F_AVAIL) !=
(vring->used_wrap_counter << 7)
@@ -280,13 +330,13 @@ virtio_device_input_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
u8 l4_proto = 0, l4_hdr_sz = 0;
u16 num_buffers = 1;
- virtio_net_hdr_v1_t *hdr;
+ vnet_virtio_net_hdr_v1_t *hdr;
u16 slot = virtio_get_slot_id (vring, packed, last, mask);
u16 len = virtio_get_len (vring, packed, hdr_sz, last, mask);
u32 bi0 = vring->buffers[slot];
vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
hdr = vlib_buffer_get_current (b0);
- if (hdr_sz == sizeof (virtio_net_hdr_v1_t))
+ if (hdr_sz == sizeof (vnet_virtio_net_hdr_v1_t))
num_buffers = hdr->num_buffers;
b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
@@ -371,7 +421,7 @@ virtio_device_input_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tr->next_index = next0;
tr->hw_if_index = vif->hw_if_index;
tr->len = len;
- clib_memcpy_fast (&tr->hdr, hdr, hdr_sz);
+ clib_memcpy_fast (&tr->hdr, hdr, (hdr_sz == 12) ? 12 : 10);
}
/* enqueue buffer */
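The bounded trace copy above avoids reading past the negotiated header: per the virtio spec, the net header is 10 bytes on legacy devices and grows to 12 bytes, appending a 16-bit num_buffers field, once VIRTIO_F_VERSION_1 or VIRTIO_NET_F_MRG_RXBUF is negotiated. A standalone sketch of the two layouts (illustrative struct names, not VPP's exact typedefs):

#include <stdint.h>

typedef struct
{
  uint8_t flags;	/* e.g. needs-csum */
  uint8_t gso_type;
  uint16_t hdr_len;
  uint16_t gso_size;
  uint16_t csum_start;
  uint16_t csum_offset;
} net_hdr_legacy_t;	/* 10 bytes */

typedef struct
{
  net_hdr_legacy_t base;
  uint16_t num_buffers;	/* descriptor-chain count, modern layout only */
} net_hdr_v1_t;		/* 12 bytes */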
@@ -391,10 +441,13 @@ virtio_device_input_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
n_rx_packets++;
n_rx_bytes += len;
}
+ virtio_device_input_ethernet (vm, node, next_index, vif->sw_if_index,
+ vif->hw_if_index);
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
vring->last_used_idx = last;
+ vring->total_packets += n_rx_packets;
vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters
+ VNET_INTERFACE_COUNTER_RX, thread_index,
vif->sw_if_index, n_rx_packets,
@@ -408,23 +461,10 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame, virtio_if_t * vif, u16 qid,
virtio_if_type_t type)
{
- virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, qid);
+ vnet_virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, qid);
const int hdr_sz = vif->virtio_net_hdr_sz;
- u16 txq_id = vm->thread_index % vif->num_txqs;
- virtio_vring_t *txq_vring = vec_elt_at_index (vif->txq_vrings, txq_id);
uword rv;
- if (clib_spinlock_trylock_if_init (&txq_vring->lockp))
- {
- if (vif->packet_coalesce)
- vnet_gro_flow_table_schedule_node_on_dispatcher
- (vm, txq_vring->flow_table);
- else if (vif->packet_buffering)
- virtio_vring_buffering_schedule_node_on_dispatcher
- (vm, txq_vring->buffering);
- clib_spinlock_unlock_if_init (&txq_vring->lockp);
- }
-
if (vif->is_packed)
{
if (vif->gso_enabled)
@@ -494,7 +534,6 @@ VLIB_NODE_FN (virtio_input_node) (vlib_main_t * vm,
return n_rx;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (virtio_input_node) = {
.name = "virtio-input",
.sibling_of = "device-input",
@@ -505,7 +544,6 @@ VLIB_REGISTER_NODE (virtio_input_node) = {
.n_errors = VIRTIO_INPUT_N_ERROR,
.error_strings = virtio_input_error_strings,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/devices/virtio/pci.c b/src/vnet/devices/virtio/pci.c
index bebba7b45f8..6234f64fcfb 100644
--- a/src/vnet/devices/virtio/pci.c
+++ b/src/vnet/devices/virtio/pci.c
@@ -116,7 +116,7 @@ virtio_pci_irq_queue_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h,
line--;
u16 qid = line;
- virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, qid);
+ vnet_virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, qid);
vnet_hw_if_rx_queue_set_int_pending (vnm, vring->queue_index);
}
@@ -131,13 +131,11 @@ virtio_pci_irq_config_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h,
if (virtio_pci_is_link_up (vm, vif) & VIRTIO_NET_S_LINK_UP)
{
- vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
vnet_hw_interface_set_flags (vnm, vif->hw_if_index,
VNET_HW_INTERFACE_FLAG_LINK_UP);
}
else
{
- vif->flags &= ~VIRTIO_IF_FLAG_ADMIN_UP;
vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0);
}
}
@@ -200,18 +198,18 @@ static int
virtio_pci_send_ctrl_msg_packed (vlib_main_t * vm, virtio_if_t * vif,
virtio_ctrl_msg_t * data, u32 len)
{
- virtio_vring_t *vring = vif->cxq_vring;
+ vnet_virtio_vring_t *vring = vif->cxq_vring;
virtio_net_ctrl_ack_t status = VIRTIO_NET_ERR;
virtio_ctrl_msg_t result;
u32 buffer_index;
vlib_buffer_t *b;
u16 used, next;
- u16 sz = vring->size;
+ u16 sz = vring->queue_size;
u16 flags = 0, first_desc_flags = 0;
used = vring->desc_in_use;
next = vring->desc_next;
- vring_packed_desc_t *d = &vring->packed_desc[next];
+ vnet_virtio_vring_packed_desc_t *d = &vring->packed_desc[next];
if (vlib_buffer_alloc (vm, &buffer_index, 1))
b = vlib_get_buffer (vm, buffer_index);
@@ -319,9 +317,9 @@ virtio_pci_send_ctrl_msg_packed (vlib_main_t * vm, virtio_if_t * vif,
|| (flags & VRING_DESC_F_USED) != (vring->used_wrap_counter << 15));
last += 3;
- if (last >= vring->size)
+ if (last >= vring->queue_size)
{
- last = last - vring->size;
+ last = last - vring->queue_size;
vring->used_wrap_counter ^= 1;
}
vring->desc_in_use -= 3;
@@ -340,19 +338,19 @@ static int
virtio_pci_send_ctrl_msg_split (vlib_main_t * vm, virtio_if_t * vif,
virtio_ctrl_msg_t * data, u32 len)
{
- virtio_vring_t *vring = vif->cxq_vring;
+ vnet_virtio_vring_t *vring = vif->cxq_vring;
virtio_net_ctrl_ack_t status = VIRTIO_NET_ERR;
virtio_ctrl_msg_t result;
u32 buffer_index;
vlib_buffer_t *b;
u16 used, next, avail;
- u16 sz = vring->size;
+ u16 sz = vring->queue_size;
u16 mask = sz - 1;
used = vring->desc_in_use;
next = vring->desc_next;
avail = vring->avail->idx;
- vring_desc_t *d = &vring->desc[next];
+ vnet_virtio_vring_desc_t *d = &vring->desc[next];
if (vlib_buffer_alloc (vm, &buffer_index, 1))
b = vlib_get_buffer (vm, buffer_index);
@@ -405,7 +403,7 @@ virtio_pci_send_ctrl_msg_split (vlib_main_t * vm, virtio_if_t * vif,
while (n_left)
{
- vring_used_elem_t *e = &vring->used->ring[last & mask];
+ vnet_virtio_vring_used_elem_t *e = &vring->used->ring[last & mask];
u16 slot = e->id;
d = &vring->desc[slot];
@@ -508,7 +506,7 @@ virtio_pci_offloads (vlib_main_t * vm, virtio_if_t * vif, int gso_enabled,
int csum_offload_enabled)
{
vnet_main_t *vnm = vnet_get_main ();
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
+ vnet_hw_if_caps_change_t cc = {};
if ((vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ)) &&
(vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)))
@@ -524,10 +522,10 @@ virtio_pci_offloads (vlib_main_t * vm, virtio_if_t * vif, int gso_enabled,
else
{
vif->gso_enabled = 1;
- vif->csum_offload_enabled = 0;
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM;
+ vif->csum_offload_enabled = 1;
+ cc.val = cc.mask = VNET_HW_IF_CAP_TCP_GSO |
+ VNET_HW_IF_CAP_TX_TCP_CKSUM |
+ VNET_HW_IF_CAP_TX_UDP_CKSUM;
}
}
else if (csum_offload_enabled
@@ -541,9 +539,10 @@ virtio_pci_offloads (vlib_main_t * vm, virtio_if_t * vif, int gso_enabled,
{
vif->csum_offload_enabled = 1;
vif->gso_enabled = 0;
- hw->caps &= ~VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO;
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM;
+ cc.val =
+ VNET_HW_IF_CAP_TX_TCP_CKSUM | VNET_HW_IF_CAP_TX_UDP_CKSUM;
+ cc.mask = VNET_HW_IF_CAP_TCP_GSO | VNET_HW_IF_CAP_TX_TCP_CKSUM |
+ VNET_HW_IF_CAP_TX_UDP_CKSUM;
}
}
else
@@ -556,12 +555,15 @@ virtio_pci_offloads (vlib_main_t * vm, virtio_if_t * vif, int gso_enabled,
{
vif->csum_offload_enabled = 0;
vif->gso_enabled = 0;
- hw->caps &= ~(VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO);
+ cc.val = 0;
+ cc.mask = VNET_HW_IF_CAP_L4_TX_CKSUM | VNET_HW_IF_CAP_TCP_GSO;
}
}
}
+ if (cc.mask)
+ vnet_hw_if_change_caps (vnm, vif->hw_if_index, &cc);
+
return 0;
}
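The hunks above replace direct hw->caps bit twiddling with a single vnet_hw_if_change_caps call driven by a val/mask pair. A minimal sketch of that update rule, assuming the usual read-modify-write semantics (only bits selected by mask change, and they take their new value from val):

#include <stdint.h>

static inline uint32_t
caps_apply_change (uint32_t caps, uint32_t val, uint32_t mask)
{
  /* keep bits outside the mask, take masked bits from val */
  return (caps & ~mask) | (val & mask);
}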
@@ -598,7 +600,7 @@ virtio_pci_control_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
{
clib_error_t *error = 0;
u16 queue_size = 0;
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
u32 i = 0;
void *ptr = NULL;
@@ -613,34 +615,36 @@ virtio_pci_control_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
vec_validate_aligned (vif->cxq_vring, 0, CLIB_CACHE_LINE_BYTES);
vring = vec_elt_at_index (vif->cxq_vring, 0);
- i =
- (((queue_size * sizeof (vring_packed_desc_t)) +
- sizeof (vring_desc_event_t) + VIRTIO_PCI_VRING_ALIGN -
- 1) & ~(VIRTIO_PCI_VRING_ALIGN - 1)) + sizeof (vring_desc_event_t);
+ i = (((queue_size * sizeof (vnet_virtio_vring_packed_desc_t)) +
+ sizeof (vnet_virtio_vring_desc_event_t) + VNET_VIRTIO_PCI_VRING_ALIGN -
+ 1) &
+ ~(VNET_VIRTIO_PCI_VRING_ALIGN - 1)) +
+ sizeof (vnet_virtio_vring_desc_event_t);
- ptr =
- vlib_physmem_alloc_aligned_on_numa (vm, i, VIRTIO_PCI_VRING_ALIGN,
- vif->numa_node);
+ ptr = vlib_physmem_alloc_aligned_on_numa (vm, i, VNET_VIRTIO_PCI_VRING_ALIGN,
+ vif->numa_node);
if (!ptr)
return vlib_physmem_last_error (vm);
clib_memset (ptr, 0, i);
vring->packed_desc = ptr;
- vring->driver_event = ptr + (queue_size * sizeof (vring_packed_desc_t));
+ vring->driver_event =
+ ptr + (queue_size * sizeof (vnet_virtio_vring_packed_desc_t));
vring->driver_event->off_wrap = 0;
vring->driver_event->flags = VRING_EVENT_F_DISABLE;
vring->device_event =
- ptr +
- (((queue_size * sizeof (vring_packed_desc_t)) +
- sizeof (vring_desc_event_t) + VIRTIO_PCI_VRING_ALIGN -
- 1) & ~(VIRTIO_PCI_VRING_ALIGN - 1));
+ ptr + (((queue_size * sizeof (vnet_virtio_vring_packed_desc_t)) +
+ sizeof (vnet_virtio_vring_desc_event_t) +
+ VNET_VIRTIO_PCI_VRING_ALIGN - 1) &
+ ~(VNET_VIRTIO_PCI_VRING_ALIGN - 1));
vring->device_event->off_wrap = 0;
vring->device_event->flags = 0;
+ vring->total_packets = 0;
vring->queue_id = queue_num;
- vring->size = queue_size;
+ vring->queue_size = queue_size;
vring->avail_wrap_counter = 1;
vring->used_wrap_counter = 1;
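The reformatted size computation above lays a packed ring out as: descriptor table, driver event structure, padding up to the ring alignment, then the device event structure. A standalone sketch of the same arithmetic, assuming 16-byte packed descriptors and 4-byte event structures as in virtio 1.1:

#include <stddef.h>
#include <stdint.h>

static inline size_t
packed_ring_alloc_size (uint16_t queue_size, size_t desc_sz,
			size_t event_sz, size_t align)
{
  /* descriptor table plus driver event, rounded up to 'align', gives
   * the device event offset; the allocation ends after that event */
  size_t device_event_off =
    (queue_size * desc_sz + event_sz + align - 1) & ~(align - 1);
  return device_event_off + event_sz;
}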
@@ -648,7 +652,7 @@ virtio_pci_control_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
virtio_log_debug (vif, "control-queue: number %u, size %u", queue_num,
queue_size);
- vif->virtio_pci_func->setup_queue (vm, vif, queue_num, (void *) vring);
+ vif->virtio_pci_func->setup_queue (vm, vif, queue_num, vring);
vring->queue_notify_offset =
vif->notify_off_multiplier *
vif->virtio_pci_func->get_queue_notify_off (vm, vif, queue_num);
@@ -663,8 +667,7 @@ virtio_pci_control_vring_split_init (vlib_main_t * vm, virtio_if_t * vif,
{
clib_error_t *error = 0;
u16 queue_size = 0;
- virtio_vring_t *vring;
- vring_t vr;
+ vnet_virtio_vring_t *vring;
u32 i = 0;
void *ptr = NULL;
@@ -683,27 +686,21 @@ virtio_pci_control_vring_split_init (vlib_main_t * vm, virtio_if_t * vif,
vec_validate_aligned (vif->cxq_vring, 0, CLIB_CACHE_LINE_BYTES);
vring = vec_elt_at_index (vif->cxq_vring, 0);
- i = vring_size (queue_size, VIRTIO_PCI_VRING_ALIGN);
- i = round_pow2 (i, VIRTIO_PCI_VRING_ALIGN);
- ptr =
- vlib_physmem_alloc_aligned_on_numa (vm, i, VIRTIO_PCI_VRING_ALIGN,
- vif->numa_node);
+ i = vnet_virtio_vring_size (queue_size, VNET_VIRTIO_PCI_VRING_ALIGN);
+ i = round_pow2 (i, VNET_VIRTIO_PCI_VRING_ALIGN);
+ ptr = vlib_physmem_alloc_aligned_on_numa (vm, i, VNET_VIRTIO_PCI_VRING_ALIGN,
+ vif->numa_node);
if (!ptr)
return vlib_physmem_last_error (vm);
clib_memset (ptr, 0, i);
- vring_init (&vr, queue_size, ptr, VIRTIO_PCI_VRING_ALIGN);
- vring->desc = vr.desc;
- vring->avail = vr.avail;
- vring->used = vr.used;
+ vnet_virtio_vring_init (vring, queue_size, ptr, VNET_VIRTIO_PCI_VRING_ALIGN);
vring->queue_id = queue_num;
- vring->avail->flags = VIRTIO_RING_FLAG_MASK_INT;
+ vring->total_packets = 0;
ASSERT (vring->buffers == 0);
-
- vring->size = queue_size;
virtio_log_debug (vif, "control-queue: number %u, size %u", queue_num,
queue_size);
- vif->virtio_pci_func->setup_queue (vm, vif, queue_num, ptr);
+ vif->virtio_pci_func->setup_queue (vm, vif, queue_num, vring);
vring->queue_notify_offset =
vif->notify_off_multiplier *
vif->virtio_pci_func->get_queue_notify_off (vm, vif, queue_num);
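vnet_virtio_vring_init replaces the old vring_init/vr.desc/vr.avail/vr.used dance by filling the vring's pointers in place. A hedged sketch of the classic split-ring layout it has to compute, per the legacy virtio spec (16-byte descriptors; avail holds flags, idx, queue_size ring entries and used_event as u16s; the used ring is aligned up):

#include <stddef.h>
#include <stdint.h>

typedef struct
{
  void *desc;  /* queue_size 16-byte descriptors */
  void *avail; /* flags, idx, ring[queue_size], used_event */
  void *used;  /* flags, idx, ring of 8-byte elements, avail_event */
} split_ring_layout_t;

static inline void
split_ring_layout (split_ring_layout_t *vr, uint16_t qs, void *base,
		   size_t align)
{
  size_t used_off = qs * 16 + sizeof (uint16_t) * (3 + qs);
  used_off = (used_off + align - 1) & ~(align - 1); /* align used ring */
  vr->desc = base;
  vr->avail = (uint8_t *) base + qs * 16;
  vr->used = (uint8_t *) base + used_off;
}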
@@ -724,14 +721,12 @@ virtio_pci_control_vring_init (vlib_main_t * vm, virtio_if_t * vif,
}
clib_error_t *
-virtio_pci_vring_split_init (vlib_main_t * vm, virtio_if_t * vif,
- u16 queue_num)
+virtio_pci_vring_split_init (vlib_main_t *vm, virtio_if_t *vif, u16 queue_num,
+ u16 txq_size)
{
- vlib_thread_main_t *vtm = vlib_get_thread_main ();
clib_error_t *error = 0;
u16 queue_size = 0;
- virtio_vring_t *vring;
- vring_t vr;
+ vnet_virtio_vring_t *vring;
u32 i = 0;
void *ptr = NULL;
@@ -750,11 +745,20 @@ virtio_pci_vring_split_init (vlib_main_t * vm, virtio_if_t * vif,
if (queue_num % 2)
{
+ if (txq_size)
+ {
+ virtio_log_debug (vif, "tx-queue: number %u, default-size %u",
+ queue_num, queue_size);
+ vif->virtio_pci_func->set_queue_size (vm, vif, queue_num, txq_size);
+ queue_size =
+ vif->virtio_pci_func->get_queue_size (vm, vif, queue_num);
+ virtio_log_debug (vif, "tx-queue: number %u, new size %u", queue_num,
+ queue_size);
+ }
vec_validate_aligned (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num),
CLIB_CACHE_LINE_BYTES);
vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num));
- if (vif->max_queue_pairs < vtm->n_vlib_mains)
- clib_spinlock_init (&vring->lockp);
+ clib_spinlock_init (&vring->lockp);
}
else
{
@@ -762,21 +766,18 @@ virtio_pci_vring_split_init (vlib_main_t * vm, virtio_if_t * vif,
CLIB_CACHE_LINE_BYTES);
vring = vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS (queue_num));
}
- i = vring_size (queue_size, VIRTIO_PCI_VRING_ALIGN);
- i = round_pow2 (i, VIRTIO_PCI_VRING_ALIGN);
- ptr =
- vlib_physmem_alloc_aligned_on_numa (vm, i, VIRTIO_PCI_VRING_ALIGN,
- vif->numa_node);
+ i = vnet_virtio_vring_size (queue_size, VNET_VIRTIO_PCI_VRING_ALIGN);
+ i = round_pow2 (i, VNET_VIRTIO_PCI_VRING_ALIGN);
+ ptr = vlib_physmem_alloc_aligned_on_numa (vm, i, VNET_VIRTIO_PCI_VRING_ALIGN,
+ vif->numa_node);
if (!ptr)
return vlib_physmem_last_error (vm);
clib_memset (ptr, 0, i);
- vring_init (&vr, queue_size, ptr, VIRTIO_PCI_VRING_ALIGN);
- vring->desc = vr.desc;
- vring->avail = vr.avail;
- vring->used = vr.used;
+ vnet_virtio_vring_init (vring, queue_size, ptr, VNET_VIRTIO_PCI_VRING_ALIGN);
vring->queue_id = queue_num;
vring->avail->flags = VIRTIO_RING_FLAG_MASK_INT;
vring->flow_table = 0;
+ vring->total_packets = 0;
ASSERT (vring->buffers == 0);
vec_validate_aligned (vring->buffers, queue_size, CLIB_CACHE_LINE_BYTES);
@@ -791,8 +792,8 @@ virtio_pci_vring_split_init (vlib_main_t * vm, virtio_if_t * vif,
virtio_log_debug (vif, "rx-queue: number %u, size %u", queue_num,
queue_size);
}
- vring->size = queue_size;
- if (vif->virtio_pci_func->setup_queue (vm, vif, queue_num, ptr))
+ vring->queue_size = queue_size;
+ if (vif->virtio_pci_func->setup_queue (vm, vif, queue_num, vring))
return clib_error_return (0, "error in queue address setup");
vring->queue_notify_offset =
@@ -807,10 +808,9 @@ clib_error_t *
virtio_pci_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
u16 queue_num)
{
- vlib_thread_main_t *vtm = vlib_get_thread_main ();
clib_error_t *error = 0;
u16 queue_size = 0;
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
u32 i = 0;
void *ptr = NULL;
@@ -827,8 +827,7 @@ virtio_pci_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
vec_validate_aligned (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num),
CLIB_CACHE_LINE_BYTES);
vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num));
- if (vif->max_queue_pairs < vtm->n_vlib_mains)
- clib_spinlock_init (&vring->lockp);
+ clib_spinlock_init (&vring->lockp);
}
else
{
@@ -837,29 +836,30 @@ virtio_pci_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
vring = vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS (queue_num));
}
- i =
- (((queue_size * sizeof (vring_packed_desc_t)) +
- sizeof (vring_desc_event_t) + VIRTIO_PCI_VRING_ALIGN -
- 1) & ~(VIRTIO_PCI_VRING_ALIGN - 1)) + sizeof (vring_desc_event_t);
+ i = (((queue_size * sizeof (vnet_virtio_vring_packed_desc_t)) +
+ sizeof (vnet_virtio_vring_desc_event_t) + VNET_VIRTIO_PCI_VRING_ALIGN -
+ 1) &
+ ~(VNET_VIRTIO_PCI_VRING_ALIGN - 1)) +
+ sizeof (vnet_virtio_vring_desc_event_t);
- ptr =
- vlib_physmem_alloc_aligned_on_numa (vm, i, VIRTIO_PCI_VRING_ALIGN,
- vif->numa_node);
+ ptr = vlib_physmem_alloc_aligned_on_numa (vm, i, VNET_VIRTIO_PCI_VRING_ALIGN,
+ vif->numa_node);
if (!ptr)
return vlib_physmem_last_error (vm);
clib_memset (ptr, 0, i);
vring->packed_desc = ptr;
- vring->driver_event = ptr + (queue_size * sizeof (vring_packed_desc_t));
+ vring->driver_event =
+ ptr + (queue_size * sizeof (vnet_virtio_vring_packed_desc_t));
vring->driver_event->off_wrap = 0;
vring->driver_event->flags = VRING_EVENT_F_DISABLE;
vring->device_event =
- ptr +
- (((queue_size * sizeof (vring_packed_desc_t)) +
- sizeof (vring_desc_event_t) + VIRTIO_PCI_VRING_ALIGN -
- 1) & ~(VIRTIO_PCI_VRING_ALIGN - 1));
+ ptr + (((queue_size * sizeof (vnet_virtio_vring_packed_desc_t)) +
+ sizeof (vnet_virtio_vring_desc_event_t) +
+ VNET_VIRTIO_PCI_VRING_ALIGN - 1) &
+ ~(VNET_VIRTIO_PCI_VRING_ALIGN - 1));
vring->device_event->off_wrap = 0;
vring->device_event->flags = 0;
@@ -867,6 +867,7 @@ virtio_pci_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
vring->avail_wrap_counter = 1;
vring->used_wrap_counter = 1;
+ vring->total_packets = 0;
ASSERT (vring->buffers == 0);
vec_validate_aligned (vring->buffers, queue_size, CLIB_CACHE_LINE_BYTES);
@@ -881,8 +882,8 @@ virtio_pci_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
virtio_log_debug (vif, "rx-queue: number %u, size %u", queue_num,
queue_size);
}
- vring->size = queue_size;
- if (vif->virtio_pci_func->setup_queue (vm, vif, queue_num, (void *) vring))
+ vring->queue_size = queue_size;
+ if (vif->virtio_pci_func->setup_queue (vm, vif, queue_num, vring))
return clib_error_return (0, "error in queue address setup");
vring->queue_notify_offset =
@@ -895,12 +896,13 @@ virtio_pci_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
}
clib_error_t *
-virtio_pci_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 queue_num)
+virtio_pci_vring_init (vlib_main_t *vm, virtio_if_t *vif, u16 queue_num,
+ u16 txq_size)
{
if (vif->is_packed)
return virtio_pci_vring_packed_init (vm, vif, queue_num);
else
- return virtio_pci_vring_split_init (vm, vif, queue_num);
+ return virtio_pci_vring_split_init (vm, vif, queue_num, txq_size);
}
static void
@@ -1238,7 +1240,7 @@ virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif,
for (int i = 0; i < vif->max_queue_pairs; i++)
{
- if ((error = virtio_pci_vring_init (vm, vif, RX_QUEUE (i))))
+ if ((error = virtio_pci_vring_init (vm, vif, RX_QUEUE (i), 0)))
{
args->rv = VNET_API_ERROR_INIT_FAILED;
virtio_log_error (vif, "%s (%u) %s", "error in rxq-queue",
@@ -1253,7 +1255,8 @@ virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif,
vif->num_rxqs++;
}
- if ((error = virtio_pci_vring_init (vm, vif, TX_QUEUE (i))))
+ if ((error = virtio_pci_vring_init (vm, vif, TX_QUEUE (i),
+ args->tx_queue_size)))
{
args->rv = VNET_API_ERROR_INIT_FAILED;
virtio_log_error (vif, "%s (%u) %s", "error in txq-queue",
@@ -1337,7 +1340,6 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args)
clib_error_t *error = 0;
u32 interrupt_count = 0;
- /* *INDENT-OFF* */
pool_foreach (vif, vim->interfaces) {
if (vif->pci_addr.as_u32 == args->addr)
{
@@ -1350,7 +1352,24 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args)
return;
}
}
- /* *INDENT-ON* */
+
+ if (args->bind)
+ {
+ vlib_pci_addr_t pci = { .as_u32 = args->addr };
+ error = vlib_pci_bind_to_uio (vm, &pci, (char *) "auto",
+ VIRTIO_BIND_FORCE == args->bind);
+ if (error)
+ {
+ args->rv = VNET_API_ERROR_INVALID_INTERFACE;
+ args->error =
+ clib_error_return (error, "%U: %s", format_vlib_pci_addr, &pci,
+ "error encountered on binding pci device");
+ vlib_log (VLIB_LOG_LEVEL_ERR, vim->log_default, "%U: %s",
+ format_vlib_pci_addr, &pci,
+		    "error encountered on binding pci device");
+ return;
+ }
+ }
pool_get (vim->interfaces, vif);
vif->dev_instance = vif - vim->interfaces;
@@ -1466,25 +1485,18 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args)
}
/* create interface */
- error = ethernet_register_interface (vnm, virtio_device_class.index,
- vif->dev_instance, vif->mac_addr,
- &vif->hw_if_index,
- virtio_pci_flag_change);
-
- if (error)
- {
- args->rv = VNET_API_ERROR_INVALID_REGISTRATION;
- virtio_log_error (vif,
- "error encountered on ethernet register interface");
- goto error;
- }
+ vnet_eth_interface_registration_t eir = {};
+ eir.dev_class_index = virtio_device_class.index;
+ eir.dev_instance = vif->dev_instance;
+ eir.address = vif->mac_addr;
+ eir.cb.flag_change = virtio_pci_flag_change;
+ vif->hw_if_index = vnet_eth_register_interface (vnm, &eir);
vnet_sw_interface_t *sw = vnet_get_hw_sw_interface (vnm, vif->hw_if_index);
vif->sw_if_index = sw->sw_if_index;
args->sw_if_index = sw->sw_if_index;
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
+ vnet_hw_if_set_caps (vnm, vif->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
if (args->virtio_flags & VIRTIO_FLAG_BUFFERING)
{
@@ -1496,13 +1508,22 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args)
"error encountered during packet buffering init");
goto error;
}
+	  /*
+	   * The packet buffering flag must be set to 1 before calling
+	   * virtio_pre_input_node_enable, but only after the buffering
+	   * queues above have been initialized successfully. It stays 0
+	   * if buffering initialization fails.
+	   */
+ vif->packet_buffering = 1;
+ virtio_pre_input_node_enable (vm, vif);
}
virtio_vring_set_rx_queues (vm, vif);
+ virtio_vring_set_tx_queues (vm, vif);
if (virtio_pci_is_link_up (vm, vif) & VIRTIO_NET_S_LINK_UP)
{
- vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
vnet_hw_interface_set_flags (vnm, vif->hw_if_index,
VNET_HW_INTERFACE_FLAG_LINK_UP);
}
@@ -1539,17 +1560,19 @@ virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif)
vlib_pci_intr_disable (vm, vif->pci_dev_handle);
- for (i = 0; i < vif->max_queue_pairs; i++)
+ if (vif->virtio_pci_func)
{
- vif->virtio_pci_func->del_queue (vm, vif, RX_QUEUE (i));
- vif->virtio_pci_func->del_queue (vm, vif, TX_QUEUE (i));
- }
+ for (i = 0; i < vif->max_queue_pairs; i++)
+ {
+ vif->virtio_pci_func->del_queue (vm, vif, RX_QUEUE (i));
+ vif->virtio_pci_func->del_queue (vm, vif, TX_QUEUE (i));
+ }
- if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ))
- vif->virtio_pci_func->del_queue (vm, vif, vif->max_queue_pairs * 2);
+ if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ))
+ vif->virtio_pci_func->del_queue (vm, vif, vif->max_queue_pairs * 2);
- if (vif->virtio_pci_func)
- vif->virtio_pci_func->device_reset (vm, vif);
+ vif->virtio_pci_func->device_reset (vm, vif);
+ }
if (vif->hw_if_index)
{
@@ -1561,7 +1584,7 @@ virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif)
vec_foreach_index (i, vif->rxq_vrings)
{
- virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, i);
+ vnet_virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, i);
if (vring->used)
{
virtio_free_buffers (vm, vring);
@@ -1570,9 +1593,12 @@ virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif)
vlib_physmem_free (vm, vring->desc);
}
+ if (vif->packet_buffering)
+ virtio_pre_input_node_disable (vm, vif);
+
vec_foreach_index (i, vif->txq_vrings)
{
- virtio_vring_t *vring = vec_elt_at_index (vif->txq_vrings, i);
+ vnet_virtio_vring_t *vring = vec_elt_at_index (vif->txq_vrings, i);
if (vring->used)
{
virtio_free_buffers (vm, vring);
diff --git a/src/vnet/devices/virtio/pci.h b/src/vnet/devices/virtio/pci.h
index 70aa9833c2d..5eb80f823be 100644
--- a/src/vnet/devices/virtio/pci.h
+++ b/src/vnet/devices/virtio/pci.h
@@ -87,7 +87,7 @@ typedef enum
#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12
-#define VIRTIO_PCI_VRING_ALIGN 4096
+#define VNET_VIRTIO_PCI_VRING_ALIGN 4096
typedef enum
{
@@ -154,13 +154,11 @@ typedef struct
* and an ack/status response in the last entry. Data for the
* command goes in between.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
u8 class;
u8 cmd;
}) virtio_net_ctrl_hdr_t;
-/* *INDENT-ON* */
typedef u8 virtio_net_ctrl_ack_t;
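As the comment above describes, a control-queue message spans three descriptor roles: the class/cmd header first, the command payload in between, and a one-byte ack the device writes back last. A hedged sketch of that framing (illustrative struct, not the on-ring descriptor layout itself):

#include <stdint.h>

typedef struct
{
  uint8_t class;    /* command class, carried in the first descriptor */
  uint8_t cmd;
  uint8_t data[64]; /* command payload; size here is illustrative */
  uint8_t ack;	    /* device writes OK/ERR into the last descriptor */
} ctrl_msg_example_t;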
@@ -192,8 +190,8 @@ typedef struct _virtio_pci_func
u16 (*get_queue_size) (vlib_main_t * vm, virtio_if_t * vif, u16 queue_id);
void (*set_queue_size) (vlib_main_t * vm, virtio_if_t * vif, u16 queue_id,
u16 queue_size);
- u8 (*setup_queue) (vlib_main_t * vm, virtio_if_t * vif, u16 queue_id,
- void *p);
+ u8 (*setup_queue) (vlib_main_t *vm, virtio_if_t *vif, u16 queue_id,
+ vnet_virtio_vring_t *vring);
void (*del_queue) (vlib_main_t * vm, virtio_if_t * vif, u16 queue_id);
u16 (*get_queue_notify_off) (vlib_main_t * vm, virtio_if_t * vif,
u16 queue_id);
@@ -227,6 +225,13 @@ typedef enum
#undef _
} virtio_flag_t;
+typedef enum
+{
+ VIRTIO_BIND_NONE = 0,
+ VIRTIO_BIND_DEFAULT = 1,
+ VIRTIO_BIND_FORCE = 2,
+} __clib_packed virtio_bind_t;
+
typedef struct
{
u32 addr;
@@ -238,6 +243,8 @@ typedef struct
u64 features;
u8 gso_enabled;
u8 checksum_offload_enabled;
+ u32 tx_queue_size;
+ virtio_bind_t bind;
u32 buffering_size;
u32 virtio_flags;
clib_error_t *error;
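Taken together with the bind handling added in pci.c above, the two new members let callers request a driver bind and a TX ring size at create time. A hedged usage fragment (requires the VPP virtio headers; the address value and surrounding variables are illustrative):

  virtio_pci_create_if_args_t args = { 0 };
  args.addr = pci_addr.as_u32;	   /* pci_addr: existing vlib_pci_addr_t */
  args.bind = VIRTIO_BIND_DEFAULT; /* bind via vlib_pci_bind_to_uio */
  args.tx_queue_size = 512;	   /* 0 keeps the device default */
  virtio_pci_create_if (vm, &args);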
diff --git a/src/vnet/devices/virtio/vhost_user.api b/src/vnet/devices/virtio/vhost_user.api
deleted file mode 100644
index b026ba768a9..00000000000
--- a/src/vnet/devices/virtio/vhost_user.api
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Copyright (c) 2015-2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-option version = "4.1.1";
-
-import "vnet/interface_types.api";
-import "vnet/ethernet/ethernet_types.api";
-import "vnet/devices/virtio/virtio_types.api";
-
-/** \brief vhost-user interface create request
- @param client_index - opaque cookie to identify the sender
- @param is_server - our side is socket server
- @param sock_filename - unix socket filename, used to speak with frontend
- @param use_custom_mac - enable or disable the use of the provided hardware address
- @param disable_mrg_rxbuf - disable the use of merge receive buffers
- @param disable_indirect_desc - disable the use of indirect descriptors which driver can use
- @param enable_gso - enable gso support (default 0)
- @param enable_packed - enable packed ring support (default 0)
- @param mac_address - hardware address to use if 'use_custom_mac' is set
-*/
-define create_vhost_user_if
-{
- option deprecated;
- u32 client_index;
- u32 context;
- bool is_server;
- string sock_filename[256];
- bool renumber;
- bool disable_mrg_rxbuf;
- bool disable_indirect_desc;
- bool enable_gso;
- bool enable_packed;
- u32 custom_dev_instance;
- bool use_custom_mac;
- vl_api_mac_address_t mac_address;
- string tag[64];
-};
-
-/** \brief vhost-user interface create response
- @param context - sender context, to match reply w/ request
- @param retval - return code for the request
- @param sw_if_index - interface the operation is applied to
-*/
-define create_vhost_user_if_reply
-{
- option deprecated;
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-
-/** \brief vhost-user interface modify request
- @param client_index - opaque cookie to identify the sender
- @param is_server - our side is socket server
- @param sock_filename - unix socket filename, used to speak with frontend
- @param enable_gso - enable gso support (default 0)
- @param enable_packed - enable packed ring support (default 0)
-*/
-autoreply define modify_vhost_user_if
-{
- option deprecated;
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index;
- bool is_server;
- string sock_filename[256];
- bool renumber;
- bool enable_gso;
- bool enable_packed;
- u32 custom_dev_instance;
-};
-
-/** \brief vhost-user interface create request
- @param client_index - opaque cookie to identify the sender
- @param is_server - our side is socket server
- @param sock_filename - unix socket filename, used to speak with frontend
- @param use_custom_mac - enable or disable the use of the provided hardware address
- @param disable_mrg_rxbuf - disable the use of merge receive buffers
- @param disable_indirect_desc - disable the use of indirect descriptors which driver can use
- @param enable_gso - enable gso support (default 0)
- @param enable_packed - enable packed ring support (default 0)
- @param enable_event_idx - enable event_idx support (default 0)
- @param mac_address - hardware address to use if 'use_custom_mac' is set
- @param renumber - if true, use custom_dev_instance is valid
- @param custom_dev_instance - custom device instance number
-*/
-define create_vhost_user_if_v2
-{
- u32 client_index;
- u32 context;
- bool is_server;
- string sock_filename[256];
- bool renumber;
- bool disable_mrg_rxbuf;
- bool disable_indirect_desc;
- bool enable_gso;
- bool enable_packed;
- bool enable_event_idx;
- u32 custom_dev_instance;
- bool use_custom_mac;
- vl_api_mac_address_t mac_address;
- string tag[64];
-};
-
-/** \brief vhost-user interface create response
- @param context - sender context, to match reply w/ request
- @param retval - return code for the request
- @param sw_if_index - interface the operation is applied to
-*/
-define create_vhost_user_if_v2_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-
-/** \brief vhost-user interface modify request
- @param client_index - opaque cookie to identify the sender
- @param is_server - our side is socket server
- @param sock_filename - unix socket filename, used to speak with frontend
- @param enable_gso - enable gso support (default 0)
- @param enable_packed - enable packed ring support (default 0)
- @param enable_event_idx - enable event idx support (default 0)
- @param renumber - if true, use custom_dev_instance is valid
- @param custom_dev_instance - custom device instance number
-*/
-autoreply define modify_vhost_user_if_v2
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index;
- bool is_server;
- string sock_filename[256];
- bool renumber;
- bool enable_gso;
- bool enable_packed;
- bool enable_event_idx;
- u32 custom_dev_instance;
-};
-
-/** \brief vhost-user interface delete request
- @param client_index - opaque cookie to identify the sender
-*/
-autoreply define delete_vhost_user_if
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index;
-};
-
-/** \brief Vhost-user interface details structure (fix this)
- @param sw_if_index - index of the interface
- @param interface_name - name of interface
- @param virtio_net_hdr_sz - net header size
- @param features_first_32 - interface features, first 32 bits
- @param features_last_32 - interface features, last 32 bits
- @param is_server - vhost-user server socket
- @param sock_filename - socket filename
- @param num_regions - number of used memory regions
- @param sock_errno - socket errno
-*/
-define sw_interface_vhost_user_details
-{
- u32 context;
- vl_api_interface_index_t sw_if_index;
- string interface_name[64];
- u32 virtio_net_hdr_sz;
- vl_api_virtio_net_features_first_32_t features_first_32;
- vl_api_virtio_net_features_last_32_t features_last_32;
- bool is_server;
- string sock_filename[256];
- u32 num_regions;
- i32 sock_errno;
-};
-
-/** \brief Vhost-user interface dump request
- @param sw_if_index - filter by sw_if_index
-*/
-define sw_interface_vhost_user_dump
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index [default=0xffffffff];
-};
-/*
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/virtio/vhost_user.c b/src/vnet/devices/virtio/vhost_user.c
deleted file mode 100644
index cd37d4c59f8..00000000000
--- a/src/vnet/devices/virtio/vhost_user.c
+++ /dev/null
@@ -1,2615 +0,0 @@
-/*
- *------------------------------------------------------------------
- * vhost.c - vhost-user
- *
- * Copyright (c) 2014-2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <fcntl.h> /* for open */
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/uio.h> /* for iovec */
-#include <netinet/in.h>
-#include <sys/vfs.h>
-
-#include <linux/if_arp.h>
-#include <linux/if_tun.h>
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/devices/devices.h>
-#include <vnet/feature/feature.h>
-#include <vnet/interface/rx_queue_funcs.h>
-#include <vnet/interface/tx_queue_funcs.h>
-
-#include <vnet/devices/virtio/vhost_user.h>
-#include <vnet/devices/virtio/vhost_user_inline.h>
-
-/**
- * @file
- * @brief vHost User Device Driver.
- *
- * This file contains the source code for vHost User interface.
- */
-
-
-vlib_node_registration_t vhost_user_send_interrupt_node;
-
-/* *INDENT-OFF* */
-vhost_user_main_t vhost_user_main = {
- .mtu_bytes = 1518,
-};
-
-VNET_HW_INTERFACE_CLASS (vhost_interface_class, static) = {
- .name = "vhost-user",
-};
-/* *INDENT-ON* */
-
-static long
-get_huge_page_size (int fd)
-{
- struct statfs s;
- fstatfs (fd, &s);
- return s.f_bsize;
-}
-
-static void
-unmap_all_mem_regions (vhost_user_intf_t * vui)
-{
- int i, r, q;
- vhost_user_vring_t *vq;
-
- for (i = 0; i < vui->nregions; i++)
- {
- if (vui->region_mmap_addr[i] != MAP_FAILED)
- {
-
- long page_sz = get_huge_page_size (vui->region_mmap_fd[i]);
-
- ssize_t map_sz = (vui->regions[i].memory_size +
- vui->regions[i].mmap_offset +
- page_sz - 1) & ~(page_sz - 1);
-
- r =
- munmap (vui->region_mmap_addr[i] - vui->regions[i].mmap_offset,
- map_sz);
-
- vu_log_debug (vui, "unmap memory region %d addr 0x%lx len 0x%lx "
- "page_sz 0x%x", i, vui->region_mmap_addr[i], map_sz,
- page_sz);
-
- vui->region_mmap_addr[i] = MAP_FAILED;
-
- if (r == -1)
- {
- vu_log_err (vui, "failed to unmap memory region (errno %d)",
- errno);
- }
- close (vui->region_mmap_fd[i]);
- }
- }
- vui->nregions = 0;
-
- FOR_ALL_VHOST_RX_TXQ (q, vui)
- {
- vq = &vui->vrings[q];
- vq->avail = 0;
- vq->used = 0;
- vq->desc = 0;
- }
-}
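In this (now removed) region handling, mapping and unmapping had to agree on the same size: the guest region size plus its mmap offset, rounded up to the backing huge-page size reported by fstatfs. A standalone sketch of that rounding:

#include <stdint.h>
#include <sys/types.h>

static inline ssize_t
region_map_size (uint64_t memory_size, uint64_t mmap_offset, long page_sz)
{
  /* round (size + offset) up to the page size so mmap/munmap always
   * cover whole pages */
  return (memory_size + mmap_offset + page_sz - 1)
	 & ~((uint64_t) page_sz - 1);
}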
-
-static_always_inline void
-vhost_user_tx_thread_placement (vhost_user_intf_t *vui, u32 qid)
-{
- vnet_main_t *vnm = vnet_get_main ();
- vhost_user_vring_t *rxvq = &vui->vrings[qid];
- u32 q = qid >> 1, rxvq_count;
-
- ASSERT ((qid & 1) == 0);
- if (!rxvq->started || !rxvq->enabled)
- return;
-
- rxvq_count = (qid >> 1) + 1;
- if (rxvq->queue_index == ~0)
- {
- rxvq->queue_index =
- vnet_hw_if_register_tx_queue (vnm, vui->hw_if_index, q);
- rxvq->qid = q;
- }
-
- FOR_ALL_VHOST_RXQ (q, vui)
- {
- vhost_user_vring_t *rxvq = &vui->vrings[q];
- u32 qi = rxvq->queue_index;
-
- if (rxvq->queue_index == ~0)
- break;
- for (u32 i = 0; i < vlib_get_n_threads (); i++)
- vnet_hw_if_tx_queue_unassign_thread (vnm, qi, i);
- }
-
- for (u32 i = 0; i < vlib_get_n_threads (); i++)
- {
- vhost_user_vring_t *rxvq =
- &vui->vrings[VHOST_VRING_IDX_RX (i % rxvq_count)];
- u32 qi = rxvq->queue_index;
-
- vnet_hw_if_tx_queue_assign_thread (vnm, qi, i);
- }
-
- vnet_hw_if_update_runtime_data (vnm, vui->hw_if_index);
-}
-
-/**
- * @brief Unassign existing interface/queue to thread mappings and re-assign
- * new interface/queue to thread mappings
- */
-static_always_inline void
-vhost_user_rx_thread_placement (vhost_user_intf_t * vui, u32 qid)
-{
- vhost_user_vring_t *txvq = &vui->vrings[qid];
- vnet_main_t *vnm = vnet_get_main ();
- int rv;
- u32 q = qid >> 1;
- vhost_user_main_t *vum = &vhost_user_main;
-
- ASSERT ((qid & 1) == 1); // should be odd
- // Assign new queue mappings for the interface
- if (txvq->queue_index != ~0)
- return;
- vnet_hw_if_set_input_node (vnm, vui->hw_if_index,
- vhost_user_input_node.index);
- txvq->queue_index = vnet_hw_if_register_rx_queue (vnm, vui->hw_if_index, q,
- VNET_HW_IF_RXQ_THREAD_ANY);
- txvq->thread_index =
- vnet_hw_if_get_rx_queue_thread_index (vnm, txvq->queue_index);
-
- if (txvq->mode == VNET_HW_IF_RX_MODE_UNKNOWN)
- /* Set polling as the default */
- txvq->mode = VNET_HW_IF_RX_MODE_POLLING;
- if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
- {
- vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, txvq->thread_index);
- /* Keep a polling queue count for each thread */
- cpu->polling_q_count++;
- }
- txvq->qid = q;
- rv = vnet_hw_if_set_rx_queue_mode (vnm, txvq->queue_index, txvq->mode);
- if (rv)
- vu_log_warn (vui, "unable to set rx mode for interface %d, "
- "queue %d: rc=%d", vui->hw_if_index, q, rv);
- vnet_hw_if_update_runtime_data (vnm, vui->hw_if_index);
-}
-
-/** @brief Returns whether at least one TX and one RX vring are enabled */
-static_always_inline int
-vhost_user_intf_ready (vhost_user_intf_t * vui)
-{
- int i, found[2] = { }; //RX + TX
-
- for (i = 0; i < vui->num_qid; i++)
- if (vui->vrings[i].started && vui->vrings[i].enabled)
- found[i & 1] = 1;
-
- return found[0] && found[1];
-}
-
-static_always_inline void
-vhost_user_update_iface_state (vhost_user_intf_t * vui)
-{
- /* if we have pointers to descriptor table, go up */
- int is_ready = vhost_user_intf_ready (vui);
- if (is_ready != vui->is_ready)
- {
- vu_log_debug (vui, "interface %d %s", vui->sw_if_index,
- is_ready ? "ready" : "down");
- if (vui->admin_up)
- vnet_hw_interface_set_flags (vnet_get_main (), vui->hw_if_index,
- is_ready ? VNET_HW_INTERFACE_FLAG_LINK_UP
- : 0);
- vui->is_ready = is_ready;
- }
-}
-
-static clib_error_t *
-vhost_user_callfd_read_ready (clib_file_t * uf)
-{
- __attribute__ ((unused)) int n;
- u8 buff[8];
-
- n = read (uf->file_descriptor, ((char *) &buff), 8);
-
- return 0;
-}
-
-static_always_inline void
-vhost_user_thread_placement (vhost_user_intf_t * vui, u32 qid)
-{
- if (qid & 1) // RX is odd, TX is even
- {
- if (vui->vrings[qid].queue_index == ~0)
- vhost_user_rx_thread_placement (vui, qid);
- }
- else
- vhost_user_tx_thread_placement (vui, qid);
-}
-
-static clib_error_t *
-vhost_user_kickfd_read_ready (clib_file_t * uf)
-{
- __attribute__ ((unused)) ssize_t n;
- u8 buff[8];
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui =
- pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data >> 8);
- u32 qid = uf->private_data & 0xff;
- u32 is_txq = qid & 1;
- vhost_user_vring_t *vq = &vui->vrings[qid];
- vnet_main_t *vnm = vnet_get_main ();
-
- n = read (uf->file_descriptor, buff, 8);
- if (vq->started == 0)
- {
- vq->started = 1;
- vhost_user_thread_placement (vui, qid);
- vhost_user_update_iface_state (vui);
- if (is_txq)
- vnet_hw_if_set_rx_queue_file_index (vnm, vq->queue_index,
- vq->kickfd_idx);
- }
-
- if (is_txq && (vq->mode != VNET_HW_IF_RX_MODE_POLLING) &&
- vhost_user_intf_ready (vui))
- {
- vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, vq->thread_index);
- /*
- * If the thread has more than 1 queue and the other queue is in polling
- * mode, there is no need to trigger an interrupt
- */
- if (cpu->polling_q_count == 0)
- vnet_hw_if_rx_queue_set_int_pending (vnm, vq->queue_index);
- }
-
- return 0;
-}
-
-static_always_inline void
-vhost_user_vring_init (vhost_user_intf_t * vui, u32 qid)
-{
- vhost_user_vring_t *vring = &vui->vrings[qid];
-
- clib_memset (vring, 0, sizeof (*vring));
- vring->kickfd_idx = ~0;
- vring->callfd_idx = ~0;
- vring->errfd = -1;
- vring->qid = -1;
- vring->queue_index = ~0;
- vring->thread_index = ~0;
- vring->mode = VNET_HW_IF_RX_MODE_POLLING;
-
- clib_spinlock_init (&vring->vring_lock);
-
- /*
- * We have a bug with some qemu 2.5, and this may be a fix.
- * Feel like interpretation holy text, but this is from vhost-user.txt.
- * "
- * One queue pair is enabled initially. More queues are enabled
- * dynamically, by sending message VHOST_USER_SET_VRING_ENABLE.
- * "
- * Don't know who's right, but this is what DPDK does.
- */
- if (qid == 0 || qid == 1)
- vring->enabled = 1;
-}
-
-static_always_inline void
-vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid)
-{
- vhost_user_vring_t *vring = &vui->vrings[qid];
-
- if (vring->kickfd_idx != ~0)
- {
- clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
- vring->kickfd_idx);
- clib_file_del (&file_main, uf);
- vring->kickfd_idx = ~0;
- }
- if (vring->callfd_idx != ~0)
- {
- clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
- vring->callfd_idx);
- clib_file_del (&file_main, uf);
- vring->callfd_idx = ~0;
- }
- if (vring->errfd != -1)
- {
- close (vring->errfd);
- vring->errfd = -1;
- }
-
- clib_spinlock_free (&vring->vring_lock);
-
- // save the needed information in vrings prior to being wiped out
- u16 q = vui->vrings[qid].qid;
- u32 queue_index = vui->vrings[qid].queue_index;
- u32 mode = vui->vrings[qid].mode;
- u32 thread_index = vui->vrings[qid].thread_index;
- vhost_user_vring_init (vui, qid);
- vui->vrings[qid].qid = q;
- vui->vrings[qid].queue_index = queue_index;
- vui->vrings[qid].mode = mode;
- vui->vrings[qid].thread_index = thread_index;
-}
-
-static_always_inline void
-vhost_user_if_disconnect (vhost_user_intf_t * vui)
-{
- vnet_main_t *vnm = vnet_get_main ();
- int q;
-
- vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
-
- if (vui->clib_file_index != ~0)
- {
- clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index);
- vui->clib_file_index = ~0;
- }
-
- vui->is_ready = 0;
-
- FOR_ALL_VHOST_RX_TXQ (q, vui) { vhost_user_vring_close (vui, q); }
-
- unmap_all_mem_regions (vui);
- vu_log_debug (vui, "interface ifindex %d disconnected", vui->sw_if_index);
-}
-
-void
-vhost_user_set_operation_mode (vhost_user_intf_t *vui,
- vhost_user_vring_t *txvq)
-{
- if (vhost_user_is_packed_ring_supported (vui))
- {
- if (txvq->used_event)
- {
- if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
- txvq->used_event->flags = VRING_EVENT_F_DISABLE;
- else
- txvq->used_event->flags = 0;
- }
- }
- else
- {
- if (txvq->used)
- {
- if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
- txvq->used->flags = VRING_USED_F_NO_NOTIFY;
- else
- txvq->used->flags = 0;
- }
- }
-}
-
-static clib_error_t *
-vhost_user_socket_read (clib_file_t * uf)
-{
- int n, i, j;
- int fd, number_of_fds = 0;
- int fds[VHOST_MEMORY_MAX_NREGIONS];
- vhost_user_msg_t msg;
- struct msghdr mh;
- struct iovec iov[1];
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui;
- struct cmsghdr *cmsg;
- u8 q;
- clib_file_t template = { 0 };
- vnet_main_t *vnm = vnet_get_main ();
- vlib_main_t *vm = vlib_get_main ();
-
- vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
-
- char control[CMSG_SPACE (VHOST_MEMORY_MAX_NREGIONS * sizeof (int))];
-
- clib_memset (&mh, 0, sizeof (mh));
- clib_memset (control, 0, sizeof (control));
-
- for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++)
- fds[i] = -1;
-
- /* set the payload */
- iov[0].iov_base = (void *) &msg;
- iov[0].iov_len = VHOST_USER_MSG_HDR_SZ;
-
- mh.msg_iov = iov;
- mh.msg_iovlen = 1;
- mh.msg_control = control;
- mh.msg_controllen = sizeof (control);
-
- n = recvmsg (uf->file_descriptor, &mh, 0);
-
- if (n != VHOST_USER_MSG_HDR_SZ)
- {
- if (n == -1)
- {
- vu_log_debug (vui, "recvmsg returned error %d %s", errno,
- strerror (errno));
- }
- else
- {
- vu_log_debug (vui, "n (%d) != VHOST_USER_MSG_HDR_SZ (%d)",
- n, VHOST_USER_MSG_HDR_SZ);
- }
- goto close_socket;
- }
-
- if (mh.msg_flags & MSG_CTRUNC)
- {
- vu_log_debug (vui, "MSG_CTRUNC is set");
- goto close_socket;
- }
-
- cmsg = CMSG_FIRSTHDR (&mh);
-
- if (cmsg && (cmsg->cmsg_len > 0) && (cmsg->cmsg_level == SOL_SOCKET) &&
- (cmsg->cmsg_type == SCM_RIGHTS) &&
- (cmsg->cmsg_len - CMSG_LEN (0) <=
- VHOST_MEMORY_MAX_NREGIONS * sizeof (int)))
- {
- number_of_fds = (cmsg->cmsg_len - CMSG_LEN (0)) / sizeof (int);
- clib_memcpy_fast (fds, CMSG_DATA (cmsg), number_of_fds * sizeof (int));
- }
-
- /* version 1, no reply bit set */
- if ((msg.flags & 7) != 1)
- {
- vu_log_debug (vui, "malformed message received. closing socket");
- goto close_socket;
- }
-
- {
- int rv;
- rv =
- read (uf->file_descriptor, ((char *) &msg) + VHOST_USER_MSG_HDR_SZ,
- msg.size);
- if (rv < 0)
- {
- vu_log_debug (vui, "read failed %s", strerror (errno));
- goto close_socket;
- }
- else if (rv != msg.size)
- {
- vu_log_debug (vui, "message too short (read %dB should be %dB)", rv,
- msg.size);
- goto close_socket;
- }
- }
-
- switch (msg.request)
- {
- case VHOST_USER_GET_FEATURES:
- msg.flags |= 4;
- msg.u64 = VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF) |
- VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ) |
- VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT) |
- VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC) |
- VIRTIO_FEATURE (VHOST_F_LOG_ALL) |
- VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_ANNOUNCE) |
- VIRTIO_FEATURE (VIRTIO_NET_F_MQ) |
- VIRTIO_FEATURE (VHOST_USER_F_PROTOCOL_FEATURES) |
- VIRTIO_FEATURE (VIRTIO_F_VERSION_1);
- msg.u64 &= vui->feature_mask;
-
- if (vui->enable_event_idx)
- msg.u64 |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
- if (vui->enable_gso)
- msg.u64 |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS;
- if (vui->enable_packed)
- msg.u64 |= VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
-
- msg.size = sizeof (msg.u64);
- vu_log_debug (vui, "if %d msg VHOST_USER_GET_FEATURES - reply "
- "0x%016llx", vui->hw_if_index, msg.u64);
- n =
- send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
- if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
- {
- vu_log_debug (vui, "could not send message response");
- goto close_socket;
- }
- break;
-
- case VHOST_USER_SET_FEATURES:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_FEATURES features "
- "0x%016llx", vui->hw_if_index, msg.u64);
-
- vui->features = msg.u64;
-
- if (vui->features &
- (VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF) |
- VIRTIO_FEATURE (VIRTIO_F_VERSION_1)))
- vui->virtio_net_hdr_sz = 12;
- else
- vui->virtio_net_hdr_sz = 10;
-
- vui->is_any_layout =
- (vui->features & VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT)) ? 1 : 0;
-
- ASSERT (vui->virtio_net_hdr_sz < VLIB_BUFFER_PRE_DATA_SIZE);
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vui->hw_if_index);
- if (vui->enable_gso &&
- ((vui->features & FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS)
- == FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS))
- {
- hw->caps |= (VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM);
- }
- else
- {
- hw->caps &= ~(VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM);
- }
- vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
- vui->is_ready = 0;
- vhost_user_update_iface_state (vui);
- break;
-
- case VHOST_USER_SET_MEM_TABLE:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_MEM_TABLE nregions %d",
- vui->hw_if_index, msg.memory.nregions);
-
- if ((msg.memory.nregions < 1) ||
- (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS))
- {
- vu_log_debug (vui, "number of mem regions must be between 1 and %i",
- VHOST_MEMORY_MAX_NREGIONS);
- goto close_socket;
- }
-
- if (msg.memory.nregions != number_of_fds)
- {
- vu_log_debug (vui, "each memory region must have FD");
- goto close_socket;
- }
-
- /* Do the mmap without barrier sync */
- void *region_mmap_addr[VHOST_MEMORY_MAX_NREGIONS];
- for (i = 0; i < msg.memory.nregions; i++)
- {
- long page_sz = get_huge_page_size (fds[i]);
-
- /* align size to page */
- ssize_t map_sz = (msg.memory.regions[i].memory_size +
- msg.memory.regions[i].mmap_offset +
- page_sz - 1) & ~(page_sz - 1);
-
- region_mmap_addr[i] = mmap (0, map_sz, PROT_READ | PROT_WRITE,
- MAP_SHARED, fds[i], 0);
- if (region_mmap_addr[i] == MAP_FAILED)
- {
- vu_log_err (vui, "failed to map memory. errno is %d", errno);
- for (j = 0; j < i; j++)
- munmap (region_mmap_addr[j], map_sz);
- goto close_socket;
- }
- vu_log_debug (vui, "map memory region %d addr 0 len 0x%lx fd %d "
- "mapped 0x%lx page_sz 0x%x", i, map_sz, fds[i],
- region_mmap_addr[i], page_sz);
- }
-
- vlib_worker_thread_barrier_sync (vm);
- unmap_all_mem_regions (vui);
- for (i = 0; i < msg.memory.nregions; i++)
- {
- clib_memcpy_fast (&(vui->regions[i]), &msg.memory.regions[i],
- sizeof (vhost_user_memory_region_t));
-
- vui->region_mmap_addr[i] = region_mmap_addr[i];
- vui->region_guest_addr_lo[i] = vui->regions[i].guest_phys_addr;
- vui->region_guest_addr_hi[i] = vui->regions[i].guest_phys_addr +
- vui->regions[i].memory_size;
-
- vui->region_mmap_addr[i] += vui->regions[i].mmap_offset;
- vui->region_mmap_fd[i] = fds[i];
-
- vui->nregions++;
- }
-
- /*
- * Re-compute desc, used, and avail descriptor table if vring address
- * is set.
- */
- FOR_ALL_VHOST_RX_TXQ (q, vui)
- {
- if (vui->vrings[q].desc_user_addr && vui->vrings[q].used_user_addr &&
- vui->vrings[q].avail_user_addr)
- {
- vui->vrings[q].desc =
- map_user_mem (vui, vui->vrings[q].desc_user_addr);
- vui->vrings[q].used =
- map_user_mem (vui, vui->vrings[q].used_user_addr);
- vui->vrings[q].avail =
- map_user_mem (vui, vui->vrings[q].avail_user_addr);
- }
- }
- vlib_worker_thread_barrier_release (vm);
- break;
-
- case VHOST_USER_SET_VRING_NUM:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d",
- vui->hw_if_index, msg.state.index, msg.state.num);
-
- if ((msg.state.num > 32768) || /* maximum ring size is 32768 */
- (msg.state.num == 0) || /* it cannot be zero */
- ((msg.state.num - 1) & msg.state.num) || /* must be power of 2 */
- (msg.state.index >= vui->num_qid))
- {
- vu_log_debug (vui, "invalid VHOST_USER_SET_VRING_NUM: msg.state.num"
- " %d, msg.state.index %d, curruent max q %d",
- msg.state.num, msg.state.index, vui->num_qid);
- goto close_socket;
- }
- vui->vrings[msg.state.index].qsz_mask = msg.state.num - 1;
- break;
-
- case VHOST_USER_SET_VRING_ADDR:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_ADDR idx %d",
- vui->hw_if_index, msg.state.index);
-
- if (msg.state.index >= vui->num_qid)
- {
- vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_ADDR:"
- " %u >= %u", msg.state.index, vui->num_qid);
- goto close_socket;
- }
-
- if (msg.size < sizeof (msg.addr))
- {
- vu_log_debug (vui, "vhost message is too short (%d < %d)",
- msg.size, sizeof (msg.addr));
- goto close_socket;
- }
-
- vring_desc_t *desc = map_user_mem (vui, msg.addr.desc_user_addr);
- vring_used_t *used = map_user_mem (vui, msg.addr.used_user_addr);
- vring_avail_t *avail = map_user_mem (vui, msg.addr.avail_user_addr);
-
- if ((desc == NULL) || (used == NULL) || (avail == NULL))
- {
- vu_log_debug (vui, "failed to map user memory for hw_if_index %d",
- vui->hw_if_index);
- goto close_socket;
- }
-
- vui->vrings[msg.state.index].desc_user_addr = msg.addr.desc_user_addr;
- vui->vrings[msg.state.index].used_user_addr = msg.addr.used_user_addr;
- vui->vrings[msg.state.index].avail_user_addr = msg.addr.avail_user_addr;
-
- vlib_worker_thread_barrier_sync (vm);
- vui->vrings[msg.state.index].desc = desc;
- vui->vrings[msg.state.index].used = used;
- vui->vrings[msg.state.index].avail = avail;
-
- vui->vrings[msg.state.index].log_guest_addr = msg.addr.log_guest_addr;
- vui->vrings[msg.state.index].log_used =
- (msg.addr.flags & (1 << VHOST_VRING_F_LOG)) ? 1 : 0;
-
- /* Spec says: If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated,
- the ring is initialized in an enabled state. */
- if (!(vui->features & VIRTIO_FEATURE (VHOST_USER_F_PROTOCOL_FEATURES)))
- vui->vrings[msg.state.index].enabled = 1;
-
- vui->vrings[msg.state.index].last_used_idx =
- vui->vrings[msg.state.index].last_avail_idx =
- vui->vrings[msg.state.index].used->idx;
- vui->vrings[msg.state.index].last_kick =
- vui->vrings[msg.state.index].last_used_idx;
-
- /* tell driver that we want interrupts or not */
- vhost_user_set_operation_mode (vui, &vui->vrings[msg.state.index]);
- vlib_worker_thread_barrier_release (vm);
- vhost_user_update_iface_state (vui);
- break;
-
- case VHOST_USER_SET_OWNER:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_OWNER", vui->hw_if_index);
- break;
-
- case VHOST_USER_RESET_OWNER:
- vu_log_debug (vui, "if %d msg VHOST_USER_RESET_OWNER",
- vui->hw_if_index);
- break;
-
- case VHOST_USER_SET_VRING_CALL:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_CALL %d",
- vui->hw_if_index, msg.u64);
-
- q = (u8) (msg.u64 & 0xFF);
- if (vui->num_qid > q)
- {
- /* if there is old fd, delete and close it */
- if (vui->vrings[q].callfd_idx != ~0)
- {
- clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
- vui->vrings[q].callfd_idx);
- clib_file_del (&file_main, uf);
- vui->vrings[q].callfd_idx = ~0;
- }
- }
- else if (vec_len (vui->vrings) > q)
- {
- /* grow vrings by pair (RX + TX) */
- vui->num_qid = (q & 1) ? (q + 1) : (q + 2);
- }
- else
- {
- u32 i, new_max_q, old_max_q = vec_len (vui->vrings);
-
- /*
- * Double the array size if it is less than 64 entries.
- * Slow down thereafter.
- */
- if (vec_len (vui->vrings) < (VHOST_VRING_INIT_MQ_PAIR_SZ << 3))
- new_max_q = vec_len (vui->vrings) << 1;
- else
- new_max_q = vec_len (vui->vrings) +
- (VHOST_VRING_INIT_MQ_PAIR_SZ << 2);
- if (new_max_q > (VHOST_VRING_MAX_MQ_PAIR_SZ << 1))
- new_max_q = (VHOST_VRING_MAX_MQ_PAIR_SZ << 1);
-
- /* sync with the worker threads, vrings may move due to realloc */
- vlib_worker_thread_barrier_sync (vm);
- vec_validate_aligned (vui->vrings, new_max_q - 1,
- CLIB_CACHE_LINE_BYTES);
- vlib_worker_thread_barrier_release (vm);
-
- for (i = old_max_q; i < vec_len (vui->vrings); i++)
- vhost_user_vring_init (vui, i);
-
- /* grow vrings by pair (RX + TX) */
- vui->num_qid = (q & 1) ? (q + 1) : (q + 2);
- }
-
- if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
- {
- if (number_of_fds != 1)
- {
- vu_log_debug (vui, "More than one fd received !");
- goto close_socket;
- }
-
- template.read_function = vhost_user_callfd_read_ready;
- template.file_descriptor = fds[0];
- template.private_data =
- ((vui - vhost_user_main.vhost_user_interfaces) << 8) + q;
- template.description = format (0, "vhost user");
- vui->vrings[q].callfd_idx = clib_file_add (&file_main, &template);
- }
- else
- vui->vrings[q].callfd_idx = ~0;
- break;
-
- case VHOST_USER_SET_VRING_KICK:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_KICK %d",
- vui->hw_if_index, msg.u64);
-
- q = (u8) (msg.u64 & 0xFF);
- if (q >= vui->num_qid)
- {
- vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_KICK:"
- " %u >= %u", q, vui->num_qid);
- goto close_socket;
- }
-
- if (vui->vrings[q].kickfd_idx != ~0)
- {
- clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
- vui->vrings[q].kickfd_idx);
- clib_file_del (&file_main, uf);
- vui->vrings[q].kickfd_idx = ~0;
- }
-
- if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
- {
- if (number_of_fds != 1)
- {
- vu_log_debug (vui, "More than one fd received !");
- goto close_socket;
- }
-
- template.read_function = vhost_user_kickfd_read_ready;
- template.file_descriptor = fds[0];
- template.private_data =
- (((uword) (vui - vhost_user_main.vhost_user_interfaces)) << 8) +
- q;
- vui->vrings[q].kickfd_idx = clib_file_add (&file_main, &template);
- }
- else
- {
- //When no kickfd is set, the queue is initialized as started
- vui->vrings[q].kickfd_idx = ~0;
- vui->vrings[q].started = 1;
- vhost_user_thread_placement (vui, q);
- }
- vhost_user_update_iface_state (vui);
- break;
-
- case VHOST_USER_SET_VRING_ERR:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_ERR %d",
- vui->hw_if_index, msg.u64);
-
- q = (u8) (msg.u64 & 0xFF);
- if (q >= vui->num_qid)
- {
- vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_ERR:"
- " %u >= %u", q, vui->num_qid);
- goto close_socket;
- }
-
- if (vui->vrings[q].errfd != -1)
- close (vui->vrings[q].errfd);
-
- if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
- {
- if (number_of_fds != 1)
- goto close_socket;
-
- vui->vrings[q].errfd = fds[0];
- }
- else
- vui->vrings[q].errfd = -1;
- break;
-
- case VHOST_USER_SET_VRING_BASE:
- vu_log_debug (vui,
- "if %d msg VHOST_USER_SET_VRING_BASE idx %d num 0x%x",
- vui->hw_if_index, msg.state.index, msg.state.num);
- if (msg.state.index >= vui->num_qid)
- {
-	  vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_BASE:"
- " %u >= %u", msg.state.index, vui->num_qid);
- goto close_socket;
- }
- vlib_worker_thread_barrier_sync (vm);
- vui->vrings[msg.state.index].last_avail_idx = msg.state.num;
- if (vhost_user_is_packed_ring_supported (vui))
- {
- /*
- * 0 1 2 3
- * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | last avail idx | | last used idx | |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * ^ ^
- * | |
- * avail wrap counter used wrap counter
- */
- /* last avail idx at bit 0-14. */
- vui->vrings[msg.state.index].last_avail_idx =
- msg.state.num & 0x7fff;
- /* avail wrap counter at bit 15 */
- vui->vrings[msg.state.index].avail_wrap_counter =
- ! !(msg.state.num & (1 << 15));
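-	  /*
-	   * Decode example (sketch): msg.state.num == 0x8005 yields
-	   * last_avail_idx == 5 and avail_wrap_counter == 1.
-	   */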
-
- /*
-	   * Although last_used_idx is passed in the upper 16 bits in the qemu
-	   * implementation, in practice, last_avail_idx and last_used_idx are
-	   * usually the same. As a result, DPDK does not bother to pass us
-	   * last_used_idx. The spec is not clear on the encoding. I figured it
-	   * out by reading the qemu code. So let's just read last_avail_idx
-	   * and set last_used_idx equal to last_avail_idx.
- */
- vui->vrings[msg.state.index].last_used_idx =
- vui->vrings[msg.state.index].last_avail_idx;
- vui->vrings[msg.state.index].last_kick =
- vui->vrings[msg.state.index].last_used_idx;
- vui->vrings[msg.state.index].used_wrap_counter =
- vui->vrings[msg.state.index].avail_wrap_counter;
-
- if (vui->vrings[msg.state.index].avail_wrap_counter == 1)
- vui->vrings[msg.state.index].avail_wrap_counter =
- VRING_DESC_F_AVAIL;
- }
- vlib_worker_thread_barrier_release (vm);
- break;
-
- case VHOST_USER_GET_VRING_BASE:
- if (msg.state.index >= vui->num_qid)
- {
- vu_log_debug (vui, "invalid vring index VHOST_USER_GET_VRING_BASE:"
- " %u >= %u", msg.state.index, vui->num_qid);
- goto close_socket;
- }
-
- /* protection is needed to prevent rx/tx from changing last_avail_idx */
- vlib_worker_thread_barrier_sync (vm);
- /*
- * Copy last_avail_idx from the vring before closing it because
- * closing the vring also initializes the vring last_avail_idx
- */
- msg.state.num = vui->vrings[msg.state.index].last_avail_idx;
- if (vhost_user_is_packed_ring_supported (vui))
- {
- msg.state.num =
- (vui->vrings[msg.state.index].last_avail_idx & 0x7fff) |
- (! !vui->vrings[msg.state.index].avail_wrap_counter << 15);
- msg.state.num |=
- ((vui->vrings[msg.state.index].last_used_idx & 0x7fff) |
- (! !vui->vrings[msg.state.index].used_wrap_counter << 15)) << 16;
- }
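-      /*
-       * Encode example (sketch): last_avail_idx 5 with avail wrap 1 and
-       * last_used_idx 5 with used wrap 1 pack into
-       * msg.state.num == 0x80058005.
-       */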
- msg.flags |= 4;
- msg.size = sizeof (msg.state);
-
- /*
- * Spec says: Client must [...] stop ring upon receiving
- * VHOST_USER_GET_VRING_BASE
- */
- vhost_user_vring_close (vui, msg.state.index);
- vlib_worker_thread_barrier_release (vm);
- vu_log_debug (vui,
- "if %d msg VHOST_USER_GET_VRING_BASE idx %d num 0x%x",
- vui->hw_if_index, msg.state.index, msg.state.num);
- n =
- send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
- if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
- {
- vu_log_debug (vui, "could not send message response");
- goto close_socket;
- }
- vhost_user_update_iface_state (vui);
- break;
-
- case VHOST_USER_NONE:
- vu_log_debug (vui, "if %d msg VHOST_USER_NONE", vui->hw_if_index);
- break;
-
- case VHOST_USER_SET_LOG_BASE:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_LOG_BASE",
- vui->hw_if_index);
-
- if (msg.size != sizeof (msg.log))
- {
- vu_log_debug (vui, "invalid msg size for VHOST_USER_SET_LOG_BASE:"
- " %d instead of %d", msg.size, sizeof (msg.log));
- goto close_socket;
- }
-
- if (!(vui->protocol_features & (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD)))
- {
- vu_log_debug (vui, "VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but "
- "VHOST_USER_SET_LOG_BASE received");
- goto close_socket;
- }
-
- fd = fds[0];
- /* align size to page */
- long page_sz = get_huge_page_size (fd);
- ssize_t map_sz =
- (msg.log.size + msg.log.offset + page_sz - 1) & ~(page_sz - 1);
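-      /*
-       * Rounding example (sketch, assuming a 4 KiB page size): a log
-       * region with size + offset == 0x1801 maps 0x2000 bytes.
-       */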
-
- void *log_base_addr = mmap (0, map_sz, PROT_READ | PROT_WRITE,
- MAP_SHARED, fd, 0);
-
- vu_log_debug (vui, "map log region addr 0 len 0x%lx off 0x%lx fd %d "
- "mapped 0x%lx", map_sz, msg.log.offset, fd,
- log_base_addr);
-
- if (log_base_addr == MAP_FAILED)
- {
- vu_log_err (vui, "failed to map memory. errno is %d", errno);
- goto close_socket;
- }
-
- vlib_worker_thread_barrier_sync (vm);
- vui->log_base_addr = log_base_addr;
- vui->log_base_addr += msg.log.offset;
- vui->log_size = msg.log.size;
- vlib_worker_thread_barrier_release (vm);
-
- msg.flags |= 4;
- msg.size = sizeof (msg.u64);
- n =
- send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
- if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
- {
- vu_log_debug (vui, "could not send message response");
- goto close_socket;
- }
- break;
-
- case VHOST_USER_SET_LOG_FD:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_LOG_FD", vui->hw_if_index);
- break;
-
- case VHOST_USER_GET_PROTOCOL_FEATURES:
- msg.flags |= 4;
- msg.u64 = (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |
- (1 << VHOST_USER_PROTOCOL_F_MQ);
- msg.size = sizeof (msg.u64);
- vu_log_debug (vui, "if %d msg VHOST_USER_GET_PROTOCOL_FEATURES - "
- "reply 0x%016llx", vui->hw_if_index, msg.u64);
- n =
- send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
- if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
- {
- vu_log_debug (vui, "could not send message response");
- goto close_socket;
- }
- break;
-
- case VHOST_USER_SET_PROTOCOL_FEATURES:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_PROTOCOL_FEATURES "
- "features 0x%016llx", vui->hw_if_index, msg.u64);
- vui->protocol_features = msg.u64;
- break;
-
- case VHOST_USER_GET_QUEUE_NUM:
- msg.flags |= 4;
- msg.u64 = VHOST_VRING_MAX_MQ_PAIR_SZ;
- msg.size = sizeof (msg.u64);
- vu_log_debug (vui, "if %d msg VHOST_USER_GET_QUEUE_NUM - reply %d",
- vui->hw_if_index, msg.u64);
- n =
- send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
- if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
- {
- vu_log_debug (vui, "could not send message response");
- goto close_socket;
- }
- break;
-
- case VHOST_USER_SET_VRING_ENABLE:
- vu_log_debug (vui, "if %d VHOST_USER_SET_VRING_ENABLE: %s queue %d",
- vui->hw_if_index, msg.state.num ? "enable" : "disable",
- msg.state.index);
- if (msg.state.index >= vui->num_qid)
- {
- vu_log_debug (vui, "invalid vring idx VHOST_USER_SET_VRING_ENABLE:"
- " %u >= %u", msg.state.index, vui->num_qid);
- goto close_socket;
- }
-
- vui->vrings[msg.state.index].enabled = msg.state.num;
- vhost_user_thread_placement (vui, msg.state.index);
- vhost_user_update_iface_state (vui);
- break;
-
- default:
- vu_log_debug (vui, "unknown vhost-user message %d received. "
- "closing socket", msg.request);
- goto close_socket;
- }
-
- return 0;
-
-close_socket:
- vlib_worker_thread_barrier_sync (vm);
- vhost_user_if_disconnect (vui);
- vlib_worker_thread_barrier_release (vm);
- vhost_user_update_iface_state (vui);
- return 0;
-}
-
-static clib_error_t *
-vhost_user_socket_error (clib_file_t * uf)
-{
- vlib_main_t *vm = vlib_get_main ();
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui =
- pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
-
- vu_log_debug (vui, "socket error on if %d", vui->sw_if_index);
- vlib_worker_thread_barrier_sync (vm);
- vhost_user_if_disconnect (vui);
- vlib_worker_thread_barrier_release (vm);
- return 0;
-}
-
-static clib_error_t *
-vhost_user_socksvr_accept_ready (clib_file_t * uf)
-{
- int client_fd, client_len;
- struct sockaddr_un client;
- clib_file_t template = { 0 };
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui;
-
- vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
-
- client_len = sizeof (client);
- client_fd = accept (uf->file_descriptor,
- (struct sockaddr *) &client,
- (socklen_t *) & client_len);
-
- if (client_fd < 0)
- return clib_error_return_unix (0, "accept");
-
- if (vui->clib_file_index != ~0)
- {
- vu_log_debug (vui, "Close client socket for vhost interface %d, fd %d",
- vui->sw_if_index, UNIX_GET_FD (vui->clib_file_index));
- clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index);
- }
-
- vu_log_debug (vui, "New client socket for vhost interface %d, fd %d",
- vui->sw_if_index, client_fd);
- template.read_function = vhost_user_socket_read;
- template.error_function = vhost_user_socket_error;
- template.file_descriptor = client_fd;
- template.private_data = vui - vhost_user_main.vhost_user_interfaces;
- template.description = format (0, "vhost interface %d", vui->sw_if_index);
- vui->clib_file_index = clib_file_add (&file_main, &template);
- vui->num_qid = 2;
- return 0;
-}
-
-static clib_error_t *
-vhost_user_init (vlib_main_t * vm)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
-
- vum->log_default = vlib_log_register_class ("vhost-user", 0);
-
- vum->coalesce_frames = 32;
- vum->coalesce_time = 1e-3;
-
- vec_validate (vum->cpus, tm->n_vlib_mains - 1);
-
- vhost_cpu_t *cpu;
- vec_foreach (cpu, vum->cpus)
- {
- /* This is actually not necessary as validate already zeroes it
- * Just keeping the loop here for later because I am lazy. */
- cpu->rx_buffers_len = 0;
- }
-
- vum->random = random_default_seed ();
-
- mhash_init_c_string (&vum->if_index_by_sock_name, sizeof (uword));
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_INIT_FUNCTION (vhost_user_init) =
-{
- .runs_after = VLIB_INITS("ip4_init"),
-};
-/* *INDENT-ON* */
-
-static uword
-vhost_user_send_interrupt_process (vlib_main_t * vm,
- vlib_node_runtime_t * rt, vlib_frame_t * f)
-{
- vhost_user_intf_t *vui;
- f64 timeout = 3153600000.0 /* 100 years */ ;
- uword event_type, *event_data = 0;
- vhost_user_main_t *vum = &vhost_user_main;
- u16 qid;
- f64 now, poll_time_remaining;
- f64 next_timeout;
- u8 stop_timer = 0;
-
- while (1)
- {
- poll_time_remaining =
- vlib_process_wait_for_event_or_clock (vm, timeout);
- event_type = vlib_process_get_events (vm, &event_data);
- vec_reset_length (event_data);
-
- /*
- * Use the remaining timeout if it is less than coalesce time to avoid
- * resetting the existing timer in the middle of expiration
- */
- timeout = poll_time_remaining;
- if (vlib_process_suspend_time_is_zero (timeout) ||
- (timeout > vum->coalesce_time))
- timeout = vum->coalesce_time;
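-      /*
-       * Example (sketch): with a 1 ms coalesce_time, if only 0.4 ms remain
-       * on the pending timer, keep the 0.4 ms timeout rather than re-arming
-       * at 1 ms and pushing the expiration out.
-       */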
-
- now = vlib_time_now (vm);
- switch (event_type)
- {
- case VHOST_USER_EVENT_STOP_TIMER:
- stop_timer = 1;
- break;
-
- case VHOST_USER_EVENT_START_TIMER:
- stop_timer = 0;
- timeout = 1e-3;
- if (!vlib_process_suspend_time_is_zero (poll_time_remaining))
- break;
- /* fall through */
-
- case ~0:
- /* *INDENT-OFF* */
- pool_foreach (vui, vum->vhost_user_interfaces) {
- next_timeout = timeout;
- FOR_ALL_VHOST_RX_TXQ (qid, vui)
- {
- vhost_user_vring_t *vq = &vui->vrings[qid];
-
- if (vq->started == 0)
- continue;
- if (vq->n_since_last_int)
- {
- if (now >= vq->int_deadline)
- vhost_user_send_call (vm, vui, vq);
- else
- next_timeout = vq->int_deadline - now;
- }
-
- if ((next_timeout < timeout) && (next_timeout > 0.0))
- timeout = next_timeout;
- }
- }
- /* *INDENT-ON* */
- break;
-
- default:
- clib_warning ("BUG: unhandled event type %d", event_type);
- break;
- }
- /* No less than 1 millisecond */
- if (timeout < 1e-3)
- timeout = 1e-3;
- if (stop_timer)
- timeout = 3153600000.0;
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (vhost_user_send_interrupt_node) = {
- .function = vhost_user_send_interrupt_process,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "vhost-user-send-interrupt-process",
-};
-/* *INDENT-ON* */
-
-static uword
-vhost_user_process (vlib_main_t * vm,
- vlib_node_runtime_t * rt, vlib_frame_t * f)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui;
- struct sockaddr_un sun;
- int sockfd;
- clib_file_t template = { 0 };
- f64 timeout = 3153600000.0 /* 100 years */ ;
- uword *event_data = 0;
-
- sockfd = -1;
- sun.sun_family = AF_UNIX;
- template.read_function = vhost_user_socket_read;
- template.error_function = vhost_user_socket_error;
-
- while (1)
- {
- vlib_process_wait_for_event_or_clock (vm, timeout);
- vlib_process_get_events (vm, &event_data);
- vec_reset_length (event_data);
-
- timeout = 3.0;
-
- /* *INDENT-OFF* */
- pool_foreach (vui, vum->vhost_user_interfaces) {
-
- if (vui->unix_server_index == ~0) { //Nothing to do for server sockets
- if (vui->clib_file_index == ~0)
- {
- if ((sockfd < 0) &&
- ((sockfd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0))
- {
- /*
- * 1st time error or new error for this interface,
- * spit out the message and record the error
- */
- if (!vui->sock_errno || (vui->sock_errno != errno))
- {
- clib_unix_warning
- ("Error: Could not open unix socket for %s",
- vui->sock_filename);
- vui->sock_errno = errno;
- }
- continue;
- }
-
- /* try to connect */
- strncpy (sun.sun_path, (char *) vui->sock_filename,
- sizeof (sun.sun_path) - 1);
- sun.sun_path[sizeof (sun.sun_path) - 1] = 0;
-
- /* Avoid hanging VPP if the other end does not accept */
- if (fcntl(sockfd, F_SETFL, O_NONBLOCK) < 0)
- clib_unix_warning ("fcntl");
-
- if (connect (sockfd, (struct sockaddr *) &sun,
- sizeof (struct sockaddr_un)) == 0)
- {
- /* Set the socket to blocking as it was before */
- if (fcntl(sockfd, F_SETFL, 0) < 0)
- clib_unix_warning ("fcntl2");
-
- vui->sock_errno = 0;
- template.file_descriptor = sockfd;
- template.private_data =
- vui - vhost_user_main.vhost_user_interfaces;
- template.description = format (0, "vhost user process");
- vui->clib_file_index = clib_file_add (&file_main, &template);
- vui->num_qid = 2;
-
- /* This sockfd is considered consumed */
- sockfd = -1;
- }
- else
- {
- vui->sock_errno = errno;
- }
- }
- else
- {
- /* check if socket is alive */
- int error = 0;
- socklen_t len = sizeof (error);
- int fd = UNIX_GET_FD(vui->clib_file_index);
- int retval =
- getsockopt (fd, SOL_SOCKET, SO_ERROR, &error, &len);
-
- if (retval)
- {
- vu_log_debug (vui, "getsockopt returned %d", retval);
- vhost_user_if_disconnect (vui);
- }
- }
- }
- }
- /* *INDENT-ON* */
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (vhost_user_process_node,static) = {
- .function = vhost_user_process,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "vhost-user-process",
-};
-/* *INDENT-ON* */
-
-/**
- * Disables and resets the interface structure.
- * It can then be either initialized again, or removed from the used
- * interfaces.
- */
-static void
-vhost_user_term_if (vhost_user_intf_t * vui)
-{
- int q;
- vhost_user_main_t *vum = &vhost_user_main;
-
- // disconnect interface sockets
- vhost_user_if_disconnect (vui);
- vhost_user_update_gso_interface_count (vui, 0 /* delete */ );
- vhost_user_update_iface_state (vui);
-
- FOR_ALL_VHOST_RX_TXQ (q, vui)
- {
- clib_spinlock_free (&vui->vrings[q].vring_lock);
- }
-
- if (vui->unix_server_index != ~0)
- {
- //Close server socket
- clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
- vui->unix_server_index);
- clib_file_del (&file_main, uf);
- vui->unix_server_index = ~0;
- unlink (vui->sock_filename);
- }
-
- mhash_unset (&vum->if_index_by_sock_name, vui->sock_filename,
- &vui->if_index);
-}
-
-int
-vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui;
- int rv = 0;
- vnet_hw_interface_t *hwif;
- u16 qid;
-
- if (!
- (hwif =
- vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index))
- || hwif->dev_class_index != vhost_user_device_class.index)
- return VNET_API_ERROR_INVALID_SW_IF_INDEX;
-
- vui = pool_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);
-
- vu_log_debug (vui, "Deleting vhost-user interface %s (instance %d)",
- hwif->name, hwif->dev_instance);
-
- FOR_ALL_VHOST_TXQ (qid, vui)
- {
- vhost_user_vring_t *txvq = &vui->vrings[qid];
-
- if ((txvq->mode == VNET_HW_IF_RX_MODE_POLLING) &&
- (txvq->thread_index != ~0))
- {
- vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, txvq->thread_index);
- ASSERT (cpu->polling_q_count != 0);
- cpu->polling_q_count--;
- }
-
- if ((vum->ifq_count > 0) &&
- ((txvq->mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
- (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)))
- {
- vum->ifq_count--;
- // Stop the timer if there is no more interrupt interface/queue
- if (vum->ifq_count == 0)
- {
- vlib_process_signal_event (vm,
- vhost_user_send_interrupt_node.index,
- VHOST_USER_EVENT_STOP_TIMER, 0);
- break;
- }
- }
- }
-
- // Disable and reset interface
- vhost_user_term_if (vui);
-
- // Reset renumbered iface
- if (hwif->dev_instance <
- vec_len (vum->show_dev_instance_by_real_dev_instance))
- vum->show_dev_instance_by_real_dev_instance[hwif->dev_instance] = ~0;
-
- // Delete ethernet interface
- ethernet_delete_interface (vnm, vui->hw_if_index);
-
- // free vrings
- vec_free (vui->vrings);
-
- // Back to pool
- pool_put (vum->vhost_user_interfaces, vui);
-
- return rv;
-}
-
-static clib_error_t *
-vhost_user_exit (vlib_main_t * vm)
-{
- vnet_main_t *vnm = vnet_get_main ();
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui;
-
- vlib_worker_thread_barrier_sync (vlib_get_main ());
- /* *INDENT-OFF* */
- pool_foreach (vui, vum->vhost_user_interfaces) {
- vhost_user_delete_if (vnm, vm, vui->sw_if_index);
- }
- /* *INDENT-ON* */
- vlib_worker_thread_barrier_release (vlib_get_main ());
- return 0;
-}
-
-VLIB_MAIN_LOOP_EXIT_FUNCTION (vhost_user_exit);
-
-/**
- * Open server unix socket on specified sock_filename.
- */
-static int
-vhost_user_init_server_sock (const char *sock_filename, int *sock_fd)
-{
- int rv = 0;
- struct sockaddr_un un = { };
- int fd;
- /* create listening socket */
- if ((fd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)
- return VNET_API_ERROR_SYSCALL_ERROR_1;
-
- un.sun_family = AF_UNIX;
- strncpy ((char *) un.sun_path, (char *) sock_filename,
- sizeof (un.sun_path) - 1);
-
- /* remove if exists */
- unlink ((char *) sock_filename);
-
- if (bind (fd, (struct sockaddr *) &un, sizeof (un)) == -1)
- {
- rv = VNET_API_ERROR_SYSCALL_ERROR_2;
- goto error;
- }
-
- if (listen (fd, 1) == -1)
- {
- rv = VNET_API_ERROR_SYSCALL_ERROR_3;
- goto error;
- }
-
- *sock_fd = fd;
- return 0;
-
-error:
- close (fd);
- return rv;
-}
-
-/**
- * Create ethernet interface for vhost user interface.
- */
-static void
-vhost_user_create_ethernet (vnet_main_t *vnm, vlib_main_t *vm,
- vhost_user_intf_t *vui,
- vhost_user_create_if_args_t *args)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- u8 hwaddr[6];
- clib_error_t *error;
-
- /* create hw and sw interface */
- if (args->use_custom_mac)
- {
- clib_memcpy (hwaddr, args->hwaddr, 6);
- }
- else
- {
- random_u32 (&vum->random);
- clib_memcpy (hwaddr + 2, &vum->random, sizeof (vum->random));
- hwaddr[0] = 2;
- hwaddr[1] = 0xfe;
- }
-
- error = ethernet_register_interface
- (vnm,
- vhost_user_device_class.index,
- vui - vum->vhost_user_interfaces /* device instance */ ,
- hwaddr /* ethernet address */ ,
- &vui->hw_if_index, 0 /* flag change */ );
-
- if (error)
- clib_error_report (error);
-}
-
-/*
- * Initialize vui with specified attributes
- */
-static void
-vhost_user_vui_init (vnet_main_t * vnm, vhost_user_intf_t * vui,
- int server_sock_fd, vhost_user_create_if_args_t * args,
- u32 * sw_if_index)
-{
- vnet_sw_interface_t *sw;
- int q;
- vhost_user_main_t *vum = &vhost_user_main;
- vnet_hw_interface_t *hw;
-
- hw = vnet_get_hw_interface (vnm, vui->hw_if_index);
- sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
- if (server_sock_fd != -1)
- {
- clib_file_t template = { 0 };
- template.read_function = vhost_user_socksvr_accept_ready;
- template.file_descriptor = server_sock_fd;
-      template.private_data = vui - vum->vhost_user_interfaces; // vui index
-      template.description = format (0, "vhost user %d", sw->sw_if_index);
- vui->unix_server_index = clib_file_add (&file_main, &template);
- }
- else
- {
- vui->unix_server_index = ~0;
- }
-
- vui->sw_if_index = sw->sw_if_index;
- strncpy (vui->sock_filename, args->sock_filename,
- ARRAY_LEN (vui->sock_filename) - 1);
- vui->sock_errno = 0;
- vui->is_ready = 0;
- vui->feature_mask = args->feature_mask;
- vui->clib_file_index = ~0;
- vui->log_base_addr = 0;
- vui->if_index = vui - vum->vhost_user_interfaces;
- vui->enable_gso = args->enable_gso;
- vui->enable_event_idx = args->enable_event_idx;
- vui->enable_packed = args->enable_packed;
- /*
-   * enable_gso takes precedence over the configurable feature mask if
-   * there is a clash:
-   * if the feature mask disables gso, but enable_gso is configured,
-   * then gso is enabled;
-   * if the feature mask enables gso, but enable_gso is not configured,
-   * then gso is enabled.
-   *
-   * If gso is enabled via the feature mask, it must enable both the host
-   * and guest gso feature bits; we don't support one-sided or partial GSO.
- */
- if ((vui->enable_gso == 0) &&
- ((args->feature_mask & FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS)
- == (FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS)))
- vui->enable_gso = 1;
- vhost_user_update_gso_interface_count (vui, 1 /* add */ );
- mhash_set_mem (&vum->if_index_by_sock_name, vui->sock_filename,
- &vui->if_index, 0);
-
- vec_validate_aligned (vui->vrings, (VHOST_VRING_INIT_MQ_PAIR_SZ << 1) - 1,
- CLIB_CACHE_LINE_BYTES);
- vui->num_qid = 2;
- for (q = 0; q < vec_len (vui->vrings); q++)
- vhost_user_vring_init (vui, q);
-
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
- vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
-
- if (sw_if_index)
- *sw_if_index = vui->sw_if_index;
-}
-
-int
-vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
- vhost_user_create_if_args_t * args)
-{
- vhost_user_intf_t *vui = NULL;
- u32 sw_if_idx = ~0;
- int rv = 0;
- int server_sock_fd = -1;
- vhost_user_main_t *vum = &vhost_user_main;
- uword *if_index;
-
- if (args->sock_filename == NULL || !(strlen (args->sock_filename) > 0))
- {
- return VNET_API_ERROR_INVALID_ARGUMENT;
- }
-
- if_index = mhash_get (&vum->if_index_by_sock_name,
- (void *) args->sock_filename);
- if (if_index)
- {
- vui = &vum->vhost_user_interfaces[*if_index];
- args->sw_if_index = vui->sw_if_index;
- return VNET_API_ERROR_IF_ALREADY_EXISTS;
- }
-
- if (args->is_server)
- {
- if ((rv =
- vhost_user_init_server_sock (args->sock_filename,
- &server_sock_fd)) != 0)
- {
- return rv;
- }
- }
-
- /* Protect the uninitialized vui from being dispatched by rx/tx */
- vlib_worker_thread_barrier_sync (vm);
- pool_get (vhost_user_main.vhost_user_interfaces, vui);
- vhost_user_create_ethernet (vnm, vm, vui, args);
- vlib_worker_thread_barrier_release (vm);
-
- vhost_user_vui_init (vnm, vui, server_sock_fd, args, &sw_if_idx);
- vnet_sw_interface_set_mtu (vnm, vui->sw_if_index, 9000);
- vhost_user_rx_thread_placement (vui, 1);
-
- if (args->renumber)
- vnet_interface_name_renumber (sw_if_idx, args->custom_dev_instance);
-
- args->sw_if_index = sw_if_idx;
-
- // Process node must connect
- vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
-
- return rv;
-}
-
-int
-vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
- vhost_user_create_if_args_t * args)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui = NULL;
- u32 sw_if_idx = ~0;
- int server_sock_fd = -1;
- int rv = 0;
- vnet_hw_interface_t *hwif;
- uword *if_index;
-
- if (!(hwif = vnet_get_sup_hw_interface_api_visible_or_null (vnm,
- args->sw_if_index))
- || hwif->dev_class_index != vhost_user_device_class.index)
- return VNET_API_ERROR_INVALID_SW_IF_INDEX;
-
- if (args->sock_filename == NULL || !(strlen (args->sock_filename) > 0))
- return VNET_API_ERROR_INVALID_ARGUMENT;
-
- vui = vec_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);
-
- /*
-   * Disallow changing the interface to have the same path name
-   * as another interface
- */
- if_index = mhash_get (&vum->if_index_by_sock_name,
- (void *) args->sock_filename);
- if (if_index && (*if_index != vui->if_index))
- return VNET_API_ERROR_IF_ALREADY_EXISTS;
-
- // First try to open server socket
- if (args->is_server)
- if ((rv = vhost_user_init_server_sock (args->sock_filename,
- &server_sock_fd)) != 0)
- return rv;
-
- vhost_user_term_if (vui);
- vhost_user_vui_init (vnm, vui, server_sock_fd, args, &sw_if_idx);
-
- if (args->renumber)
- vnet_interface_name_renumber (sw_if_idx, args->custom_dev_instance);
-
- // Process node must connect
- vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
-
- return rv;
-}
-
-clib_error_t *
-vhost_user_connect_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- vnet_main_t *vnm = vnet_get_main ();
- unformat_input_t _line_input, *line_input = &_line_input;
- clib_error_t *error = NULL;
- vhost_user_create_if_args_t args = { 0 };
- int rv;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- args.feature_mask = (u64) ~ (0ULL);
- args.custom_dev_instance = ~0;
-  /* GSO feature is disabled by default */
-  args.feature_mask &= ~FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS;
-  /* packed-ring feature is disabled by default */
-  args.feature_mask &= ~VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
-  /* event_idx feature is disabled by default */
-  args.feature_mask &= ~VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "socket %s", &args.sock_filename))
- ;
- else if (unformat (line_input, "server"))
- args.is_server = 1;
- else if (unformat (line_input, "gso"))
- args.enable_gso = 1;
- else if (unformat (line_input, "packed"))
- args.enable_packed = 1;
- else if (unformat (line_input, "event-idx"))
- args.enable_event_idx = 1;
- else if (unformat (line_input, "feature-mask 0x%llx",
- &args.feature_mask))
- ;
- else if (unformat (line_input, "hwaddr %U", unformat_ethernet_address,
- args.hwaddr))
- args.use_custom_mac = 1;
- else if (unformat (line_input, "renumber %d",
- &args.custom_dev_instance))
- args.renumber = 1;
- else
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- goto done;
- }
- }
-
- if ((rv = vhost_user_create_if (vnm, vm, &args)))
- {
- error = clib_error_return (0, "vhost_user_create_if returned %d", rv);
- goto done;
- }
-
- vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnm,
- args.sw_if_index);
-
-done:
- vec_free (args.sock_filename);
- unformat_free (line_input);
-
- return error;
-}
-
-clib_error_t *
-vhost_user_delete_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- u32 sw_if_index = ~0;
- vnet_main_t *vnm = vnet_get_main ();
- clib_error_t *error = NULL;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "sw_if_index %d", &sw_if_index))
- ;
- else if (unformat
- (line_input, "%U", unformat_vnet_sw_interface, vnm,
- &sw_if_index))
- {
- vnet_hw_interface_t *hwif =
- vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index);
- if (hwif == NULL ||
- vhost_user_device_class.index != hwif->dev_class_index)
- {
- error = clib_error_return (0, "Not a vhost interface");
- goto done;
- }
- }
- else
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- goto done;
- }
- }
-
- vhost_user_delete_if (vnm, vm, sw_if_index);
-
-done:
- unformat_free (line_input);
-
- return error;
-}
-
-int
-vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
- vhost_user_intf_details_t ** out_vuids)
-{
- int rv = 0;
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui;
- vhost_user_intf_details_t *r_vuids = NULL;
- vhost_user_intf_details_t *vuid = NULL;
- u32 *hw_if_indices = 0;
- vnet_hw_interface_t *hi;
- int i;
-
- if (!out_vuids)
- return -1;
-
- pool_foreach (vui, vum->vhost_user_interfaces)
- vec_add1 (hw_if_indices, vui->hw_if_index);
-
- for (i = 0; i < vec_len (hw_if_indices); i++)
- {
- hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
- vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance);
-
- vec_add2 (r_vuids, vuid, 1);
- vuid->sw_if_index = vui->sw_if_index;
- vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz;
- vuid->features = vui->features;
- vuid->num_regions = vui->nregions;
- vuid->is_server = vui->unix_server_index != ~0;
- vuid->sock_errno = vui->sock_errno;
- snprintf ((char *) vuid->sock_filename, sizeof (vuid->sock_filename),
- "%s", vui->sock_filename);
- memcpy_s (vuid->if_name, sizeof (vuid->if_name), hi->name,
- clib_min (vec_len (hi->name), sizeof (vuid->if_name) - 1));
- vuid->if_name[sizeof (vuid->if_name) - 1] = 0;
- }
-
- vec_free (hw_if_indices);
-
- *out_vuids = r_vuids;
-
- return rv;
-}
-
-static u8 *
-format_vhost_user_desc (u8 * s, va_list * args)
-{
- char *fmt = va_arg (*args, char *);
- vhost_user_intf_t *vui = va_arg (*args, vhost_user_intf_t *);
- vring_desc_t *desc_table = va_arg (*args, vring_desc_t *);
- int idx = va_arg (*args, int);
- u32 *mem_hint = va_arg (*args, u32 *);
-
- s = format (s, fmt, idx, desc_table[idx].addr, desc_table[idx].len,
- desc_table[idx].flags, desc_table[idx].next,
- pointer_to_uword (map_guest_mem (vui, desc_table[idx].addr,
- mem_hint)));
- return s;
-}
-
-static void
-vhost_user_show_fds (vlib_main_t * vm, vhost_user_vring_t * vq)
-{
- int kickfd = UNIX_GET_FD (vq->kickfd_idx);
- int callfd = UNIX_GET_FD (vq->callfd_idx);
-
- vlib_cli_output (vm, " kickfd %d callfd %d errfd %d\n", kickfd, callfd,
- vq->errfd);
-}
-
-static void
-vhost_user_show_desc (vlib_main_t * vm, vhost_user_intf_t * vui, int q,
- int show_descr, int show_verbose)
-{
- int j;
- u32 mem_hint = 0;
- u32 idx;
- u32 n_entries;
- vring_desc_t *desc_table;
- vhost_user_vring_t *vq = &vui->vrings[q];
-
- if (vq->avail && vq->used)
- vlib_cli_output (vm,
- " avail.flags %x avail event idx %u avail.idx %d "
- "used.flags %x used event idx %u used.idx %d\n",
- vq->avail->flags, vhost_user_avail_event_idx (vq),
- vq->avail->idx, vq->used->flags,
- vhost_user_used_event_idx (vq), vq->used->idx);
-
- vhost_user_show_fds (vm, vq);
-
- if (show_descr)
- {
- vlib_cli_output (vm, "\n descriptor table:\n");
- vlib_cli_output (vm,
- " slot addr len flags next "
- "user_addr\n");
- vlib_cli_output (vm,
- " ===== ================== ===== ====== ===== "
- "==================\n");
- for (j = 0; j < vq->qsz_mask + 1; j++)
- {
- desc_table = vq->desc;
- vlib_cli_output (vm, "%U", format_vhost_user_desc,
- " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", vui,
- desc_table, j, &mem_hint);
- if (show_verbose && (desc_table[j].flags & VRING_DESC_F_INDIRECT))
- {
- n_entries = desc_table[j].len / sizeof (vring_desc_t);
- desc_table = map_guest_mem (vui, desc_table[j].addr, &mem_hint);
- if (desc_table)
- {
- for (idx = 0; idx < clib_min (20, n_entries); idx++)
- {
- vlib_cli_output
- (vm, "%U", format_vhost_user_desc,
- "> %-4u 0x%016lx %-5u 0x%04x %-5u 0x%016lx\n", vui,
- desc_table, idx, &mem_hint);
- }
- if (n_entries >= 20)
- vlib_cli_output (vm, "Skip displaying entries 20...%u\n",
- n_entries);
- }
- }
- }
- }
-}
-
-static u8 *
-format_vhost_user_packed_desc (u8 * s, va_list * args)
-{
- char *fmt = va_arg (*args, char *);
- vhost_user_intf_t *vui = va_arg (*args, vhost_user_intf_t *);
- vring_packed_desc_t *desc_table = va_arg (*args, vring_packed_desc_t *);
- int idx = va_arg (*args, int);
- u32 *mem_hint = va_arg (*args, u32 *);
-
- s = format (s, fmt, idx, desc_table[idx].addr, desc_table[idx].len,
- desc_table[idx].flags, desc_table[idx].id,
- pointer_to_uword (map_guest_mem (vui, desc_table[idx].addr,
- mem_hint)));
- return s;
-}
-
-static u8 *
-format_vhost_user_event_idx_flags (u8 * s, va_list * args)
-{
- u32 flags = va_arg (*args, u32);
- typedef struct
- {
- u8 value;
- char *str;
- } event_idx_flags;
- static event_idx_flags event_idx_array[] = {
-#define _(s,v) { .str = #s, .value = v, },
- foreach_virtio_event_idx_flags
-#undef _
- };
- u32 num_entries = sizeof (event_idx_array) / sizeof (event_idx_flags);
-
- if (flags < num_entries)
- s = format (s, "%s", event_idx_array[flags].str);
- else
- s = format (s, "%u", flags);
- return s;
-}
-
-static void
-vhost_user_show_desc_packed (vlib_main_t * vm, vhost_user_intf_t * vui, int q,
- int show_descr, int show_verbose)
-{
- int j;
- u32 mem_hint = 0;
- u32 idx;
- u32 n_entries;
- vring_packed_desc_t *desc_table;
- vhost_user_vring_t *vq = &vui->vrings[q];
- u16 off_wrap, event_idx;
-
- off_wrap = vq->avail_event->off_wrap;
- event_idx = off_wrap & 0x7fff;
- vlib_cli_output (vm, " avail_event.flags %U avail_event.off_wrap %u "
- "avail event idx %u\n", format_vhost_user_event_idx_flags,
- (u32) vq->avail_event->flags, off_wrap, event_idx);
-
- off_wrap = vq->used_event->off_wrap;
- event_idx = off_wrap & 0x7fff;
- vlib_cli_output (vm, " used_event.flags %U used_event.off_wrap %u "
- "used event idx %u\n", format_vhost_user_event_idx_flags,
- (u32) vq->used_event->flags, off_wrap, event_idx);
-
- vlib_cli_output (vm, " avail wrap counter %u, used wrap counter %u\n",
- vq->avail_wrap_counter, vq->used_wrap_counter);
-
- vhost_user_show_fds (vm, vq);
-
- if (show_descr)
- {
- vlib_cli_output (vm, "\n descriptor table:\n");
- vlib_cli_output (vm,
- " slot addr len flags id "
- "user_addr\n");
- vlib_cli_output (vm,
- " ===== ================== ===== ====== ===== "
- "==================\n");
- for (j = 0; j < vq->qsz_mask + 1; j++)
- {
- desc_table = vq->packed_desc;
- vlib_cli_output (vm, "%U", format_vhost_user_packed_desc,
- " %-5u 0x%016lx %-5u 0x%04x %-5u 0x%016lx\n", vui,
- desc_table, j, &mem_hint);
- if (show_verbose && (desc_table[j].flags & VRING_DESC_F_INDIRECT))
- {
- n_entries = desc_table[j].len >> 4;
- desc_table = map_guest_mem (vui, desc_table[j].addr, &mem_hint);
- if (desc_table)
- {
- for (idx = 0; idx < clib_min (20, n_entries); idx++)
- {
- vlib_cli_output
- (vm, "%U", format_vhost_user_packed_desc,
- "> %-4u 0x%016lx %-5u 0x%04x %-5u 0x%016lx\n", vui,
- desc_table, idx, &mem_hint);
- }
- if (n_entries >= 20)
- vlib_cli_output (vm, "Skip displaying entries 20...%u\n",
- n_entries);
- }
- }
- }
- }
-}
-
-clib_error_t *
-show_vhost_user_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- clib_error_t *error = 0;
- vnet_main_t *vnm = vnet_get_main ();
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui;
- u32 hw_if_index, *hw_if_indices = 0;
- vnet_hw_interface_t *hi;
- u16 qid;
- int i, j, q;
- int show_descr = 0;
- int show_verbose = 0;
- struct feat_struct
- {
- u8 bit;
- char *str;
- };
- struct feat_struct *feat_entry;
-
- static struct feat_struct feat_array[] = {
-#define _(s,b) { .str = #s, .bit = b, },
- foreach_virtio_net_features
-#undef _
- {.str = NULL}
- };
-
-#define foreach_protocol_feature \
- _(VHOST_USER_PROTOCOL_F_MQ) \
- _(VHOST_USER_PROTOCOL_F_LOG_SHMFD)
-
- static struct feat_struct proto_feat_array[] = {
-#define _(s) { .str = #s, .bit = s},
- foreach_protocol_feature
-#undef _
- {.str = NULL}
- };
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat
- (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
- {
- hi = vnet_get_hw_interface (vnm, hw_if_index);
- if (vhost_user_device_class.index != hi->dev_class_index)
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- goto done;
- }
- vec_add1 (hw_if_indices, hw_if_index);
- }
- else if (unformat (input, "descriptors") || unformat (input, "desc"))
- show_descr = 1;
- else if (unformat (input, "verbose"))
- show_verbose = 1;
- else
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- goto done;
- }
- }
- if (vec_len (hw_if_indices) == 0)
- {
- pool_foreach (vui, vum->vhost_user_interfaces)
- vec_add1 (hw_if_indices, vui->hw_if_index);
- }
- vlib_cli_output (vm, "Virtio vhost-user interfaces");
- vlib_cli_output (vm, "Global:\n coalesce frames %d time %e",
- vum->coalesce_frames, vum->coalesce_time);
- vlib_cli_output (vm, " Number of rx virtqueues in interrupt mode: %d",
- vum->ifq_count);
- vlib_cli_output (vm, " Number of GSO interfaces: %d", vum->gso_count);
- for (u32 tid = 0; tid <= vlib_num_workers (); tid++)
- {
- vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, tid);
- vlib_cli_output (vm, " Thread %u: Polling queue count %u", tid,
- cpu->polling_q_count);
- }
-
- for (i = 0; i < vec_len (hw_if_indices); i++)
- {
- hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
- vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance);
- vlib_cli_output (vm, "Interface: %U (ifindex %d)",
- format_vnet_hw_if_index_name, vnm, hw_if_indices[i],
- hw_if_indices[i]);
- vlib_cli_output (vm, " Number of qids %u", vui->num_qid);
- if (vui->enable_gso)
- vlib_cli_output (vm, " GSO enable");
- if (vui->enable_packed)
- vlib_cli_output (vm, " Packed ring enable");
- if (vui->enable_event_idx)
- vlib_cli_output (vm, " Event index enable");
-
- vlib_cli_output (vm, "virtio_net_hdr_sz %d\n"
- " features mask (0x%llx): \n"
- " features (0x%llx): \n",
- vui->virtio_net_hdr_sz, vui->feature_mask,
- vui->features);
-
- feat_entry = (struct feat_struct *) &feat_array;
- while (feat_entry->str)
- {
- if (vui->features & (1ULL << feat_entry->bit))
- vlib_cli_output (vm, " %s (%d)", feat_entry->str,
- feat_entry->bit);
- feat_entry++;
- }
-
- vlib_cli_output (vm, " protocol features (0x%llx)",
- vui->protocol_features);
- feat_entry = (struct feat_struct *) &proto_feat_array;
- while (feat_entry->str)
- {
- if (vui->protocol_features & (1ULL << feat_entry->bit))
- vlib_cli_output (vm, " %s (%d)", feat_entry->str,
- feat_entry->bit);
- feat_entry++;
- }
-
- vlib_cli_output (vm, "\n");
-
- vlib_cli_output (vm, " socket filename %s type %s errno \"%s\"\n\n",
- vui->sock_filename,
- (vui->unix_server_index != ~0) ? "server" : "client",
- strerror (vui->sock_errno));
-
- vlib_cli_output (vm, " rx placement: ");
-
- FOR_ALL_VHOST_TXQ (qid, vui)
- {
- vhost_user_vring_t *txvq = &vui->vrings[qid];
-
- if (txvq->qid == -1)
- continue;
- vlib_cli_output (vm, " thread %d on vring %d, %U\n",
- txvq->thread_index, qid, format_vnet_hw_if_rx_mode,
- txvq->mode);
- }
-
- vlib_cli_output (vm, " tx placement\n");
-
- FOR_ALL_VHOST_RXQ (qid, vui)
- {
- vhost_user_vring_t *rxvq = &vui->vrings[qid];
- vnet_hw_if_tx_queue_t *txq;
-
- if (rxvq->queue_index == ~0)
- continue;
- txq = vnet_hw_if_get_tx_queue (vnm, rxvq->queue_index);
- if (txq->threads)
- vlib_cli_output (vm, " threads %U on vring %u: %s\n",
- format_bitmap_list, txq->threads, qid,
- txq->shared_queue ? "spin-lock" : "lock-free");
- }
-
- vlib_cli_output (vm, "\n");
-
- vlib_cli_output (vm, " Memory regions (total %d)\n", vui->nregions);
-
- if (vui->nregions)
- {
- vlib_cli_output (vm,
- " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n");
- vlib_cli_output (vm,
- " ====== ===== ================== ================== ================== ================== ==================\n");
- }
- for (j = 0; j < vui->nregions; j++)
- {
- vlib_cli_output (vm,
- " %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
- j, vui->region_mmap_fd[j],
- vui->regions[j].guest_phys_addr,
- vui->regions[j].memory_size,
- vui->regions[j].userspace_addr,
- vui->regions[j].mmap_offset,
- pointer_to_uword (vui->region_mmap_addr[j]));
- }
- FOR_ALL_VHOST_RX_TXQ (q, vui)
- {
- if (!vui->vrings[q].started)
- continue;
-
- vlib_cli_output (vm, "\n Virtqueue %d (%s%s)\n", q,
- (q & 1) ? "RX" : "TX",
- vui->vrings[q].enabled ? "" : " disabled");
- vlib_cli_output (vm, " global %s queue index %u\n",
- (q & 1) ? "RX" : "TX", vui->vrings[q].queue_index);
-
- vlib_cli_output (
- vm,
- " qsz %d last_avail_idx %d last_used_idx %d"
- " last_kick %u\n",
- vui->vrings[q].qsz_mask + 1, vui->vrings[q].last_avail_idx,
- vui->vrings[q].last_used_idx, vui->vrings[q].last_kick);
-
- if (vhost_user_is_packed_ring_supported (vui))
- vhost_user_show_desc_packed (vm, vui, q, show_descr, show_verbose);
- else
- vhost_user_show_desc (vm, vui, q, show_descr, show_verbose);
- }
- vlib_cli_output (vm, "\n");
- }
-done:
- vec_free (hw_if_indices);
- return error;
-}
-
-/*
- * CLI functions
- */
-
-/*?
- * Create a vHost User interface. Once created, a new virtual interface
- * will exist with the name '<em>VirtualEthernet0/0/x</em>', where '<em>x</em>'
- * is the next free index.
- *
- * There are several parameters associated with a vHost interface:
- *
- * - <b>socket <socket-filename></b> - Name of the linux socket used by
- * the hypervisor and VPP to manage the vHost interface. If in <em>server</em>
- * mode, VPP will create the socket if it does not already exist. If in
- * <em>client</em> mode, the hypervisor will create the socket if it does not
- * already exist. The VPP code is indifferent to the file location. However,
- * if SELinux is enabled, then the socket needs to be created in
- * <em>/var/run/vpp/</em>.
- *
- * - <b>server</b> - Optional flag to indicate that VPP should be the server
- * for the linux socket. If not provided, VPP will be the client. In
- * <em>server</em> mode, the VM can be reset without tearing down the vHost
- * Interface. In <em>client</em> mode, VPP can be reset without bringing down
- * the VM and tearing down the vHost Interface.
- *
- * - <b>feature-mask <hex></b> - Optional virtio/vhost feature set negotiated
- * at startup. <b>This is intended for debugging only.</b> It is recommended
- * that this parameter not be used except by experienced users. By default,
- * all supported features will be advertised. Otherwise, provide the set of
- * features desired.
- * - 0x000008000 (15) - VIRTIO_NET_F_MRG_RXBUF
- * - 0x000020000 (17) - VIRTIO_NET_F_CTRL_VQ
- * - 0x000200000 (21) - VIRTIO_NET_F_GUEST_ANNOUNCE
- * - 0x000400000 (22) - VIRTIO_NET_F_MQ
- * - 0x004000000 (26) - VHOST_F_LOG_ALL
- * - 0x008000000 (27) - VIRTIO_F_ANY_LAYOUT
- * - 0x010000000 (28) - VIRTIO_F_INDIRECT_DESC
- * - 0x040000000 (30) - VHOST_USER_F_PROTOCOL_FEATURES
- * - 0x100000000 (32) - VIRTIO_F_VERSION_1
- *
- * - <b>hwaddr <mac-addr></b> - Optional ethernet address, can be in either
- * X:X:X:X:X:X unix or X.X.X cisco format.
- *
- * - <b>renumber <dev_instance></b> - Optional parameter which allows the
- * instance in the name to be specified. If the instance already exists, the
- * name will be used anyway and multiple instances will have the same name.
- * Use with caution.
- *
- * @cliexpar
- * Example of how to create a vhost interface with VPP as the client and all
- * features enabled:
- * @cliexstart{create vhost-user socket /var/run/vpp/vhost1.sock}
- * VirtualEthernet0/0/0
- * @cliexend
- * Example of how to create a vhost interface with VPP as the server and with
- * just multiple queues enabled:
- * @cliexstart{create vhost-user socket /var/run/vpp/vhost2.sock server
- * feature-mask 0x40400000}
- * VirtualEthernet0/0/1
- * @cliexend
- * Once the vHost interface is created, enable the interface using:
- * @cliexcmd{set interface state VirtualEthernet0/0/0 up}
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (vhost_user_connect_command, static) = {
- .path = "create vhost-user",
- .short_help = "create vhost-user socket <socket-filename> [server] "
- "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>] [gso] "
- "[packed] [event-idx]",
- .function = vhost_user_connect_command_fn,
- .is_mp_safe = 1,
-};
-/* *INDENT-ON* */
-
-/*?
- * Delete a vHost User interface using the interface name or the
- * software interface index. Use the '<em>show interface</em>'
- * command to determine the software interface index. On deletion,
- * the linux socket will not be deleted.
- *
- * @cliexpar
- * Example of how to delete a vhost interface by name:
- * @cliexcmd{delete vhost-user VirtualEthernet0/0/1}
- * Example of how to delete a vhost interface by software interface index:
- * @cliexcmd{delete vhost-user sw_if_index 1}
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (vhost_user_delete_command, static) = {
- .path = "delete vhost-user",
- .short_help = "delete vhost-user {<interface> | sw_if_index <sw_idx>}",
- .function = vhost_user_delete_command_fn,
-};
-
-/*?
- * Display the attributes of a single vHost User interface (provide interface
- * name), multiple vHost User interfaces (provide a list of interface names
- * separated by spaces) or all vHost User interfaces (omit an interface name
- * to display all vHost interfaces).
- *
- * @cliexpar
- * @parblock
- * Example of how to display a vhost interface:
- * @cliexstart{show vhost-user VirtualEthernet0/0/0}
- * Virtio vhost-user interfaces
- * Global:
- * coalesce frames 32 time 1e-3
- * Interface: VirtualEthernet0/0/0 (ifindex 1)
- * virtio_net_hdr_sz 12
- * features mask (0xffffffffffffffff):
- * features (0x50408000):
- * VIRTIO_NET_F_MRG_RXBUF (15)
- * VIRTIO_NET_F_MQ (22)
- * VIRTIO_F_INDIRECT_DESC (28)
- * VHOST_USER_F_PROTOCOL_FEATURES (30)
- * protocol features (0x3)
- * VHOST_USER_PROTOCOL_F_MQ (0)
- * VHOST_USER_PROTOCOL_F_LOG_SHMFD (1)
- *
- * socket filename /var/run/vpp/vhost1.sock type client errno "Success"
- *
- * rx placement:
- * thread 1 on vring 1
- * thread 1 on vring 5
- * thread 2 on vring 3
- * thread 2 on vring 7
- * tx placement: spin-lock
- * thread 0 on vring 0
- * thread 1 on vring 2
- * thread 2 on vring 0
- *
- * Memory regions (total 2)
- * region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr
- * ====== == =============== =========== ============== =========== ==========
- * 0 60 0x00000000 0x000a0000 0xaac00000 0x00000000 0x2b400000
- * 1 61 0x000c0000 0x3ff40000 0xaacc0000 0x000c0000 0xabcc0000
- *
- * Virtqueue 0 (TX)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
- * kickfd 62 callfd 64 errfd -1
- *
- * Virtqueue 1 (RX)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
- * kickfd 65 callfd 66 errfd -1
- *
- * Virtqueue 2 (TX)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
- * kickfd 63 callfd 70 errfd -1
- *
- * Virtqueue 3 (RX)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
- * kickfd 72 callfd 74 errfd -1
- *
- * Virtqueue 4 (TX disabled)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
- * kickfd 76 callfd 78 errfd -1
- *
- * Virtqueue 5 (RX disabled)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
- * kickfd 80 callfd 82 errfd -1
- *
- * Virtqueue 6 (TX disabled)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
- * kickfd 84 callfd 86 errfd -1
- *
- * Virtqueue 7 (RX disabled)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
- * kickfd 88 callfd 90 errfd -1
- *
- * @cliexend
- *
- * The optional '<em>descriptors</em>' parameter will display the same output
- * as the previous example but will include the descriptor table for each
- * queue.
- * The output is truncated below:
- * @cliexstart{show vhost-user VirtualEthernet0/0/0 descriptors}
- * Virtio vhost-user interfaces
- * Global:
- * coalesce frames 32 time 1e-3
- * Interface: VirtualEthernet0/0/0 (ifindex 1)
- * virtio_net_hdr_sz 12
- * features mask (0xffffffffffffffff):
- * features (0x50408000):
- * VIRTIO_NET_F_MRG_RXBUF (15)
- * VIRTIO_NET_F_MQ (22)
- * :
- * Virtqueue 0 (TX)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
- * kickfd 62 callfd 64 errfd -1
- *
- * descriptor table:
- * id addr len flags next user_addr
- * ===== ================== ===== ====== ===== ==================
- * 0 0x0000000010b6e974 2060 0x0002 1 0x00002aabbc76e974
- * 1 0x0000000010b6e034 2060 0x0002 2 0x00002aabbc76e034
- * 2 0x0000000010b6d6f4 2060 0x0002 3 0x00002aabbc76d6f4
- * 3 0x0000000010b6cdb4 2060 0x0002 4 0x00002aabbc76cdb4
- * 4 0x0000000010b6c474 2060 0x0002 5 0x00002aabbc76c474
- * 5 0x0000000010b6bb34 2060 0x0002 6 0x00002aabbc76bb34
- * 6 0x0000000010b6b1f4 2060 0x0002 7 0x00002aabbc76b1f4
- * 7 0x0000000010b6a8b4 2060 0x0002 8 0x00002aabbc76a8b4
- * 8 0x0000000010b69f74 2060 0x0002 9 0x00002aabbc769f74
- * 9 0x0000000010b69634 2060 0x0002 10 0x00002aabbc769634
- * 10 0x0000000010b68cf4 2060 0x0002 11 0x00002aabbc768cf4
- * :
- * 249 0x0000000000000000 0 0x0000 250 0x00002aab2b400000
- * 250 0x0000000000000000 0 0x0000 251 0x00002aab2b400000
- * 251 0x0000000000000000 0 0x0000 252 0x00002aab2b400000
- * 252 0x0000000000000000 0 0x0000 253 0x00002aab2b400000
- * 253 0x0000000000000000 0 0x0000 254 0x00002aab2b400000
- * 254 0x0000000000000000 0 0x0000 255 0x00002aab2b400000
- * 255 0x0000000000000000 0 0x0000 32768 0x00002aab2b400000
- *
- * Virtqueue 1 (RX)
- * qsz 256 last_avail_idx 0 last_used_idx 0
- * :
- * @cliexend
- * @endparblock
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_vhost_user_command, static) = {
- .path = "show vhost-user",
- .short_help = "show vhost-user [<interface> [<interface> [..]]] "
- "[[descriptors] [verbose]]",
- .function = show_vhost_user_command_fn,
-};
-/* *INDENT-ON* */
-
-
-static clib_error_t *
-vhost_user_config (vlib_main_t * vm, unformat_input_t * input)
-{
- vhost_user_main_t *vum = &vhost_user_main;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "coalesce-frames %d", &vum->coalesce_frames))
- ;
- else if (unformat (input, "coalesce-time %f", &vum->coalesce_time))
- ;
- else if (unformat (input, "dont-dump-memory"))
- vum->dont_dump_vhost_user_memory = 1;
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- }
-
- return 0;
-}
-
-/* vhost-user { ... } configuration. */
-VLIB_CONFIG_FUNCTION (vhost_user_config, "vhost-user");
-
-void
-vhost_user_unmap_all (void)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui;
-
- if (vum->dont_dump_vhost_user_memory)
- {
- pool_foreach (vui, vum->vhost_user_interfaces)
- unmap_all_mem_regions (vui);
- }
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/virtio/vhost_user.h b/src/vnet/devices/virtio/vhost_user.h
deleted file mode 100644
index 59db5b4c592..00000000000
--- a/src/vnet/devices/virtio/vhost_user.h
+++ /dev/null
@@ -1,388 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __VIRTIO_VHOST_USER_H__
-#define __VIRTIO_VHOST_USER_H__
-
-#include <vnet/devices/virtio/virtio_std.h>
-#include <vnet/devices/virtio/vhost_std.h>
-
-/* vhost-user data structures */
-
-#define VHOST_MEMORY_MAX_NREGIONS 8
-#define VHOST_USER_MSG_HDR_SZ 12
-#define VHOST_VRING_INIT_MQ_PAIR_SZ 8 // 8 TX + 8 RX
-
-/*
- * qid is one byte in size in the spec. Please see VHOST_USER_SET_VRING_CALL,
- * VHOST_USER_SET_VRING_KICK, and VHOST_USER_SET_VRING_ERR.
- * The max number of queue pairs is naturally 128.
- */
-#define VHOST_VRING_MAX_MQ_PAIR_SZ 128
-#define VHOST_VRING_IDX_RX(qid) (2 * (qid))
-#define VHOST_VRING_IDX_TX(qid) (2 * (qid) + 1)
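-/* Mapping example (sketch): queue pair 3 uses RX vring 6 and TX vring 7. */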
-
-#define VHOST_USER_VRING_NOFD_MASK 0x100
-
-#define VHOST_USER_PROTOCOL_F_MQ 0
-#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
-#define VHOST_VRING_F_LOG 0
-
-#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
- (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD))
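-/* == 0x3, i.e. F_MQ (bit 0) | F_LOG_SHMFD (bit 1) */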
-
-#define vu_log_debug(dev, f, ...) \
-{ \
- vlib_log(VLIB_LOG_LEVEL_DEBUG, vhost_user_main.log_default, "%U: " f, \
- format_vnet_hw_if_index_name, vnet_get_main(), \
- dev->hw_if_index, ##__VA_ARGS__); \
-};
-
-#define vu_log_warn(dev, f, ...) \
-{ \
- vlib_log(VLIB_LOG_LEVEL_WARNING, vhost_user_main.log_default, "%U: " f, \
- format_vnet_hw_if_index_name, vnet_get_main(), \
- dev->hw_if_index, ##__VA_ARGS__); \
-};
-#define vu_log_err(dev, f, ...) \
-{ \
- vlib_log(VLIB_LOG_LEVEL_ERR, vhost_user_main.log_default, "%U: " f, \
- format_vnet_hw_if_index_name, vnet_get_main(), \
- dev->hw_if_index, ##__VA_ARGS__); \
-};
-
-#define UNIX_GET_FD(unixfd_idx) ({ \
- typeof(unixfd_idx) __unixfd_idx = (unixfd_idx); \
- (__unixfd_idx != ~0) ? \
- pool_elt_at_index (file_main.file_pool, \
- __unixfd_idx)->file_descriptor : -1; })
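-/*
- * Usage sketch: resolves a clib file-pool index to its file descriptor,
- * or -1 when the index is unset, e.g.
- *   int kickfd = UNIX_GET_FD (vq->kickfd_idx);
- */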
-
-#define foreach_virtio_trace_flags \
- _ (SIMPLE_CHAINED, 0, "Simple descriptor chaining") \
- _ (SINGLE_DESC, 1, "Single descriptor packet") \
- _ (INDIRECT, 2, "Indirect descriptor") \
- _ (MAP_ERROR, 4, "Memory mapping error")
-
-typedef enum
-{
-#define _(n,i,s) VIRTIO_TRACE_F_##n,
- foreach_virtio_trace_flags
-#undef _
-} virtio_trace_flag_t;
-
-#define FEATURE_VIRTIO_NET_F_HOST_TSO_FEATURE_BITS \
- (VIRTIO_FEATURE (VIRTIO_NET_F_CSUM) | \
- VIRTIO_FEATURE (VIRTIO_NET_F_HOST_UFO) | \
- VIRTIO_FEATURE (VIRTIO_NET_F_HOST_TSO4) | \
- VIRTIO_FEATURE (VIRTIO_NET_F_HOST_TSO6))
-
-#define FEATURE_VIRTIO_NET_F_GUEST_TSO_FEATURE_BITS \
- (VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM) | \
- VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_UFO) | \
- VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO4) | \
- VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO6))
-
-#define FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS \
- (FEATURE_VIRTIO_NET_F_HOST_TSO_FEATURE_BITS | \
- FEATURE_VIRTIO_NET_F_GUEST_TSO_FEATURE_BITS)
-
-
-typedef struct
-{
- char *sock_filename;
- u64 feature_mask;
- u32 custom_dev_instance;
- u8 hwaddr[6];
- u8 renumber;
- u8 is_server;
- u8 enable_gso;
- u8 enable_packed;
- u8 enable_event_idx;
- u8 use_custom_mac;
-
- /* return */
- u32 sw_if_index;
-} vhost_user_create_if_args_t;
-
-int vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
- vhost_user_create_if_args_t * args);
-int vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
- vhost_user_create_if_args_t * args);
-int vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm,
- u32 sw_if_index);
-
-/* *INDENT-OFF* */
-typedef struct vhost_user_memory_region
-{
- u64 guest_phys_addr;
- u64 memory_size;
- u64 userspace_addr;
- u64 mmap_offset;
-} __attribute ((packed)) vhost_user_memory_region_t;
-
-typedef struct vhost_user_memory
-{
- u32 nregions;
- u32 padding;
- vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS];
-} __attribute ((packed)) vhost_user_memory_t;
-
-typedef enum vhost_user_req
-{
- VHOST_USER_NONE = 0,
- VHOST_USER_GET_FEATURES = 1,
- VHOST_USER_SET_FEATURES = 2,
- VHOST_USER_SET_OWNER = 3,
- VHOST_USER_RESET_OWNER = 4,
- VHOST_USER_SET_MEM_TABLE = 5,
- VHOST_USER_SET_LOG_BASE = 6,
- VHOST_USER_SET_LOG_FD = 7,
- VHOST_USER_SET_VRING_NUM = 8,
- VHOST_USER_SET_VRING_ADDR = 9,
- VHOST_USER_SET_VRING_BASE = 10,
- VHOST_USER_GET_VRING_BASE = 11,
- VHOST_USER_SET_VRING_KICK = 12,
- VHOST_USER_SET_VRING_CALL = 13,
- VHOST_USER_SET_VRING_ERR = 14,
- VHOST_USER_GET_PROTOCOL_FEATURES = 15,
- VHOST_USER_SET_PROTOCOL_FEATURES = 16,
- VHOST_USER_GET_QUEUE_NUM = 17,
- VHOST_USER_SET_VRING_ENABLE = 18,
- VHOST_USER_MAX
-} vhost_user_req_t;
-
-typedef struct vhost_user_msg {
- vhost_user_req_t request;
- u32 flags;
- u32 size;
- union
- {
- u64 u64;
- vhost_vring_state_t state;
- vhost_vring_addr_t addr;
- vhost_user_memory_t memory;
- vhost_user_log_t log;
- };
-} __attribute ((packed)) vhost_user_msg_t;
-/* *INDENT-ON* */
-
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- u16 qsz_mask;
- u16 last_avail_idx;
- u16 last_used_idx;
- u16 n_since_last_int;
- union
- {
- vring_desc_t *desc;
- vring_packed_desc_t *packed_desc;
- };
- union
- {
- vring_avail_t *avail;
- vring_desc_event_t *avail_event;
- };
- union
- {
- vring_used_t *used;
- vring_desc_event_t *used_event;
- };
- uword desc_user_addr;
- uword used_user_addr;
- uword avail_user_addr;
- f64 int_deadline;
- u8 started;
- u8 enabled;
- u8 log_used;
- clib_spinlock_t vring_lock;
-
- //Put non-runtime in a different cache line
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
- int errfd;
- u32 callfd_idx;
- u32 kickfd_idx;
- u64 log_guest_addr;
-
- /* The rx queue policy (interrupt/adaptive/polling) for this queue */
- u32 mode;
-
- /*
-   * It contains the device queue number, or -1 if none is assigned. The
-   * idea is to avoid invoking vnet_hw_interface_assign_rx_thread and
-   * vnet_hw_interface_unassign_rx_thread more than once for the duration
-   * of the interface, even if it is disconnected and reconnected.
- */
- i16 qid;
-
- u16 used_wrap_counter;
- u16 avail_wrap_counter;
- u16 last_kick;
- u8 first_kick;
- u32 queue_index;
- u32 thread_index;
-} vhost_user_vring_t;
-
-#define VHOST_USER_EVENT_START_TIMER 1
-#define VHOST_USER_EVENT_STOP_TIMER 2
-
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- u32 is_ready;
- u32 admin_up;
- u32 unix_server_index;
- u32 clib_file_index;
- char sock_filename[256];
- int sock_errno;
- uword if_index;
- u32 hw_if_index, sw_if_index;
-
- //Feature negotiation
- u64 features;
- u64 feature_mask;
- u64 protocol_features;
-
- //Memory region information
- u32 nregions;
- vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS];
- void *region_mmap_addr[VHOST_MEMORY_MAX_NREGIONS];
- u64 region_guest_addr_lo[VHOST_MEMORY_MAX_NREGIONS];
- u64 region_guest_addr_hi[VHOST_MEMORY_MAX_NREGIONS];
- u32 region_mmap_fd[VHOST_MEMORY_MAX_NREGIONS];
-
- //Virtual rings
- vhost_user_vring_t *vrings;
-
- /*
- * vrings is a dynamic array; it may have more elements than are
- * currently in use. num_qid is the total number of qids currently in
- * the vrings. For example, with vec_len (vrings) = 64 and num_qid = 60,
- * the valid/used qids are [0, 59] in the vrings array.
- */
- u32 num_qid;
-
- int virtio_net_hdr_sz;
- int is_any_layout;
-
- void *log_base_addr;
- u64 log_size;
-
- u8 enable_gso;
-
- /* Packed ring configured */
- u8 enable_packed;
-
- u8 enable_event_idx;
-} vhost_user_intf_t;
-
-#define FOR_ALL_VHOST_TXQ(qid, vui) for (qid = 1; qid < vui->num_qid; qid += 2)
-
-#define FOR_ALL_VHOST_RXQ(qid, vui) for (qid = 0; qid < vui->num_qid; qid += 2)
-
-#define FOR_ALL_VHOST_RX_TXQ(qid, vui) for (qid = 0; qid < vui->num_qid; qid++)
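-
-/*
- * In the vrings array, even qids are RX vrings and odd qids are TX
- * vrings, which the strides above encode. Illustrative sketch (not part
- * of the original header): count the TX vrings that have been started.
- *
- *   u32 qid, n = 0;
- *   FOR_ALL_VHOST_TXQ (qid, vui)
- *     n += vui->vrings[qid].started;
- */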
-
-typedef struct
-{
- uword dst;
- uword src;
- u32 len;
-} vhost_copy_t;
-
-typedef struct
-{
- u16 qid; /**< The interface queue index (not the virtio vring idx) */
- u16 device_index; /**< The device index */
- u32 virtio_ring_flags; /**< Runtime queue flags */
- u16 first_desc_len; /**< Length of the first data descriptor */
- virtio_net_hdr_mrg_rxbuf_t hdr; /**< Virtio header */
-} vhost_trace_t;
-
-#define VHOST_USER_RX_BUFFERS_N (2 * VLIB_FRAME_SIZE + 2)
-#define VHOST_USER_COPY_ARRAY_N (4 * VLIB_FRAME_SIZE)
-
-typedef struct
-{
- u32 rx_buffers_len;
- u32 rx_buffers[VHOST_USER_RX_BUFFERS_N];
-
- virtio_net_hdr_mrg_rxbuf_t tx_headers[VLIB_FRAME_SIZE];
- vhost_copy_t copy[VHOST_USER_COPY_ARRAY_N];
-
- /* This is here so it doesn't end-up
- * using stack or registers. */
- vhost_trace_t *current_trace;
-
- u32 *to_next_list;
- vlib_buffer_t **rx_buffers_pdesc;
- u32 polling_q_count;
-} vhost_cpu_t;
-
-typedef struct
-{
- mhash_t if_index_by_sock_name;
- u32 mtu_bytes;
- vhost_user_intf_t *vhost_user_interfaces;
- u32 *show_dev_instance_by_real_dev_instance;
- u32 coalesce_frames;
- f64 coalesce_time;
- int dont_dump_vhost_user_memory;
-
- /** Per-CPU data for vhost-user */
- vhost_cpu_t *cpus;
-
- /** Pseudo random iterator */
- u32 random;
-
- /* The number of rx interface/queue pairs in interrupt mode */
- u32 ifq_count;
-
- /* logging */
- vlib_log_class_t log_default;
-
- /* gso interface count */
- u32 gso_count;
-} vhost_user_main_t;
-
-typedef struct
-{
- u8 if_name[64];
- u32 sw_if_index;
- u32 virtio_net_hdr_sz;
- u64 features;
- u8 is_server;
- u8 sock_filename[256];
- u32 num_regions;
- int sock_errno;
-} vhost_user_intf_details_t;
-
-int vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
- vhost_user_intf_details_t ** out_vuids);
-void vhost_user_set_operation_mode (vhost_user_intf_t *vui,
- vhost_user_vring_t *txvq);
-
-extern vlib_node_registration_t vhost_user_send_interrupt_node;
-extern vnet_device_class_t vhost_user_device_class;
-extern vlib_node_registration_t vhost_user_input_node;
-extern vhost_user_main_t vhost_user_main;
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/virtio/vhost_user_api.c b/src/vnet/devices/virtio/vhost_user_api.c
deleted file mode 100644
index df6768d4cde..00000000000
--- a/src/vnet/devices/virtio/vhost_user_api.c
+++ /dev/null
@@ -1,351 +0,0 @@
-/*
- *------------------------------------------------------------------
- * vhost_user_api.c - vhost-user API
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vnet/vnet.h>
-#include <vlibmemory/api.h>
-
-#include <vnet/interface.h>
-#include <vnet/api_errno.h>
-#include <vnet/devices/virtio/vhost_user.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/ethernet/ethernet_types_api.h>
-#include <vnet/devices/virtio/virtio_types_api.h>
-
-#include <vnet/format_fns.h>
-#include <vnet/devices/virtio/vhost_user.api_enum.h>
-#include <vnet/devices/virtio/vhost_user.api_types.h>
-
-#define REPLY_MSG_ID_BASE msg_id_base
-#include <vlibapi/api_helper_macros.h>
-
-static u16 msg_id_base;
-
-static void
-vl_api_create_vhost_user_if_t_handler (vl_api_create_vhost_user_if_t * mp)
-{
- int rv = 0;
- vl_api_create_vhost_user_if_reply_t *rmp;
- vnet_main_t *vnm = vnet_get_main ();
- vlib_main_t *vm = vlib_get_main ();
- u64 disabled_features = (u64) (0ULL);
- vhost_user_create_if_args_t args = { 0 };
-
- args.sw_if_index = (u32) ~ 0;
- args.feature_mask = (u64) ~ (0ULL);
- if (mp->disable_mrg_rxbuf)
- disabled_features = VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF);
-
- if (mp->disable_indirect_desc)
- disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC);
-
- /*
- * GSO and PACKED are not selectable through the feature mask via the
- * binary API, so they are cleared from the feature mask here. They may
- * be enabled explicitly via the enable_gso and enable_packed arguments.
- */
- disabled_features |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS |
- VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
-
- /* EVENT_IDX is disabled by default */
- disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
- args.feature_mask &= ~disabled_features;
-
- if (mp->use_custom_mac)
- mac_address_decode (mp->mac_address, (mac_address_t *) args.hwaddr);
-
- args.use_custom_mac = mp->use_custom_mac;
- args.is_server = mp->is_server;
- args.sock_filename = (char *) mp->sock_filename;
- args.renumber = mp->renumber;
- args.custom_dev_instance = ntohl (mp->custom_dev_instance);
- args.enable_gso = mp->enable_gso;
- args.enable_packed = mp->enable_packed;
- rv = vhost_user_create_if (vnm, vm, &args);
-
- /* Remember an interface tag for the new interface */
- if (rv == 0)
- {
- /* If a tag was supplied... */
- if (mp->tag[0])
- {
- /* Make sure it's a proper C-string */
- mp->tag[ARRAY_LEN (mp->tag) - 1] = 0;
- u8 *tag = format (0, "%s%c", mp->tag, 0);
- vnet_set_sw_interface_tag (vnm, tag, args.sw_if_index);
- }
- }
-
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_CREATE_VHOST_USER_IF_REPLY,
- ({
- rmp->sw_if_index = ntohl (args.sw_if_index);
- }));
- /* *INDENT-ON* */
-}
-
-static void
-vl_api_modify_vhost_user_if_t_handler (vl_api_modify_vhost_user_if_t * mp)
-{
- int rv = 0;
- vl_api_modify_vhost_user_if_reply_t *rmp;
- u64 disabled_features = (u64) (0ULL);
- vhost_user_create_if_args_t args = { 0 };
- vnet_main_t *vnm = vnet_get_main ();
- vlib_main_t *vm = vlib_get_main ();
-
- args.feature_mask = (u64) ~ (0ULL);
- /*
- * GSO and PACKED are not selectable through the feature mask via the
- * binary API, so they are cleared from the feature mask here. They may
- * be enabled explicitly via the enable_gso and enable_packed arguments.
- */
- disabled_features |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS |
- VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
-
- /* EVENT_IDX is disabled by default */
- disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
- args.feature_mask &= ~disabled_features;
-
- args.sw_if_index = ntohl (mp->sw_if_index);
- args.sock_filename = (char *) mp->sock_filename;
- args.is_server = mp->is_server;
- args.renumber = mp->renumber;
- args.custom_dev_instance = ntohl (mp->custom_dev_instance);
- args.enable_gso = mp->enable_gso;
- args.enable_packed = mp->enable_packed;
- rv = vhost_user_modify_if (vnm, vm, &args);
-
- REPLY_MACRO (VL_API_MODIFY_VHOST_USER_IF_REPLY);
-}
-
-static void
-vl_api_create_vhost_user_if_v2_t_handler (vl_api_create_vhost_user_if_v2_t *
- mp)
-{
- int rv = 0;
- vl_api_create_vhost_user_if_v2_reply_t *rmp;
- vnet_main_t *vnm = vnet_get_main ();
- vlib_main_t *vm = vlib_get_main ();
- u64 disabled_features = (u64) (0ULL);
- vhost_user_create_if_args_t args = { 0 };
-
- args.sw_if_index = (u32) ~ 0;
- args.feature_mask = (u64) ~ (0ULL);
- if (mp->disable_mrg_rxbuf)
- disabled_features = VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF);
-
- if (mp->disable_indirect_desc)
- disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC);
-
- /*
- * GSO and PACKED are not selectable through the feature mask via the
- * binary API, so they are cleared from the feature mask here. They may
- * be enabled explicitly via the enable_gso and enable_packed arguments.
- */
- disabled_features |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS |
- VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
-
- /* EVENT_IDX is disabled by default */
- disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
- args.feature_mask &= ~disabled_features;
-
- if (mp->use_custom_mac)
- mac_address_decode (mp->mac_address, (mac_address_t *) args.hwaddr);
-
- args.is_server = mp->is_server;
- args.sock_filename = (char *) mp->sock_filename;
- args.renumber = mp->renumber;
- args.custom_dev_instance = ntohl (mp->custom_dev_instance);
- args.enable_gso = mp->enable_gso;
- args.enable_packed = mp->enable_packed;
- args.enable_event_idx = mp->enable_event_idx;
- rv = vhost_user_create_if (vnm, vm, &args);
-
- /* Remember an interface tag for the new interface */
- if (rv == 0)
- {
- /* If a tag was supplied... */
- if (mp->tag[0])
- {
- /* Make sure it's a proper C-string */
- mp->tag[ARRAY_LEN (mp->tag) - 1] = 0;
- u8 *tag = format (0, "%s%c", mp->tag, 0);
- vnet_set_sw_interface_tag (vnm, tag, args.sw_if_index);
- }
- }
-
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_CREATE_VHOST_USER_IF_V2_REPLY,
- ({
- rmp->sw_if_index = ntohl (args.sw_if_index);
- }));
- /* *INDENT-ON* */
-}
-
-static void
-vl_api_modify_vhost_user_if_v2_t_handler (vl_api_modify_vhost_user_if_v2_t *
- mp)
-{
- int rv = 0;
- vl_api_modify_vhost_user_if_v2_reply_t *rmp;
- u64 disabled_features = (u64) (0ULL);
- vhost_user_create_if_args_t args = { 0 };
- vnet_main_t *vnm = vnet_get_main ();
- vlib_main_t *vm = vlib_get_main ();
-
- args.feature_mask = (u64) ~ (0ULL);
- /*
- * GSO and PACKED are not selectable through the feature mask via the
- * binary API, so they are cleared from the feature mask here. They may
- * be enabled explicitly via the enable_gso and enable_packed arguments.
- */
- disabled_features |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS |
- VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
-
- /* EVENT_IDX is disabled by default */
- disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
- args.feature_mask &= ~disabled_features;
-
- args.sw_if_index = ntohl (mp->sw_if_index);
- args.sock_filename = (char *) mp->sock_filename;
- args.is_server = mp->is_server;
- args.renumber = mp->renumber;
- args.custom_dev_instance = ntohl (mp->custom_dev_instance);
- args.enable_gso = mp->enable_gso;
- args.enable_packed = mp->enable_packed;
- args.enable_event_idx = mp->enable_event_idx;
- rv = vhost_user_modify_if (vnm, vm, &args);
-
- REPLY_MACRO (VL_API_MODIFY_VHOST_USER_IF_V2_REPLY);
-}
-
-static void
-vl_api_delete_vhost_user_if_t_handler (vl_api_delete_vhost_user_if_t * mp)
-{
- int rv = 0;
- vl_api_delete_vhost_user_if_reply_t *rmp;
- u32 sw_if_index = ntohl (mp->sw_if_index);
- vl_api_registration_t *reg;
-
- vnet_main_t *vnm = vnet_get_main ();
- vlib_main_t *vm = vlib_get_main ();
-
- rv = vhost_user_delete_if (vnm, vm, sw_if_index);
-
- REPLY_MACRO (VL_API_DELETE_VHOST_USER_IF_REPLY);
- if (!rv)
- {
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- vnet_clear_sw_interface_tag (vnm, sw_if_index);
- }
-}
-
-static void
-send_sw_interface_vhost_user_details (vpe_api_main_t * am,
- vl_api_registration_t * reg,
- vhost_user_intf_details_t * vui,
- u32 context)
-{
- vl_api_sw_interface_vhost_user_details_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id =
- ntohs (REPLY_MSG_ID_BASE + VL_API_SW_INTERFACE_VHOST_USER_DETAILS);
- mp->sw_if_index = ntohl (vui->sw_if_index);
- mp->virtio_net_hdr_sz = ntohl (vui->virtio_net_hdr_sz);
- virtio_features_encode (vui->features, (u32 *) & mp->features_first_32,
- (u32 *) & mp->features_last_32);
- mp->is_server = vui->is_server;
- mp->num_regions = ntohl (vui->num_regions);
- mp->sock_errno = ntohl (vui->sock_errno);
- mp->context = context;
-
- strncpy ((char *) mp->sock_filename,
- (char *) vui->sock_filename, ARRAY_LEN (mp->sock_filename) - 1);
- strncpy ((char *) mp->interface_name,
- (char *) vui->if_name, ARRAY_LEN (mp->interface_name) - 1);
-
- vl_api_send_msg (reg, (u8 *) mp);
-}
-
-static void
- vl_api_sw_interface_vhost_user_dump_t_handler
- (vl_api_sw_interface_vhost_user_dump_t * mp)
-{
- int rv = 0;
- vpe_api_main_t *am = &vpe_api_main;
- vnet_main_t *vnm = vnet_get_main ();
- vlib_main_t *vm = vlib_get_main ();
- vhost_user_intf_details_t *ifaces = NULL;
- vhost_user_intf_details_t *vuid = NULL;
- vl_api_registration_t *reg;
- u32 filter_sw_if_index;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- filter_sw_if_index = ntohl (mp->sw_if_index);
- if (filter_sw_if_index != ~0)
- VALIDATE_SW_IF_INDEX (mp);
-
- rv = vhost_user_dump_ifs (vnm, vm, &ifaces);
- if (rv)
- return;
-
- vec_foreach (vuid, ifaces)
- {
- if ((filter_sw_if_index == ~0) ||
- (vuid->sw_if_index == filter_sw_if_index))
- send_sw_interface_vhost_user_details (am, reg, vuid, mp->context);
- }
- BAD_SW_IF_INDEX_LABEL;
- vec_free (ifaces);
-}
-
-#include <vnet/devices/virtio/vhost_user.api.c>
-static clib_error_t *
-vhost_user_api_hookup (vlib_main_t * vm)
-{
- api_main_t *am = vlibapi_get_main ();
- /* Mark CREATE_VHOST_USER_IF and CREATE_VHOST_USER_IF_V2 as mp-safe */
- am->is_mp_safe[VL_API_CREATE_VHOST_USER_IF] = 1;
- am->is_mp_safe[VL_API_CREATE_VHOST_USER_IF_V2] = 1;
-
- /*
- * Set up the (msg_name, crc, message-id) table
- */
- REPLY_MSG_ID_BASE = setup_message_id_table ();
-
- return 0;
-}
-
-VLIB_API_INIT_FUNCTION (vhost_user_api_hookup);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/virtio/vhost_user_inline.h b/src/vnet/devices/virtio/vhost_user_inline.h
deleted file mode 100644
index 5297453c317..00000000000
--- a/src/vnet/devices/virtio/vhost_user_inline.h
+++ /dev/null
@@ -1,493 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __VIRTIO_VHOST_USER_INLINE_H__
-#define __VIRTIO_VHOST_USER_INLINE_H__
-/* vhost-user inline functions */
-#include <vppinfra/elog.h>
-
-static_always_inline void *
-map_guest_mem (vhost_user_intf_t * vui, uword addr, u32 * hint)
-{
- int i = *hint;
- if (PREDICT_TRUE ((vui->regions[i].guest_phys_addr <= addr) &&
- ((vui->regions[i].guest_phys_addr +
- vui->regions[i].memory_size) > addr)))
- {
- return (void *) (vui->region_mmap_addr[i] + addr -
- vui->regions[i].guest_phys_addr);
- }
-#if __SSE4_2__
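- /*
- * Vectorized lookup: compare addr against the [lo, hi) bounds of all
- * VHOST_MEMORY_MAX_NREGIONS regions in parallel, then derive the first
- * matching region index from the resulting bitmask.
- */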
- __m128i rl, rh, al, ah, r;
- al = _mm_set1_epi64x (addr + 1);
- ah = _mm_set1_epi64x (addr);
-
- rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[0]);
- rl = _mm_cmpgt_epi64 (al, rl);
- rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[0]);
- rh = _mm_cmpgt_epi64 (rh, ah);
- r = _mm_and_si128 (rl, rh);
-
- rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[2]);
- rl = _mm_cmpgt_epi64 (al, rl);
- rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[2]);
- rh = _mm_cmpgt_epi64 (rh, ah);
- r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x22);
-
- rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[4]);
- rl = _mm_cmpgt_epi64 (al, rl);
- rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[4]);
- rh = _mm_cmpgt_epi64 (rh, ah);
- r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x44);
-
- rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[6]);
- rl = _mm_cmpgt_epi64 (al, rl);
- rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[6]);
- rh = _mm_cmpgt_epi64 (rh, ah);
- r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x88);
-
- r = _mm_shuffle_epi8 (r, _mm_set_epi64x (0, 0x0e060c040a020800));
- i = count_trailing_zeros (_mm_movemask_epi8 (r) |
- (1 << VHOST_MEMORY_MAX_NREGIONS));
-
- if (i < vui->nregions)
- {
- *hint = i;
- return (void *) (vui->region_mmap_addr[i] + addr -
- vui->regions[i].guest_phys_addr);
- }
-#elif __aarch64__ && __ARM_NEON
- uint64x2_t al, ah, rl, rh, r;
- uint32_t u32 = 0;
-
- al = vdupq_n_u64 (addr + 1);
- ah = vdupq_n_u64 (addr);
-
- /* First iteration */
- rl = vld1q_u64 (&vui->region_guest_addr_lo[0]);
- rl = vcgtq_u64 (al, rl);
- rh = vld1q_u64 (&vui->region_guest_addr_hi[0]);
- rh = vcgtq_u64 (rh, ah);
- r = vandq_u64 (rl, rh);
- u32 |= (vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1);
- u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 1);
-
- if (u32)
- {
- i = count_trailing_zeros (u32);
- goto vhost_map_guest_mem_done;
- }
-
- /* Second iteration */
- rl = vld1q_u64 (&vui->region_guest_addr_lo[2]);
- rl = vcgtq_u64 (al, rl);
- rh = vld1q_u64 (&vui->region_guest_addr_hi[2]);
- rh = vcgtq_u64 (rh, ah);
- r = vandq_u64 (rl, rh);
- u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 2);
- u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 3);
-
- if (u32)
- {
- i = count_trailing_zeros (u32);
- goto vhost_map_guest_mem_done;
- }
-
- /* Third iteration; the bit index must equal the region index so that
- * count_trailing_zeros below returns the matching region */
- rl = vld1q_u64 (&vui->region_guest_addr_lo[4]);
- rl = vcgtq_u64 (al, rl);
- rh = vld1q_u64 (&vui->region_guest_addr_hi[4]);
- rh = vcgtq_u64 (rh, ah);
- r = vandq_u64 (rl, rh);
- u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 4);
- u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 5);
-
- if (u32)
- {
- i = count_trailing_zeros (u32);
- goto vhost_map_guest_mem_done;
- }
-
- /* Fourth iteration, covering regions 6 and 7 */
- rl = vld1q_u64 (&vui->region_guest_addr_lo[6]);
- rl = vcgtq_u64 (al, rl);
- rh = vld1q_u64 (&vui->region_guest_addr_hi[6]);
- rh = vcgtq_u64 (rh, ah);
- r = vandq_u64 (rl, rh);
- u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 6);
- u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 7);
-
- i = count_trailing_zeros (u32 | (1 << VHOST_MEMORY_MAX_NREGIONS));
-
-vhost_map_guest_mem_done:
- if (i < vui->nregions)
- {
- *hint = i;
- return (void *) (vui->region_mmap_addr[i] + addr -
- vui->regions[i].guest_phys_addr);
- }
-#else
- for (i = 0; i < vui->nregions; i++)
- {
- if ((vui->regions[i].guest_phys_addr <= addr) &&
- ((vui->regions[i].guest_phys_addr + vui->regions[i].memory_size) >
- addr))
- {
- *hint = i;
- return (void *) (vui->region_mmap_addr[i] + addr -
- vui->regions[i].guest_phys_addr);
- }
- }
-#endif
- /* *INDENT-OFF* */
- ELOG_TYPE_DECLARE (el) =
- {
- .format = "failed to map guest mem addr %lx",
- .format_args = "i8",
- };
- /* *INDENT-ON* */
- struct
- {
- uword addr;
- } *ed;
- ed = ELOG_DATA (&vlib_global_main.elog_main, el);
- ed->addr = addr;
- *hint = 0;
- return 0;
-}
-
-static_always_inline void *
-map_user_mem (vhost_user_intf_t * vui, uword addr)
-{
- int i;
- for (i = 0; i < vui->nregions; i++)
- {
- if ((vui->regions[i].userspace_addr <= addr) &&
- ((vui->regions[i].userspace_addr + vui->regions[i].memory_size) >
- addr))
- {
- return (void *) (vui->region_mmap_addr[i] + addr -
- vui->regions[i].userspace_addr);
- }
- }
- return 0;
-}
-
-#define VHOST_LOG_PAGE 0x1000
-
-static_always_inline void
-vhost_user_log_dirty_pages_2 (vhost_user_intf_t * vui,
- u64 addr, u64 len, u8 is_host_address)
-{
- if (PREDICT_TRUE (vui->log_base_addr == 0
- || !(vui->features & VIRTIO_FEATURE (VHOST_F_LOG_ALL))))
- {
- return;
- }
- if (is_host_address)
- {
- addr = pointer_to_uword (map_user_mem (vui, (uword) addr));
- }
- if (PREDICT_FALSE ((addr + len - 1) / VHOST_LOG_PAGE / 8 >= vui->log_size))
- {
- vu_log_debug (vui, "vhost_user_log_dirty_pages(): out of range\n");
- return;
- }
-
- CLIB_MEMORY_BARRIER ();
- u64 page = addr / VHOST_LOG_PAGE;
- while (page * VHOST_LOG_PAGE < addr + len)
- {
- ((u8 *) vui->log_base_addr)[page / 8] |= 1 << page % 8;
- page++;
- }
-}
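-
-/*
- * Worked example (illustrative): addr = 0x5000, len = 0x2000 touches
- * 4 KiB pages 5 and 6, so bits 5 and 6 of byte 0 in the shared log
- * bitmap are set for the migration front-end to consume.
- */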
-
-
-#define vhost_user_log_dirty_ring(vui, vq, member) \
- if (PREDICT_FALSE(vq->log_used)) { \
- vhost_user_log_dirty_pages_2(vui, vq->log_guest_addr + STRUCT_OFFSET_OF(vring_used_t, member), \
- sizeof(vq->used->member), 0); \
- }
-
-static_always_inline u8 *
-format_vhost_trace (u8 * s, va_list * va)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
- CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main ();
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_trace_t *t = va_arg (*va, vhost_trace_t *);
- vhost_user_intf_t *vui = vum->vhost_user_interfaces + t->device_index;
- vnet_sw_interface_t *sw;
- u32 indent;
-
- if (pool_is_free (vum->vhost_user_interfaces, vui))
- {
- s = format (s, "vhost-user interface is deleted");
- return s;
- }
- sw = vnet_get_sw_interface (vnm, vui->sw_if_index);
- indent = format_get_indent (s);
- s = format (s, "%U %U queue %d\n", format_white_space, indent,
- format_vnet_sw_interface_name, vnm, sw, t->qid);
-
- s = format (s, "%U virtio flags:\n", format_white_space, indent);
-#define _(n,i,st) \
- if (t->virtio_ring_flags & (1 << VIRTIO_TRACE_F_##n)) \
- s = format (s, "%U %s %s\n", format_white_space, indent, #n, st);
- foreach_virtio_trace_flags
-#undef _
- s = format (s, "%U virtio_net_hdr first_desc_len %u\n",
- format_white_space, indent, t->first_desc_len);
-
- s = format (s, "%U flags 0x%02x gso_type %u\n",
- format_white_space, indent,
- t->hdr.hdr.flags, t->hdr.hdr.gso_type);
-
- if (vui->virtio_net_hdr_sz == 12)
- s = format (s, "%U num_buff %u",
- format_white_space, indent, t->hdr.num_buffers);
-
- return s;
-}
-
-static_always_inline u64
-vhost_user_is_packed_ring_supported (vhost_user_intf_t * vui)
-{
- return (vui->features & VIRTIO_FEATURE (VIRTIO_F_RING_PACKED));
-}
-
-static_always_inline u64
-vhost_user_is_event_idx_supported (vhost_user_intf_t * vui)
-{
- return (vui->features & VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX));
-}
-
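-/*
- * Signal the driver by writing an 8-byte value to the vring's call
- * eventfd (set up via VHOST_USER_SET_VRING_CALL), then reset the
- * interrupt coalescing state.
- */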
-static_always_inline void
-vhost_user_kick (vlib_main_t * vm, vhost_user_vring_t * vq)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- u64 x = 1;
- int fd = UNIX_GET_FD (vq->callfd_idx);
- int rv;
-
- rv = write (fd, &x, sizeof (x));
- if (PREDICT_FALSE (rv <= 0))
- {
- clib_unix_warning
- ("Error: Could not write to unix socket for callfd %d", fd);
- return;
- }
-
- vq->n_since_last_int = 0;
- vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time;
-}
-
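-/*
- * With VIRTIO_RING_F_EVENT_IDX, each side publishes the index at which
- * it next wants a notification in the trailing slot of the opposite
- * ring: avail_event after the last used-ring entry, used_event after
- * the last avail-ring entry, hence the cross-ring reads below.
- */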
-static_always_inline u16
-vhost_user_avail_event_idx (vhost_user_vring_t * vq)
-{
- volatile u16 *event_idx = (u16 *) & (vq->used->ring[vq->qsz_mask + 1]);
-
- return *event_idx;
-}
-
-static_always_inline u16
-vhost_user_used_event_idx (vhost_user_vring_t * vq)
-{
- volatile u16 *event_idx = (u16 *) & (vq->avail->ring[vq->qsz_mask + 1]);
-
- return *event_idx;
-}
-
-static_always_inline u16
-vhost_user_need_event (u16 event_idx, u16 new_idx, u16 old_idx)
-{
- return ((u16) (new_idx - event_idx - 1) < (u16) (new_idx - old_idx));
-}
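-
-/*
- * Worked example (illustrative): with old_idx = 8 and new_idx = 12,
- * event_idx = 10 gives (u16) (12 - 10 - 1) = 1 < (u16) (12 - 8) = 4,
- * so a notification is due; event_idx = 15 gives 65532 < 4, false, so
- * the kick is suppressed. The unsigned arithmetic keeps the comparison
- * correct across 16-bit index wrap.
- */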
-
-static_always_inline void
-vhost_user_send_call_event_idx (vlib_main_t * vm, vhost_user_vring_t * vq)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- u8 first_kick = vq->first_kick;
- u16 event_idx = vhost_user_used_event_idx (vq);
-
- vq->first_kick = 1;
- if (vhost_user_need_event (event_idx, vq->last_used_idx, vq->last_kick) ||
- PREDICT_FALSE (!first_kick))
- {
- vhost_user_kick (vm, vq);
- vq->last_kick = event_idx;
- }
- else
- {
- vq->n_since_last_int = 0;
- vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time;
- }
-}
-
-static_always_inline void
-vhost_user_send_call_event_idx_packed (vlib_main_t * vm,
- vhost_user_vring_t * vq)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- u8 first_kick = vq->first_kick;
- u16 off_wrap;
- u16 event_idx;
- u16 new_idx = vq->last_used_idx;
- u16 old_idx = vq->last_kick;
-
- if (PREDICT_TRUE (vq->avail_event->flags == VRING_EVENT_F_DESC))
- {
- CLIB_COMPILER_BARRIER ();
- off_wrap = vq->avail_event->off_wrap;
- event_idx = off_wrap & 0x7fff;
- if (vq->used_wrap_counter != (off_wrap >> 15))
- event_idx -= (vq->qsz_mask + 1);
-
- if (new_idx <= old_idx)
- old_idx -= (vq->qsz_mask + 1);
-
- vq->first_kick = 1;
- vq->last_kick = event_idx;
- if (vhost_user_need_event (event_idx, new_idx, old_idx) ||
- PREDICT_FALSE (!first_kick))
- vhost_user_kick (vm, vq);
- else
- {
- vq->n_since_last_int = 0;
- vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time;
- }
- }
- else
- vhost_user_kick (vm, vq);
-}
-
-static_always_inline void
-vhost_user_send_call (vlib_main_t * vm, vhost_user_intf_t * vui,
- vhost_user_vring_t * vq)
-{
- if (vhost_user_is_event_idx_supported (vui))
- {
- if (vhost_user_is_packed_ring_supported (vui))
- vhost_user_send_call_event_idx_packed (vm, vq);
- else
- vhost_user_send_call_event_idx (vm, vq);
- }
- else
- vhost_user_kick (vm, vq);
-}
-
-static_always_inline u8
-vui_is_link_up (vhost_user_intf_t * vui)
-{
- return vui->admin_up && vui->is_ready;
-}
-
-static_always_inline void
-vhost_user_update_gso_interface_count (vhost_user_intf_t * vui, u8 add)
-{
- vhost_user_main_t *vum = &vhost_user_main;
-
- if (vui->enable_gso)
- {
- if (add)
- {
- vum->gso_count++;
- }
- else
- {
- ASSERT (vum->gso_count > 0);
- vum->gso_count--;
- }
- }
-}
-
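-/*
- * A packed-ring descriptor is available when its VRING_DESC_F_AVAIL
- * flag matches the ring's avail wrap counter; the counter is kept as
- * the VRING_DESC_F_AVAIL bit value itself (see the XOR in
- * vhost_user_advance_last_avail_idx) so no shifting is needed here.
- */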
-static_always_inline u8
-vhost_user_packed_desc_available (vhost_user_vring_t * vring, u16 idx)
-{
- return (((vring->packed_desc[idx].flags & VRING_DESC_F_AVAIL) ==
- vring->avail_wrap_counter));
-}
-
-static_always_inline void
-vhost_user_advance_last_avail_idx (vhost_user_vring_t * vring)
-{
- vring->last_avail_idx++;
- if (PREDICT_FALSE ((vring->last_avail_idx & vring->qsz_mask) == 0))
- {
- vring->avail_wrap_counter ^= VRING_DESC_F_AVAIL;
- vring->last_avail_idx = 0;
- }
-}
-
-static_always_inline void
-vhost_user_advance_last_avail_table_idx (vhost_user_intf_t * vui,
- vhost_user_vring_t * vring,
- u8 chained)
-{
- if (chained)
- {
- vring_packed_desc_t *desc_table = vring->packed_desc;
-
- /* advance past the chained descriptors to the next avail idx slot */
- while (desc_table[vring->last_avail_idx & vring->qsz_mask].flags &
- VRING_DESC_F_NEXT)
- vhost_user_advance_last_avail_idx (vring);
- }
-
- vhost_user_advance_last_avail_idx (vring);
-}
-
-static_always_inline void
-vhost_user_undo_advanced_last_avail_idx (vhost_user_vring_t * vring)
-{
- if (PREDICT_FALSE ((vring->last_avail_idx & vring->qsz_mask) == 0))
- vring->avail_wrap_counter ^= VRING_DESC_F_AVAIL;
-
- if (PREDICT_FALSE (vring->last_avail_idx == 0))
- vring->last_avail_idx = vring->qsz_mask;
- else
- vring->last_avail_idx--;
-}
-
-static_always_inline void
-vhost_user_dequeue_descs (vhost_user_vring_t * rxvq,
- virtio_net_hdr_mrg_rxbuf_t * hdr,
- u16 * n_descs_processed)
-{
- u16 i;
-
- *n_descs_processed -= (hdr->num_buffers - 1);
- for (i = 0; i < hdr->num_buffers - 1; i++)
- vhost_user_undo_advanced_last_avail_idx (rxvq);
-}
-
-static_always_inline void
-vhost_user_dequeue_chained_descs (vhost_user_vring_t * rxvq,
- u16 * n_descs_processed)
-{
- while (*n_descs_processed)
- {
- vhost_user_undo_advanced_last_avail_idx (rxvq);
- (*n_descs_processed)--;
- }
-}
-
-static_always_inline void
-vhost_user_advance_last_used_idx (vhost_user_vring_t * vring)
-{
- vring->last_used_idx++;
- if (PREDICT_FALSE ((vring->last_used_idx & vring->qsz_mask) == 0))
- {
- vring->used_wrap_counter ^= 1;
- vring->last_used_idx = 0;
- }
-}
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/virtio/vhost_user_input.c b/src/vnet/devices/virtio/vhost_user_input.c
deleted file mode 100644
index bdb3d27245b..00000000000
--- a/src/vnet/devices/virtio/vhost_user_input.c
+++ /dev/null
@@ -1,1473 +0,0 @@
-/*
- *------------------------------------------------------------------
- * vhost-user-input
- *
- * Copyright (c) 2014-2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <fcntl.h> /* for open */
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/uio.h> /* for iovec */
-#include <netinet/in.h>
-#include <sys/vfs.h>
-
-#include <linux/if_arp.h>
-#include <linux/if_tun.h>
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/devices/devices.h>
-#include <vnet/feature/feature.h>
-#include <vnet/udp/udp_packet.h>
-#include <vnet/interface/rx_queue_funcs.h>
-
-#include <vnet/devices/virtio/vhost_user.h>
-#include <vnet/devices/virtio/vhost_user_inline.h>
-
-#include <vnet/ip/ip4_packet.h>
-#include <vnet/ip/ip6_packet.h>
-
-/*
- * When an RX queue is down but active, received packets
- * must be discarded. This value bounds how many packets
- * are discarded during each round.
- */
-#define VHOST_USER_DOWN_DISCARD_COUNT 256
-
-/*
- * When the number of available buffers gets under this threshold,
- * RX node will start discarding packets.
- */
-#define VHOST_USER_RX_BUFFER_STARVATION 32
-
-/*
- * On the receive side, the host should free descriptors as soon
- * as possible in order to avoid TX drop in the VM.
- * This value controls the number of copy operations that are stacked
- * before they are all executed and the descriptors are given back to
- * the guest.
- * The value 64 was obtained by testing (48 and 128 were not as good).
- */
-#define VHOST_USER_RX_COPY_THRESHOLD 64
-
-extern vlib_node_registration_t vhost_user_input_node;
-
-#define foreach_vhost_user_input_func_error \
- _(NO_ERROR, "no error") \
- _(NO_BUFFER, "no available buffer") \
- _(MMAP_FAIL, "mmap failure") \
- _(INDIRECT_OVERFLOW, "indirect descriptor overflows table") \
- _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)") \
- _(NOT_READY, "vhost interface not ready or down") \
- _(FULL_RX_QUEUE, "full rx queue (possible driver tx drop)")
-
-typedef enum
-{
-#define _(f,s) VHOST_USER_INPUT_FUNC_ERROR_##f,
- foreach_vhost_user_input_func_error
-#undef _
- VHOST_USER_INPUT_FUNC_N_ERROR,
-} vhost_user_input_func_error_t;
-
-static __clib_unused char *vhost_user_input_func_error_strings[] = {
-#define _(n,s) s,
- foreach_vhost_user_input_func_error
-#undef _
-};
-
-static_always_inline void
-vhost_user_rx_trace (vhost_trace_t * t,
- vhost_user_intf_t * vui, u16 qid,
- vlib_buffer_t * b, vhost_user_vring_t * txvq,
- u16 last_avail_idx)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- u32 desc_current = txvq->avail->ring[last_avail_idx & txvq->qsz_mask];
- vring_desc_t *hdr_desc = 0;
- virtio_net_hdr_mrg_rxbuf_t *hdr;
- u32 hint = 0;
-
- clib_memset (t, 0, sizeof (*t));
- t->device_index = vui - vum->vhost_user_interfaces;
- t->qid = qid;
-
- hdr_desc = &txvq->desc[desc_current];
- if (txvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT)
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
- /* Header is the first here */
- hdr_desc = map_guest_mem (vui, txvq->desc[desc_current].addr, &hint);
- }
- if (txvq->desc[desc_current].flags & VRING_DESC_F_NEXT)
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
- }
- if (!(txvq->desc[desc_current].flags & VRING_DESC_F_NEXT) &&
- !(txvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT))
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
- }
-
- t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
-
- if (!hdr_desc || !(hdr = map_guest_mem (vui, hdr_desc->addr, &hint)))
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_MAP_ERROR;
- }
- else
- {
- u32 len = vui->virtio_net_hdr_sz;
- memcpy (&t->hdr, hdr, len > hdr_desc->len ? hdr_desc->len : len);
- }
-}
-
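-/*
- * Execute the copy orders queued by the input loop. The loop is
- * software-pipelined: the guest addresses of the next two copies are
- * mapped and prefetched while the current pair is copied; any mapping
- * failure makes the function return non-zero.
- */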
-static_always_inline u32
-vhost_user_input_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
- u16 copy_len, u32 * map_hint)
-{
- void *src0, *src1, *src2, *src3;
- if (PREDICT_TRUE (copy_len >= 4))
- {
- if (PREDICT_FALSE (!(src2 = map_guest_mem (vui, cpy[0].src, map_hint))))
- return 1;
- if (PREDICT_FALSE (!(src3 = map_guest_mem (vui, cpy[1].src, map_hint))))
- return 1;
-
- while (PREDICT_TRUE (copy_len >= 4))
- {
- src0 = src2;
- src1 = src3;
-
- if (PREDICT_FALSE
- (!(src2 = map_guest_mem (vui, cpy[2].src, map_hint))))
- return 1;
- if (PREDICT_FALSE
- (!(src3 = map_guest_mem (vui, cpy[3].src, map_hint))))
- return 1;
-
- clib_prefetch_load (src2);
- clib_prefetch_load (src3);
-
- clib_memcpy_fast ((void *) cpy[0].dst, src0, cpy[0].len);
- clib_memcpy_fast ((void *) cpy[1].dst, src1, cpy[1].len);
- copy_len -= 2;
- cpy += 2;
- }
- }
- while (copy_len)
- {
- if (PREDICT_FALSE (!(src0 = map_guest_mem (vui, cpy->src, map_hint))))
- return 1;
- clib_memcpy_fast ((void *) cpy->dst, src0, cpy->len);
- copy_len -= 1;
- cpy += 1;
- }
- return 0;
-}
-
-/**
- * Try to discard packets from the tx ring (VPP RX path).
- * Returns the number of discarded packets.
- */
-static_always_inline u32
-vhost_user_rx_discard_packet (vlib_main_t * vm,
- vhost_user_intf_t * vui,
- vhost_user_vring_t * txvq, u32 discard_max)
-{
- /*
- * On the RX side, each packet corresponds to one descriptor
- * (it is the same whether it is a simple descriptor, chained, or indirect).
- * Therefore, discarding a packet is like discarding a descriptor.
- */
- u32 discarded_packets = 0;
- u32 avail_idx = txvq->avail->idx;
- u16 mask = txvq->qsz_mask;
- u16 last_avail_idx = txvq->last_avail_idx;
- u16 last_used_idx = txvq->last_used_idx;
- while (discarded_packets != discard_max)
- {
- if (avail_idx == last_avail_idx)
- goto out;
-
- u16 desc_chain_head = txvq->avail->ring[last_avail_idx & mask];
- last_avail_idx++;
- txvq->used->ring[last_used_idx & mask].id = desc_chain_head;
- txvq->used->ring[last_used_idx & mask].len = 0;
- vhost_user_log_dirty_ring (vui, txvq, ring[last_used_idx & mask]);
- last_used_idx++;
- discarded_packets++;
- }
-
-out:
- txvq->last_avail_idx = last_avail_idx;
- txvq->last_used_idx = last_used_idx;
- CLIB_MEMORY_STORE_BARRIER ();
- txvq->used->idx = txvq->last_used_idx;
- vhost_user_log_dirty_ring (vui, txvq, idx);
- return discarded_packets;
-}
-
-/*
- * In case of overflow, we need to rewind the array of allocated buffers.
- */
-static_always_inline void
-vhost_user_input_rewind_buffers (vlib_main_t * vm,
- vhost_cpu_t * cpu, vlib_buffer_t * b_head)
-{
- u32 bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
- vlib_buffer_t *b_current = vlib_get_buffer (vm, bi_current);
- b_current->current_length = 0;
- b_current->flags = 0;
- while (b_current != b_head)
- {
- cpu->rx_buffers_len++;
- bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
- b_current = vlib_get_buffer (vm, bi_current);
- b_current->current_length = 0;
- b_current->flags = 0;
- }
- cpu->rx_buffers_len++;
-}
-
-static_always_inline void
-vhost_user_handle_rx_offload (vlib_buffer_t * b0, u8 * b0_data,
- virtio_net_hdr_t * hdr)
-{
- u8 l4_hdr_sz = 0;
- u8 l4_proto = 0;
- ethernet_header_t *eh = (ethernet_header_t *) b0_data;
- u16 ethertype = clib_net_to_host_u16 (eh->type);
- u16 l2hdr_sz = sizeof (ethernet_header_t);
- vnet_buffer_oflags_t oflags = 0;
-
- if (ethernet_frame_is_tagged (ethertype))
- {
- ethernet_vlan_header_t *vlan = (ethernet_vlan_header_t *) (eh + 1);
-
- ethertype = clib_net_to_host_u16 (vlan->type);
- l2hdr_sz += sizeof (*vlan);
- if (ethertype == ETHERNET_TYPE_VLAN)
- {
- vlan++;
- ethertype = clib_net_to_host_u16 (vlan->type);
- l2hdr_sz += sizeof (*vlan);
- }
- }
- vnet_buffer (b0)->l2_hdr_offset = 0;
- vnet_buffer (b0)->l3_hdr_offset = l2hdr_sz;
- vnet_buffer (b0)->l4_hdr_offset = hdr->csum_start;
- b0->flags |= (VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
-
- if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP4))
- {
- ip4_header_t *ip4 = (ip4_header_t *) (b0_data + l2hdr_sz);
- l4_proto = ip4->protocol;
- b0->flags |= VNET_BUFFER_F_IS_IP4;
- oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
- }
- else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6))
- {
- ip6_header_t *ip6 = (ip6_header_t *) (b0_data + l2hdr_sz);
- l4_proto = ip6->protocol;
- b0->flags |= VNET_BUFFER_F_IS_IP6;
- }
-
- if (l4_proto == IP_PROTOCOL_TCP)
- {
- tcp_header_t *tcp = (tcp_header_t *)
- (b0_data + vnet_buffer (b0)->l4_hdr_offset);
- l4_hdr_sz = tcp_header_bytes (tcp);
- oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
- }
- else if (l4_proto == IP_PROTOCOL_UDP)
- {
- l4_hdr_sz = sizeof (udp_header_t);
- oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
- }
-
- if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP)
- {
- vnet_buffer2 (b0)->gso_size = hdr->gso_size;
- vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz;
- b0->flags |= VNET_BUFFER_F_GSO;
- }
- else if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV4)
- {
- vnet_buffer2 (b0)->gso_size = hdr->gso_size;
- vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz;
- b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP4);
- }
- else if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV6)
- {
- vnet_buffer2 (b0)->gso_size = hdr->gso_size;
- vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz;
- b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP6);
- }
-
- if (oflags)
- vnet_buffer_offload_flags_set (b0, oflags);
-}
-
-static_always_inline void
-vhost_user_input_do_interrupt (vlib_main_t * vm, vhost_user_intf_t * vui,
- vhost_user_vring_t * txvq,
- vhost_user_vring_t * rxvq)
-{
- f64 now = vlib_time_now (vm);
-
- if ((txvq->n_since_last_int) && (txvq->int_deadline < now))
- vhost_user_send_call (vm, vui, txvq);
-
- if ((rxvq->n_since_last_int) && (rxvq->int_deadline < now))
- vhost_user_send_call (vm, vui, rxvq);
-}
-
-static_always_inline void
-vhost_user_input_setup_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
- vhost_user_intf_t * vui,
- u32 * current_config_index, u32 * next_index,
- u32 ** to_next, u32 * n_left_to_next)
-{
- vnet_feature_main_t *fm = &feature_main;
- u8 feature_arc_idx = fm->device_input_feature_arc_index;
-
- if (PREDICT_FALSE (vnet_have_features (feature_arc_idx, vui->sw_if_index)))
- {
- vnet_feature_config_main_t *cm;
- cm = &fm->feature_config_mains[feature_arc_idx];
- *current_config_index = vec_elt (cm->config_index_by_sw_if_index,
- vui->sw_if_index);
- vnet_get_config_data (&cm->config_main, current_config_index,
- next_index, 0);
- }
-
- vlib_get_new_next_frame (vm, node, *next_index, *to_next, *n_left_to_next);
-
- if (*next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT)
- {
- /* give some hints to ethernet-input */
- vlib_next_frame_t *nf;
- vlib_frame_t *f;
- ethernet_input_frame_t *ef;
- nf = vlib_node_runtime_get_next_frame (vm, node, *next_index);
- f = vlib_get_frame (vm, nf->frame);
- f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
-
- ef = vlib_frame_scalar_args (f);
- ef->sw_if_index = vui->sw_if_index;
- ef->hw_if_index = vui->hw_if_index;
- vlib_frame_no_append (f);
- }
-}
-
-static_always_inline u32
-vhost_user_if_input (vlib_main_t *vm, vhost_user_main_t *vum,
- vhost_user_intf_t *vui, u16 qid,
- vlib_node_runtime_t *node, u8 enable_csum)
-{
- vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
- vnet_feature_main_t *fm = &feature_main;
- u16 n_rx_packets = 0;
- u32 n_rx_bytes = 0;
- u16 n_left;
- u32 n_left_to_next, *to_next;
- u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- u32 n_trace = vlib_get_trace_count (vm, node);
- u32 buffer_data_size = vlib_buffer_get_default_data_size (vm);
- u32 map_hint = 0;
- vhost_cpu_t *cpu = &vum->cpus[vm->thread_index];
- u16 copy_len = 0;
- u8 feature_arc_idx = fm->device_input_feature_arc_index;
- u32 current_config_index = ~(u32) 0;
- u16 mask = txvq->qsz_mask;
-
- /* The descriptor table is not ready yet */
- if (PREDICT_FALSE (txvq->avail == 0))
- goto done;
-
- {
- /* do we have pending interrupts ? */
- vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
- vhost_user_input_do_interrupt (vm, vui, txvq, rxvq);
- }
-
- /*
- * Adaptive mode is optimized to reduce interrupts.
- * If the scheduler switches the input node to polling due to a burst
- * of traffic, we tell the driver not to interrupt.
- * When the traffic subsides and the scheduler switches the node back
- * to interrupt mode, we must tell the driver we want interrupts again.
- */
- if (PREDICT_FALSE (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
- {
- if ((node->flags &
- VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) ||
- !(node->flags &
- VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
- /* Tell driver we want notification */
- txvq->used->flags = 0;
- else
- /* Tell driver we don't want notification */
- txvq->used->flags = VRING_USED_F_NO_NOTIFY;
- }
-
- if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE))
- goto done;
-
- n_left = (u16) (txvq->avail->idx - txvq->last_avail_idx);
-
- /* nothing to do */
- if (PREDICT_FALSE (n_left == 0))
- goto done;
-
- if (PREDICT_FALSE (!vui->admin_up || !(txvq->enabled)))
- {
- /*
- * Discard input packet if interface is admin down or vring is not
- * enabled.
- * "For example, for a networking device, in the disabled state
- * client must not supply any new RX packets, but must process
- * and discard any TX packets."
- */
- vhost_user_rx_discard_packet (vm, vui, txvq,
- VHOST_USER_DOWN_DISCARD_COUNT);
- goto done;
- }
-
- if (PREDICT_FALSE (n_left == (mask + 1)))
- {
- /*
- * Informational error logging when VPP is not
- * receiving packets fast enough.
- */
- vlib_error_count (vm, node->node_index,
- VHOST_USER_INPUT_FUNC_ERROR_FULL_RX_QUEUE, 1);
- }
-
- if (n_left > VLIB_FRAME_SIZE)
- n_left = VLIB_FRAME_SIZE;
-
- /*
- * For small packets (< 2 kB), we will not need more than one vlib buffer
- * per packet. If packets are bigger, we will simply yield at some point
- * in the loop and come back later. This is not an issue, as for big
- * packets the processing cost comes mostly from the memory copy.
- * The assumption is that big packets will fit in 40 buffers.
- */
- if (PREDICT_FALSE (cpu->rx_buffers_len < n_left + 1 ||
- cpu->rx_buffers_len < 40))
- {
- u32 curr_len = cpu->rx_buffers_len;
- cpu->rx_buffers_len +=
- vlib_buffer_alloc (vm, cpu->rx_buffers + curr_len,
- VHOST_USER_RX_BUFFERS_N - curr_len);
-
- if (PREDICT_FALSE
- (cpu->rx_buffers_len < VHOST_USER_RX_BUFFER_STARVATION))
- {
- /* In case of buffer starvation, discard some packets from the queue
- * and log the event.
- * We keep doing best effort for the remaining packets. */
- u32 flush = (n_left + 1 > cpu->rx_buffers_len) ?
- n_left + 1 - cpu->rx_buffers_len : 1;
- flush = vhost_user_rx_discard_packet (vm, vui, txvq, flush);
-
- n_left -= flush;
- vlib_increment_simple_counter (vnet_main.
- interface_main.sw_if_counters +
- VNET_INTERFACE_COUNTER_DROP,
- vm->thread_index, vui->sw_if_index,
- flush);
-
- vlib_error_count (vm, vhost_user_input_node.index,
- VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, flush);
- }
- }
-
- vhost_user_input_setup_frame (vm, node, vui, &current_config_index,
- &next_index, &to_next, &n_left_to_next);
-
- u16 last_avail_idx = txvq->last_avail_idx;
- u16 last_used_idx = txvq->last_used_idx;
-
- while (n_left > 0)
- {
- vlib_buffer_t *b_head, *b_current;
- u32 bi_current;
- u16 desc_current;
- u32 desc_data_offset;
- vring_desc_t *desc_table = txvq->desc;
-
- if (PREDICT_FALSE (cpu->rx_buffers_len <= 1))
- {
- /* Not enough rx_buffers.
- * Note: we yield at 1 so we don't need an additional
- * check before the next buffer prefetch.
- */
- n_left = 0;
- break;
- }
-
- desc_current = txvq->avail->ring[last_avail_idx & mask];
- cpu->rx_buffers_len--;
- bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
- b_head = b_current = vlib_get_buffer (vm, bi_current);
- to_next[0] = bi_current; // Do this now so we can forget about bi_current
- to_next++;
- n_left_to_next--;
-
- vlib_prefetch_buffer_with_index
- (vm, cpu->rx_buffers[cpu->rx_buffers_len - 1], LOAD);
-
- /* Just preset the used descriptor id and length for later */
- txvq->used->ring[last_used_idx & mask].id = desc_current;
- txvq->used->ring[last_used_idx & mask].len = 0;
- vhost_user_log_dirty_ring (vui, txvq, ring[last_used_idx & mask]);
-
- /* The buffer should already be initialized */
- b_head->total_length_not_including_first_buffer = 0;
- b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
-
- if (PREDICT_FALSE
- (n_trace > 0 && vlib_trace_buffer (vm, node, next_index, b_head,
- /* follow_chain */ 0)))
- {
- vhost_trace_t *t0 =
- vlib_add_trace (vm, node, b_head, sizeof (t0[0]));
- vhost_user_rx_trace (t0, vui, qid, b_head, txvq, last_avail_idx);
- n_trace--;
- vlib_set_trace_count (vm, node, n_trace);
- }
-
- /* This depends on the setup but is very consistent,
- * so the CPU branch predictor should do a pretty good job
- * of optimizing the decision. */
- if (txvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT)
- {
- desc_table = map_guest_mem (vui, txvq->desc[desc_current].addr,
- &map_hint);
- desc_current = 0;
- if (PREDICT_FALSE (desc_table == 0))
- {
- vlib_error_count (vm, node->node_index,
- VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
- goto out;
- }
- }
-
- desc_data_offset = vui->virtio_net_hdr_sz;
-
- if (enable_csum)
- {
- virtio_net_hdr_mrg_rxbuf_t *hdr;
- u8 *b_data;
- u16 current;
-
- hdr = map_guest_mem (vui, desc_table[desc_current].addr, &map_hint);
- if (PREDICT_FALSE (hdr == 0))
- {
- vlib_error_count (vm, node->node_index,
- VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
- goto out;
- }
- if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
- {
- if ((desc_data_offset == desc_table[desc_current].len) &&
- (desc_table[desc_current].flags & VRING_DESC_F_NEXT))
- {
- current = desc_table[desc_current].next;
- b_data = map_guest_mem (vui, desc_table[current].addr,
- &map_hint);
- if (PREDICT_FALSE (b_data == 0))
- {
- vlib_error_count (vm, node->node_index,
- VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL,
- 1);
- goto out;
- }
- }
- else
- b_data = (u8 *) hdr + desc_data_offset;
-
- vhost_user_handle_rx_offload (b_head, b_data, &hdr->hdr);
- }
- }
-
- while (1)
- {
- /* Get more input if necessary. Or end of packet. */
- if (desc_data_offset == desc_table[desc_current].len)
- {
- if (PREDICT_FALSE (desc_table[desc_current].flags &
- VRING_DESC_F_NEXT))
- {
- desc_current = desc_table[desc_current].next;
- desc_data_offset = 0;
- }
- else
- {
- goto out;
- }
- }
-
- /* Get more output if necessary. Or end of packet. */
- if (PREDICT_FALSE (b_current->current_length == buffer_data_size))
- {
- if (PREDICT_FALSE (cpu->rx_buffers_len == 0))
- {
- /* Cancel speculation */
- to_next--;
- n_left_to_next++;
-
- /*
- * Check whether any buffers are left.
- * If not, just rewind the used buffers and stop.
- * Note: scheduled copies are not cancelled. This is
- * not an issue, as they would still be valid: useless,
- * but valid.
- */
- vhost_user_input_rewind_buffers (vm, cpu, b_head);
- n_left = 0;
- goto stop;
- }
-
- /* Get next output */
- cpu->rx_buffers_len--;
- u32 bi_next = cpu->rx_buffers[cpu->rx_buffers_len];
- b_current->next_buffer = bi_next;
- b_current->flags |= VLIB_BUFFER_NEXT_PRESENT;
- bi_current = bi_next;
- b_current = vlib_get_buffer (vm, bi_current);
- }
-
- /* Prepare a copy order executed later for the data */
- ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
- vhost_copy_t *cpy = &cpu->copy[copy_len];
- copy_len++;
- u32 desc_data_l = desc_table[desc_current].len - desc_data_offset;
- cpy->len = buffer_data_size - b_current->current_length;
- cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len;
- cpy->dst = (uword) (vlib_buffer_get_current (b_current) +
- b_current->current_length);
- cpy->src = desc_table[desc_current].addr + desc_data_offset;
-
- desc_data_offset += cpy->len;
-
- b_current->current_length += cpy->len;
- b_head->total_length_not_including_first_buffer += cpy->len;
- }
-
- out:
-
- n_rx_bytes += b_head->total_length_not_including_first_buffer;
- n_rx_packets++;
-
- b_head->total_length_not_including_first_buffer -=
- b_head->current_length;
-
- /* consume the descriptor and return it as used */
- last_avail_idx++;
- last_used_idx++;
-
- vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
- vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- b_head->error = 0;
-
- if (current_config_index != ~(u32) 0)
- {
- b_head->current_config_index = current_config_index;
- vnet_buffer (b_head)->feature_arc_index = feature_arc_idx;
- }
-
- n_left--;
-
- /*
- * Although separating memory copies from virtio ring parsing
- * is beneficial, we still perform the copies from time to
- * time in order to free some space in the ring.
- */
- if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD))
- {
- if (PREDICT_FALSE (vhost_user_input_copy (vui, cpu->copy,
- copy_len, &map_hint)))
- {
- vlib_error_count (vm, node->node_index,
- VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
- }
- copy_len = 0;
-
- /* give buffers back to driver */
- CLIB_MEMORY_STORE_BARRIER ();
- txvq->used->idx = last_used_idx;
- vhost_user_log_dirty_ring (vui, txvq, idx);
- }
- }
-stop:
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-
- txvq->last_used_idx = last_used_idx;
- txvq->last_avail_idx = last_avail_idx;
-
- /* Do the memory copies */
- if (PREDICT_FALSE (vhost_user_input_copy (vui, cpu->copy, copy_len,
- &map_hint)))
- {
- vlib_error_count (vm, node->node_index,
- VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
- }
-
- /* give buffers back to driver */
- CLIB_MEMORY_STORE_BARRIER ();
- txvq->used->idx = txvq->last_used_idx;
- vhost_user_log_dirty_ring (vui, txvq, idx);
-
- /* interrupt (call) handling */
- if ((txvq->callfd_idx != ~0) &&
- !(txvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
- {
- txvq->n_since_last_int += n_rx_packets;
-
- if (txvq->n_since_last_int > vum->coalesce_frames)
- vhost_user_send_call (vm, vui, txvq);
- }
-
- /* increase rx counters */
- vlib_increment_combined_counter
- (vnet_main.interface_main.combined_sw_if_counters
- + VNET_INTERFACE_COUNTER_RX, vm->thread_index, vui->sw_if_index,
- n_rx_packets, n_rx_bytes);
-
- vnet_device_increment_rx_packets (vm->thread_index, n_rx_packets);
-
-done:
- return n_rx_packets;
-}
-
-static_always_inline void
-vhost_user_mark_desc_consumed (vhost_user_intf_t * vui,
- vhost_user_vring_t * txvq, u16 desc_head,
- u16 n_descs_processed)
-{
- vring_packed_desc_t *desc_table = txvq->packed_desc;
- u16 desc_idx;
- u16 mask = txvq->qsz_mask;
-
- for (desc_idx = 0; desc_idx < n_descs_processed; desc_idx++)
- {
- if (txvq->used_wrap_counter)
- desc_table[(desc_head + desc_idx) & mask].flags |=
- (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
- else
- desc_table[(desc_head + desc_idx) & mask].flags &=
- ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
- vhost_user_advance_last_used_idx (txvq);
- }
-}
-
-static_always_inline void
-vhost_user_rx_trace_packed (vhost_trace_t * t, vhost_user_intf_t * vui,
- u16 qid, vhost_user_vring_t * txvq,
- u16 desc_current)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- vring_packed_desc_t *hdr_desc;
- virtio_net_hdr_mrg_rxbuf_t *hdr;
- u32 hint = 0;
-
- clib_memset (t, 0, sizeof (*t));
- t->device_index = vui - vum->vhost_user_interfaces;
- t->qid = qid;
-
- hdr_desc = &txvq->packed_desc[desc_current];
- if (txvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT)
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
- /* Header is the first here */
- hdr_desc = map_guest_mem (vui, txvq->packed_desc[desc_current].addr,
- &hint);
- }
- if (txvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT)
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
-
- if (!(txvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT) &&
- !(txvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT))
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
-
- t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
-
- if (!hdr_desc || !(hdr = map_guest_mem (vui, hdr_desc->addr, &hint)))
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_MAP_ERROR;
- else
- {
- u32 len = vui->virtio_net_hdr_sz;
- clib_memcpy_fast (&t->hdr, hdr,
- len > hdr_desc->len ? hdr_desc->len : len);
- }
-}
-
-static_always_inline u32
-vhost_user_rx_discard_packet_packed (vlib_main_t * vm,
- vhost_user_intf_t * vui,
- vhost_user_vring_t * txvq,
- u32 discard_max)
-{
- u32 discarded_packets = 0;
- u16 mask = txvq->qsz_mask;
- u16 desc_current, desc_head;
-
- desc_head = desc_current = txvq->last_used_idx & mask;
-
- /*
- * On the RX side, each packet corresponds to one descriptor
- * (it is the same whether it is a simple descriptor, chained, or indirect).
- * Therefore, discarding a packet is like discarding a descriptor.
- */
- while ((discarded_packets != discard_max) &&
- vhost_user_packed_desc_available (txvq, desc_current))
- {
- vhost_user_advance_last_avail_idx (txvq);
- discarded_packets++;
- desc_current = (desc_current + 1) & mask;
- }
-
- if (PREDICT_TRUE (discarded_packets))
- vhost_user_mark_desc_consumed (vui, txvq, desc_head, discarded_packets);
- return (discarded_packets);
-}
-
-static_always_inline u32
-vhost_user_input_copy_packed (vhost_user_intf_t * vui, vhost_copy_t * cpy,
- u16 copy_len, u32 * map_hint)
-{
- void *src0, *src1, *src2, *src3, *src4, *src5, *src6, *src7;
- u8 bad;
- u32 rc = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
-
- if (PREDICT_TRUE (copy_len >= 8))
- {
- src4 = map_guest_mem (vui, cpy[0].src, map_hint);
- src5 = map_guest_mem (vui, cpy[1].src, map_hint);
- src6 = map_guest_mem (vui, cpy[2].src, map_hint);
- src7 = map_guest_mem (vui, cpy[3].src, map_hint);
- bad = (src4 == 0) + (src5 == 0) + (src6 == 0) + (src7 == 0);
- if (PREDICT_FALSE (bad))
- goto one_by_one;
- clib_prefetch_load (src4);
- clib_prefetch_load (src5);
- clib_prefetch_load (src6);
- clib_prefetch_load (src7);
-
- while (PREDICT_TRUE (copy_len >= 8))
- {
- src0 = src4;
- src1 = src5;
- src2 = src6;
- src3 = src7;
-
- src4 = map_guest_mem (vui, cpy[4].src, map_hint);
- src5 = map_guest_mem (vui, cpy[5].src, map_hint);
- src6 = map_guest_mem (vui, cpy[6].src, map_hint);
- src7 = map_guest_mem (vui, cpy[7].src, map_hint);
- bad = (src4 == 0) + (src5 == 0) + (src6 == 0) + (src7 == 0);
- if (PREDICT_FALSE (bad))
- break;
-
- clib_prefetch_load (src4);
- clib_prefetch_load (src5);
- clib_prefetch_load (src6);
- clib_prefetch_load (src7);
-
- clib_memcpy_fast ((void *) cpy[0].dst, src0, cpy[0].len);
- clib_memcpy_fast ((void *) cpy[1].dst, src1, cpy[1].len);
- clib_memcpy_fast ((void *) cpy[2].dst, src2, cpy[2].len);
- clib_memcpy_fast ((void *) cpy[3].dst, src3, cpy[3].len);
- copy_len -= 4;
- cpy += 4;
- }
- }
-
-one_by_one:
- while (copy_len)
- {
- if (PREDICT_FALSE (!(src0 = map_guest_mem (vui, cpy->src, map_hint))))
- {
- rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
- break;
- }
- clib_memcpy_fast ((void *) cpy->dst, src0, cpy->len);
- copy_len -= 1;
- cpy += 1;
- }
- return rc;
-}
-
-static_always_inline u32
-vhost_user_do_offload (vhost_user_intf_t * vui,
- vring_packed_desc_t * desc_table, u16 desc_current,
- u16 mask, vlib_buffer_t * b_head, u32 * map_hint)
-{
- u32 rc = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
- virtio_net_hdr_mrg_rxbuf_t *hdr;
- u8 *b_data;
- u32 desc_data_offset = vui->virtio_net_hdr_sz;
-
- hdr = map_guest_mem (vui, desc_table[desc_current].addr, map_hint);
- if (PREDICT_FALSE (hdr == 0))
- rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
- else if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
- {
- if (desc_data_offset == desc_table[desc_current].len)
- {
- desc_current = (desc_current + 1) & mask;
- b_data =
- map_guest_mem (vui, desc_table[desc_current].addr, map_hint);
- if (PREDICT_FALSE (b_data == 0))
- rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
- else
- vhost_user_handle_rx_offload (b_head, b_data, &hdr->hdr);
- }
- else
- {
- b_data = (u8 *) hdr + desc_data_offset;
- vhost_user_handle_rx_offload (b_head, b_data, &hdr->hdr);
- }
- }
-
- return rc;
-}
-
-static_always_inline u32
-vhost_user_compute_buffers_required (u32 desc_len, u32 buffer_data_size)
-{
- div_t result;
- u32 buffers_required;
-
- if (PREDICT_TRUE (buffer_data_size == 2048))
- {
- buffers_required = desc_len >> 11;
- if ((desc_len & 2047) != 0)
- buffers_required++;
- return (buffers_required);
- }
-
- if (desc_len < buffer_data_size)
- return 1;
-
- result = div (desc_len, buffer_data_size);
- if (result.rem)
- buffers_required = result.quot + 1;
- else
- buffers_required = result.quot;
-
- return (buffers_required);
-}
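The 2048-byte fast path above is a strength-reduced ceiling division: with a power-of-two buffer size, `len >> 11` plus a remainder test replaces div(). A minimal standalone check of both paths, assuming the slow path uses div() exactly as above:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

static uint32_t
buffers_required (uint32_t desc_len, uint32_t buffer_data_size)
{
  if (buffer_data_size == 2048) /* fast path: 2048 == 1 << 11 */
    return (desc_len >> 11) + ((desc_len & 2047) != 0);

  if (desc_len < buffer_data_size)
    return 1;

  div_t r = div (desc_len, buffer_data_size);
  return r.quot + (r.rem != 0);
}

int main (void)
{
  assert (buffers_required (1536, 2048) == 1); /* typical single frame */
  assert (buffers_required (2048, 2048) == 1); /* exact fit */
  assert (buffers_required (9000, 2048) == 5); /* jumbo frame, fast path */
  assert (buffers_required (9000, 4096) == 3); /* larger buffers, slow path */
  return 0;
}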
-
-static_always_inline u32
-vhost_user_compute_indirect_desc_len (vhost_user_intf_t * vui,
- vhost_user_vring_t * txvq,
- u32 buffer_data_size, u16 desc_current,
- u32 * map_hint)
-{
- vring_packed_desc_t *desc_table = txvq->packed_desc;
- u32 desc_len = 0;
- u16 desc_data_offset = vui->virtio_net_hdr_sz;
- u16 desc_idx = desc_current;
- u32 n_descs;
-
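-  /* Each indirect table entry is a 16-byte vring_packed_desc_t, hence len >> 4 */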
- n_descs = desc_table[desc_idx].len >> 4;
- desc_table = map_guest_mem (vui, desc_table[desc_idx].addr, map_hint);
- if (PREDICT_FALSE (desc_table == 0))
- return 0;
-
- for (desc_idx = 0; desc_idx < n_descs; desc_idx++)
- desc_len += desc_table[desc_idx].len;
-
- if (PREDICT_TRUE (desc_len > desc_data_offset))
- desc_len -= desc_data_offset;
-
- return vhost_user_compute_buffers_required (desc_len, buffer_data_size);
-}
-
-static_always_inline u32
-vhost_user_compute_chained_desc_len (vhost_user_intf_t * vui,
- vhost_user_vring_t * txvq,
- u32 buffer_data_size, u16 * current,
- u16 * n_left)
-{
- vring_packed_desc_t *desc_table = txvq->packed_desc;
- u32 desc_len = 0;
- u16 mask = txvq->qsz_mask;
-
- while (desc_table[*current].flags & VRING_DESC_F_NEXT)
- {
- desc_len += desc_table[*current].len;
- (*n_left)++;
- *current = (*current + 1) & mask;
- vhost_user_advance_last_avail_idx (txvq);
- }
- desc_len += desc_table[*current].len;
- (*n_left)++;
- *current = (*current + 1) & mask;
- vhost_user_advance_last_avail_idx (txvq);
-
- if (PREDICT_TRUE (desc_len > vui->virtio_net_hdr_sz))
- desc_len -= vui->virtio_net_hdr_sz;
-
- return vhost_user_compute_buffers_required (desc_len, buffer_data_size);
-}
-
-static_always_inline void
-vhost_user_assemble_packet (vring_packed_desc_t * desc_table,
- u16 * desc_idx, vlib_buffer_t * b_head,
- vlib_buffer_t ** b_current, u32 ** next,
- vlib_buffer_t *** b, u32 * bi_current,
- vhost_cpu_t * cpu, u16 * copy_len,
- u32 * buffers_used, u32 buffers_required,
- u32 * desc_data_offset, u32 buffer_data_size,
- u16 mask)
-{
- u32 desc_data_l;
-
- while (*desc_data_offset < desc_table[*desc_idx].len)
- {
- /* Get more output if necessary. Or end of packet. */
- if (PREDICT_FALSE ((*b_current)->current_length == buffer_data_size))
- {
- /* Get next output */
- u32 bi_next = **next;
- (*next)++;
- (*b_current)->next_buffer = bi_next;
- (*b_current)->flags |= VLIB_BUFFER_NEXT_PRESENT;
- *bi_current = bi_next;
- *b_current = **b;
- (*b)++;
- (*buffers_used)++;
- ASSERT (*buffers_used <= buffers_required);
- }
-
- /* Prepare a copy order executed later for the data */
- ASSERT (*copy_len < VHOST_USER_COPY_ARRAY_N);
- vhost_copy_t *cpy = &cpu->copy[*copy_len];
- (*copy_len)++;
- desc_data_l = desc_table[*desc_idx].len - *desc_data_offset;
- cpy->len = buffer_data_size - (*b_current)->current_length;
- cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len;
- cpy->dst = (uword) (vlib_buffer_get_current (*b_current) +
- (*b_current)->current_length);
- cpy->src = desc_table[*desc_idx].addr + *desc_data_offset;
-
- *desc_data_offset += cpy->len;
-
- (*b_current)->current_length += cpy->len;
- b_head->total_length_not_including_first_buffer += cpy->len;
- }
-  *desc_idx = (*desc_idx + 1) & mask;
- *desc_data_offset = 0;
-}
-
-static_always_inline u32
-vhost_user_if_input_packed (vlib_main_t *vm, vhost_user_main_t *vum,
- vhost_user_intf_t *vui, u16 qid,
- vlib_node_runtime_t *node, u8 enable_csum)
-{
- vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
- vnet_feature_main_t *fm = &feature_main;
- u8 feature_arc_idx = fm->device_input_feature_arc_index;
- u16 n_rx_packets = 0;
- u32 n_rx_bytes = 0;
- u16 n_left = 0;
- u32 buffers_required = 0;
- u32 n_left_to_next, *to_next;
- u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- u32 n_trace = vlib_get_trace_count (vm, node);
- u32 buffer_data_size = vlib_buffer_get_default_data_size (vm);
- u32 map_hint = 0;
- vhost_cpu_t *cpu = &vum->cpus[vm->thread_index];
- u16 copy_len = 0;
- u32 current_config_index = ~0;
- u16 mask = txvq->qsz_mask;
- u16 desc_current, desc_head, last_used_idx;
- vring_packed_desc_t *desc_table = 0;
- u32 n_descs_processed = 0;
- u32 rv;
- vlib_buffer_t **b;
- u32 *next;
- u32 buffers_used = 0;
- u16 current, n_descs_to_process;
-
- /* The descriptor table is not ready yet */
- if (PREDICT_FALSE (txvq->packed_desc == 0))
- goto done;
-
- /* do we have pending interrupts ? */
- vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
- vhost_user_input_do_interrupt (vm, vui, txvq, rxvq);
-
- /*
- * Adaptive mode is optimized to reduce interrupts.
- * If the scheduler switches the input node to polling due
- * to a burst of traffic, we tell the driver not to interrupt.
- * When the traffic subsides and the scheduler switches the node back
- * to interrupt mode, we must tell the driver we want interrupts again.
- */
- if (PREDICT_FALSE (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
- {
- if ((node->flags &
- VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) ||
- !(node->flags &
- VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
- /* Tell driver we want notification */
- txvq->used_event->flags = 0;
- else
- /* Tell driver we don't want notification */
- txvq->used_event->flags = VRING_EVENT_F_DISABLE;
- }
-
- last_used_idx = txvq->last_used_idx & mask;
- desc_head = desc_current = last_used_idx;
-
- if (vhost_user_packed_desc_available (txvq, desc_current) == 0)
- goto done;
-
- if (PREDICT_FALSE (!vui->admin_up || !vui->is_ready || !(txvq->enabled)))
- {
- /*
- * Discard input packet if interface is admin down or vring is not
- * enabled.
- * "For example, for a networking device, in the disabled state
- * client must not supply any new RX packets, but must process
- * and discard any TX packets."
- */
- rv = vhost_user_rx_discard_packet_packed (vm, vui, txvq,
- VHOST_USER_DOWN_DISCARD_COUNT);
- vlib_error_count (vm, vhost_user_input_node.index,
- VHOST_USER_INPUT_FUNC_ERROR_NOT_READY, rv);
- goto done;
- }
-
- vhost_user_input_setup_frame (vm, node, vui, &current_config_index,
- &next_index, &to_next, &n_left_to_next);
-
- /*
- * Compute n_left and total buffers needed
- */
- desc_table = txvq->packed_desc;
- current = desc_current;
- while (vhost_user_packed_desc_available (txvq, current) &&
- (n_left < VLIB_FRAME_SIZE))
- {
- if (desc_table[current].flags & VRING_DESC_F_INDIRECT)
- {
- buffers_required +=
- vhost_user_compute_indirect_desc_len (vui, txvq, buffer_data_size,
- current, &map_hint);
- n_left++;
- current = (current + 1) & mask;
- vhost_user_advance_last_avail_idx (txvq);
- }
- else
- {
- buffers_required +=
- vhost_user_compute_chained_desc_len (vui, txvq, buffer_data_size,
- &current, &n_left);
- }
- }
-
- /* Something is broken if we need more than 10000 buffers */
- if (PREDICT_FALSE ((buffers_required == 0) || (buffers_required > 10000)))
- {
- rv = vhost_user_rx_discard_packet_packed (vm, vui, txvq, n_left);
- vlib_error_count (vm, vhost_user_input_node.index,
- VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, rv);
- goto done;
- }
-
- vec_validate (cpu->to_next_list, buffers_required);
- rv = vlib_buffer_alloc (vm, cpu->to_next_list, buffers_required);
- if (PREDICT_FALSE (rv != buffers_required))
- {
- vlib_buffer_free (vm, cpu->to_next_list, rv);
- rv = vhost_user_rx_discard_packet_packed (vm, vui, txvq, n_left);
- vlib_error_count (vm, vhost_user_input_node.index,
- VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, rv);
- goto done;
- }
-
- next = cpu->to_next_list;
- vec_validate (cpu->rx_buffers_pdesc, buffers_required);
- vlib_get_buffers (vm, next, cpu->rx_buffers_pdesc, buffers_required);
- b = cpu->rx_buffers_pdesc;
- n_descs_processed = n_left;
-
- while (n_left)
- {
- vlib_buffer_t *b_head, *b_current;
- u32 bi_current;
- u32 desc_data_offset;
- u16 desc_idx = desc_current;
- u32 n_descs;
-
- desc_table = txvq->packed_desc;
- to_next[0] = bi_current = next[0];
- b_head = b_current = b[0];
- b++;
- buffers_used++;
- ASSERT (buffers_used <= buffers_required);
- to_next++;
- next++;
- n_left_to_next--;
-
- /* The buffer should already be initialized */
- b_head->total_length_not_including_first_buffer = 0;
- b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
- desc_data_offset = vui->virtio_net_hdr_sz;
- n_descs_to_process = 1;
-
- if (desc_table[desc_idx].flags & VRING_DESC_F_INDIRECT)
- {
- n_descs = desc_table[desc_idx].len >> 4;
- desc_table = map_guest_mem (vui, desc_table[desc_idx].addr,
- &map_hint);
- desc_idx = 0;
- if (PREDICT_FALSE (desc_table == 0) ||
- (enable_csum &&
- (PREDICT_FALSE
- (vhost_user_do_offload
- (vui, desc_table, desc_idx, mask, b_head,
- &map_hint) != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))))
- {
- vlib_error_count (vm, node->node_index,
- VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
- to_next--;
- next--;
- n_left_to_next++;
- buffers_used--;
- b--;
- goto out;
- }
- while (n_descs)
- {
- vhost_user_assemble_packet (desc_table, &desc_idx, b_head,
- &b_current, &next, &b, &bi_current,
- cpu, &copy_len, &buffers_used,
- buffers_required, &desc_data_offset,
- buffer_data_size, mask);
- n_descs--;
- }
- }
- else
- {
- if (enable_csum)
- {
- rv = vhost_user_do_offload (vui, desc_table, desc_idx, mask,
- b_head, &map_hint);
- if (PREDICT_FALSE (rv != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))
- {
- vlib_error_count (vm, node->node_index, rv, 1);
- to_next--;
- next--;
- n_left_to_next++;
- buffers_used--;
- b--;
- goto out;
- }
- }
- /*
- * For chained descriptor, we process all chains in a single while
- * loop. So count how many descriptors in the chain.
- */
- n_descs_to_process = 1;
- while (desc_table[desc_idx].flags & VRING_DESC_F_NEXT)
- {
- vhost_user_assemble_packet (desc_table, &desc_idx, b_head,
- &b_current, &next, &b, &bi_current,
- cpu, &copy_len, &buffers_used,
- buffers_required, &desc_data_offset,
- buffer_data_size, mask);
- n_descs_to_process++;
- }
- vhost_user_assemble_packet (desc_table, &desc_idx, b_head,
- &b_current, &next, &b, &bi_current,
- cpu, &copy_len, &buffers_used,
- buffers_required, &desc_data_offset,
- buffer_data_size, mask);
- }
-
- n_rx_bytes += b_head->total_length_not_including_first_buffer;
- n_rx_packets++;
-
- b_head->total_length_not_including_first_buffer -=
- b_head->current_length;
-
- vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
- vnet_buffer (b_head)->sw_if_index[VLIB_TX] = ~0;
- b_head->error = 0;
-
- if (current_config_index != ~0)
- {
- b_head->current_config_index = current_config_index;
- vnet_buffer (b_head)->feature_arc_index = feature_arc_idx;
- }
-
- out:
- ASSERT (n_left >= n_descs_to_process);
- n_left -= n_descs_to_process;
-
-      /* advance to next descriptor */
- desc_current = (desc_current + n_descs_to_process) & mask;
-
- /*
- * Although separating memory copies from virtio ring parsing
-       * is beneficial, we occasionally perform the copies early in
-       * order to free some space in the ring.
- */
- if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD))
- {
- rv = vhost_user_input_copy_packed (vui, cpu->copy, copy_len,
- &map_hint);
- if (PREDICT_FALSE (rv != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))
- vlib_error_count (vm, node->node_index, rv, 1);
- copy_len = 0;
- }
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-
- /* Do the memory copies */
- rv = vhost_user_input_copy_packed (vui, cpu->copy, copy_len, &map_hint);
- if (PREDICT_FALSE (rv != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))
- vlib_error_count (vm, node->node_index, rv, 1);
-
- /* Must do the tracing before giving buffers back to driver */
- if (PREDICT_FALSE (n_trace))
- {
- u32 left = n_rx_packets;
-
- b = cpu->rx_buffers_pdesc;
- while (n_trace && left)
- {
- if (PREDICT_TRUE
- (vlib_trace_buffer
- (vm, node, next_index, b[0], /* follow_chain */ 0)))
- {
- vhost_trace_t *t0;
- t0 = vlib_add_trace (vm, node, b[0], sizeof (t0[0]));
- vhost_user_rx_trace_packed (t0, vui, qid, txvq, last_used_idx);
- last_used_idx = (last_used_idx + 1) & mask;
- n_trace--;
- vlib_set_trace_count (vm, node, n_trace);
- }
- left--;
- b++;
- }
- }
-
- /*
- * Give buffers back to driver.
- */
- vhost_user_mark_desc_consumed (vui, txvq, desc_head, n_descs_processed);
-
- /* interrupt (call) handling */
- if ((txvq->callfd_idx != ~0) &&
- (txvq->avail_event->flags != VRING_EVENT_F_DISABLE))
- {
- txvq->n_since_last_int += n_rx_packets;
- if (txvq->n_since_last_int > vum->coalesce_frames)
- vhost_user_send_call (vm, vui, txvq);
- }
-
- /* increase rx counters */
- vlib_increment_combined_counter
- (vnet_main.interface_main.combined_sw_if_counters
- + VNET_INTERFACE_COUNTER_RX, vm->thread_index, vui->sw_if_index,
- n_rx_packets, n_rx_bytes);
-
- vnet_device_increment_rx_packets (vm->thread_index, n_rx_packets);
-
- if (PREDICT_FALSE (buffers_used < buffers_required))
- vlib_buffer_free (vm, next, buffers_required - buffers_used);
-
-done:
- return n_rx_packets;
-}
-
-VLIB_NODE_FN (vhost_user_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- uword n_rx_packets = 0;
- vhost_user_intf_t *vui;
- vnet_hw_if_rxq_poll_vector_t *pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
- vnet_hw_if_rxq_poll_vector_t *pve;
-
- vec_foreach (pve, pv)
- {
- vui = pool_elt_at_index (vum->vhost_user_interfaces, pve->dev_instance);
- if (vhost_user_is_packed_ring_supported (vui))
- {
- if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM))
- n_rx_packets += vhost_user_if_input_packed (
- vm, vum, vui, pve->queue_id, node, 1);
- else
- n_rx_packets += vhost_user_if_input_packed (
- vm, vum, vui, pve->queue_id, node, 0);
- }
- else
- {
- if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM))
- n_rx_packets +=
- vhost_user_if_input (vm, vum, vui, pve->queue_id, node, 1);
- else
- n_rx_packets +=
- vhost_user_if_input (vm, vum, vui, pve->queue_id, node, 0);
- }
- }
-
- return n_rx_packets;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (vhost_user_input_node) = {
- .type = VLIB_NODE_TYPE_INPUT,
- .name = "vhost-user-input",
- .sibling_of = "device-input",
- .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
-
- /* Will be enabled if/when hardware is detected. */
- .state = VLIB_NODE_STATE_DISABLED,
-
- .format_buffer = format_ethernet_header_with_length,
- .format_trace = format_vhost_trace,
-
- .n_errors = VHOST_USER_INPUT_FUNC_N_ERROR,
- .error_strings = vhost_user_input_func_error_strings,
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/virtio/vhost_user_output.c b/src/vnet/devices/virtio/vhost_user_output.c
deleted file mode 100644
index 15e39a11692..00000000000
--- a/src/vnet/devices/virtio/vhost_user_output.c
+++ /dev/null
@@ -1,1144 +0,0 @@
-/*
- *------------------------------------------------------------------
- * vhost-user-output
- *
- * Copyright (c) 2014-2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <stddef.h>
-#include <fcntl.h> /* for open */
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/uio.h> /* for iovec */
-#include <netinet/in.h>
-#include <sys/vfs.h>
-
-#include <linux/if_arp.h>
-#include <linux/if_tun.h>
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/devices/devices.h>
-#include <vnet/feature/feature.h>
-#include <vnet/ip/ip_psh_cksum.h>
-
-#include <vnet/devices/virtio/vhost_user.h>
-#include <vnet/devices/virtio/vhost_user_inline.h>
-
-#include <vnet/gso/hdr_offset_parser.h>
-/*
- * On the transmit side, we keep processing the buffers from vlib in the while
- * loop and prepare the copy order to be executed later. However, the static
- * array in which we keep the copy order is limited to VHOST_USER_COPY_ARRAY_N
- * entries. In order not to corrupt memory, we have to do the copy when the
- * static array reaches the copy threshold, and we need headroom because the
- * inner loop can queue many more entries for a single packet after the
- * threshold check: a 64KB jumbo frame takes fewer than 40 vlib buffers at
- * our default buffer size of 2048, but with a typical guest descriptor
- * length of 1536 it may take roughly twice as many descriptors. Use 200
- * for the extra head room.
- */
-#define VHOST_USER_TX_COPY_THRESHOLD (VHOST_USER_COPY_ARRAY_N - 200)
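Concretely, the headroom math works out as follows. This is a sketch only: the value of VHOST_USER_COPY_ARRAY_N below is an assumption for illustration, not taken from this file:

#include <stdio.h>

#define VHOST_USER_COPY_ARRAY_N 4096 /* assumed; really defined in vhost_user.h */
#define VHOST_USER_TX_COPY_THRESHOLD (VHOST_USER_COPY_ARRAY_N - 200)

int main (void)
{
  /* Worst case appended per packet after the threshold check:
   * a 64KB jumbo frame split into 1536-byte guest descriptors,
   * plus one entry for the virtio-net header copy. */
  int worst_case_entries = (65536 + 1535) / 1536 + 1; /* ~44 */
  printf ("threshold %d, headroom %d, worst-case burst %d\n",
	  VHOST_USER_TX_COPY_THRESHOLD,
	  VHOST_USER_COPY_ARRAY_N - VHOST_USER_TX_COPY_THRESHOLD,
	  worst_case_entries);
  return 0;
}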
-
-extern vnet_device_class_t vhost_user_device_class;
-
-#define foreach_vhost_user_tx_func_error \
- _(NONE, "no error") \
- _(NOT_READY, "vhost vring not ready") \
- _(DOWN, "vhost interface is down") \
- _(PKT_DROP_NOBUF, "tx packet drops (no available descriptors)") \
- _(PKT_DROP_NOMRG, "tx packet drops (cannot merge descriptors)") \
- _(MMAP_FAIL, "mmap failure") \
- _(INDIRECT_OVERFLOW, "indirect descriptor table overflow")
-
-typedef enum
-{
-#define _(f,s) VHOST_USER_TX_FUNC_ERROR_##f,
- foreach_vhost_user_tx_func_error
-#undef _
- VHOST_USER_TX_FUNC_N_ERROR,
-} vhost_user_tx_func_error_t;
-
-static __clib_unused char *vhost_user_tx_func_error_strings[] = {
-#define _(n,s) s,
- foreach_vhost_user_tx_func_error
-#undef _
-};
-
-static __clib_unused u8 *
-format_vhost_user_interface_name (u8 * s, va_list * args)
-{
- u32 i = va_arg (*args, u32);
- u32 show_dev_instance = ~0;
- vhost_user_main_t *vum = &vhost_user_main;
-
- if (i < vec_len (vum->show_dev_instance_by_real_dev_instance))
- show_dev_instance = vum->show_dev_instance_by_real_dev_instance[i];
-
- if (show_dev_instance != ~0)
- i = show_dev_instance;
-
- s = format (s, "VirtualEthernet0/0/%d", i);
- return s;
-}
-
-static __clib_unused int
-vhost_user_name_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance)
-{
- // FIXME: check if the new dev instance is already used
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui = pool_elt_at_index (vum->vhost_user_interfaces,
- hi->dev_instance);
-
- vec_validate_init_empty (vum->show_dev_instance_by_real_dev_instance,
- hi->dev_instance, ~0);
-
- vum->show_dev_instance_by_real_dev_instance[hi->dev_instance] =
- new_dev_instance;
-
- vu_log_debug (vui, "renumbered vhost-user interface dev_instance %d to %d",
- hi->dev_instance, new_dev_instance);
-
- return 0;
-}
-
-static_always_inline void
-vhost_user_tx_trace (vhost_trace_t * t,
- vhost_user_intf_t * vui, u16 qid,
- vlib_buffer_t * b, vhost_user_vring_t * rxvq)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- u32 last_avail_idx = rxvq->last_avail_idx;
- u32 desc_current = rxvq->avail->ring[last_avail_idx & rxvq->qsz_mask];
- vring_desc_t *hdr_desc = 0;
- u32 hint = 0;
-
- clib_memset (t, 0, sizeof (*t));
- t->device_index = vui - vum->vhost_user_interfaces;
- t->qid = qid;
-
- hdr_desc = &rxvq->desc[desc_current];
- if (rxvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT)
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
- /* Header is the first here */
- hdr_desc = map_guest_mem (vui, rxvq->desc[desc_current].addr, &hint);
- }
- if (rxvq->desc[desc_current].flags & VRING_DESC_F_NEXT)
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
- }
- if (!(rxvq->desc[desc_current].flags & VRING_DESC_F_NEXT) &&
- !(rxvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT))
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
- }
-
- t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
-}
-
-static_always_inline u32
-vhost_user_tx_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
- u16 copy_len, u32 * map_hint)
-{
- void *dst0, *dst1, *dst2, *dst3;
- if (PREDICT_TRUE (copy_len >= 4))
- {
- if (PREDICT_FALSE (!(dst2 = map_guest_mem (vui, cpy[0].dst, map_hint))))
- return 1;
- if (PREDICT_FALSE (!(dst3 = map_guest_mem (vui, cpy[1].dst, map_hint))))
- return 1;
- while (PREDICT_TRUE (copy_len >= 4))
- {
- dst0 = dst2;
- dst1 = dst3;
-
- if (PREDICT_FALSE
- (!(dst2 = map_guest_mem (vui, cpy[2].dst, map_hint))))
- return 1;
- if (PREDICT_FALSE
- (!(dst3 = map_guest_mem (vui, cpy[3].dst, map_hint))))
- return 1;
-
- clib_prefetch_load ((void *) cpy[2].src);
- clib_prefetch_load ((void *) cpy[3].src);
-
- clib_memcpy_fast (dst0, (void *) cpy[0].src, cpy[0].len);
- clib_memcpy_fast (dst1, (void *) cpy[1].src, cpy[1].len);
-
- vhost_user_log_dirty_pages_2 (vui, cpy[0].dst, cpy[0].len, 1);
- vhost_user_log_dirty_pages_2 (vui, cpy[1].dst, cpy[1].len, 1);
- copy_len -= 2;
- cpy += 2;
- }
- }
- while (copy_len)
- {
- if (PREDICT_FALSE (!(dst0 = map_guest_mem (vui, cpy->dst, map_hint))))
- return 1;
- clib_memcpy_fast (dst0, (void *) cpy->src, cpy->len);
- vhost_user_log_dirty_pages_2 (vui, cpy->dst, cpy->len, 1);
- copy_len -= 1;
- cpy += 1;
- }
- return 0;
-}
-
-static_always_inline void
-vhost_user_handle_tx_offload (vhost_user_intf_t * vui, vlib_buffer_t * b,
- virtio_net_hdr_t * hdr)
-{
- generic_header_offset_t gho = { 0 };
- int is_ip4 = b->flags & VNET_BUFFER_F_IS_IP4;
- int is_ip6 = b->flags & VNET_BUFFER_F_IS_IP6;
- vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
- u16 psh_cksum = 0;
- ip4_header_t *ip4 = 0;
- ip6_header_t *ip6 = 0;
-
- ASSERT (!(is_ip4 && is_ip6));
- vnet_generic_header_offset_parser (b, &gho, 1 /* l2 */ , is_ip4, is_ip6);
- if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
- {
- ip4 =
- (ip4_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
- ip4->checksum = ip4_header_checksum (ip4);
- psh_cksum = ip4_pseudo_header_cksum (ip4);
- }
- else
- {
- ip6 = (ip6_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
- psh_cksum = ip6_pseudo_header_cksum (ip6);
- }
-
- /* checksum offload */
- if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
- {
- udp_header_t *udp =
- (udp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
- udp->checksum = psh_cksum;
- hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = gho.l4_hdr_offset;
- hdr->csum_offset = offsetof (udp_header_t, checksum);
- }
- else if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
- {
- tcp_header_t *tcp =
- (tcp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
- tcp->checksum = psh_cksum;
- hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = gho.l4_hdr_offset;
- hdr->csum_offset = offsetof (tcp_header_t, checksum);
- }
-
- /* GSO offload */
- if (b->flags & VNET_BUFFER_F_GSO)
- {
- if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
- {
- if (is_ip4 &&
- (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO4)))
- {
- hdr->gso_size = vnet_buffer2 (b)->gso_size;
- hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
- }
- else if (is_ip6 &&
- (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO6)))
- {
- hdr->gso_size = vnet_buffer2 (b)->gso_size;
- hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
- }
- }
- else if ((vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_UFO)) &&
- (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM))
- {
- hdr->gso_size = vnet_buffer2 (b)->gso_size;
- hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
- }
- }
-}
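To make the csum_start/csum_offset contract above concrete: per the virtio-net spec, checksumming begins csum_start bytes into the packet and the computed 16-bit checksum is stored csum_offset bytes after that point. A standalone illustration for TCP over IPv4, where the offsets assume an untagged Ethernet frame and an IPv4 header without options:

#include <stdint.h>
#include <stdio.h>

/* Minimal virtio_net_hdr fields relevant to checksum offload */
typedef struct
{
  uint8_t flags;	/* VIRTIO_NET_HDR_F_NEEDS_CSUM */
  uint16_t csum_start;	/* where checksumming begins */
  uint16_t csum_offset; /* where to store the result, relative to csum_start */
} net_hdr_csum_t;

int main (void)
{
  /* Ethernet (14) + IPv4 without options (20) puts L4 at offset 34;
   * the TCP checksum field sits 16 bytes into the TCP header. */
  net_hdr_csum_t hdr = { .flags = 1, .csum_start = 14 + 20, .csum_offset = 16 };
  printf ("checksum over [%u..end], result stored at byte %u\n",
	  hdr.csum_start, hdr.csum_start + hdr.csum_offset);
  return 0;
}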
-
-static_always_inline void
-vhost_user_mark_desc_available (vlib_main_t * vm, vhost_user_intf_t * vui,
- vhost_user_vring_t * rxvq,
- u16 * n_descs_processed, u8 chained,
- vlib_frame_t * frame, u32 n_left)
-{
- u16 desc_idx, flags;
- vring_packed_desc_t *desc_table = rxvq->packed_desc;
- u16 last_used_idx = rxvq->last_used_idx;
-
- if (PREDICT_FALSE (*n_descs_processed == 0))
- return;
-
- if (rxvq->used_wrap_counter)
- flags = desc_table[last_used_idx & rxvq->qsz_mask].flags |
- (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
- else
- flags = desc_table[last_used_idx & rxvq->qsz_mask].flags &
- ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
-
- vhost_user_advance_last_used_idx (rxvq);
-
- for (desc_idx = 1; desc_idx < *n_descs_processed; desc_idx++)
- {
- if (rxvq->used_wrap_counter)
- desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags |=
- (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
- else
- desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags &=
- ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
- vhost_user_advance_last_used_idx (rxvq);
- }
-
- desc_table[last_used_idx & rxvq->qsz_mask].flags = flags;
-
- *n_descs_processed = 0;
-
- if (chained)
- {
- vring_packed_desc_t *desc_table = rxvq->packed_desc;
-
- while (desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags &
- VRING_DESC_F_NEXT)
- vhost_user_advance_last_used_idx (rxvq);
-
- /* Advance past the current chained table entries */
- vhost_user_advance_last_used_idx (rxvq);
- }
-
- /* interrupt (call) handling */
- if ((rxvq->callfd_idx != ~0) &&
- (rxvq->avail_event->flags != VRING_EVENT_F_DISABLE))
- {
- vhost_user_main_t *vum = &vhost_user_main;
-
- rxvq->n_since_last_int += frame->n_vectors - n_left;
- if (rxvq->n_since_last_int > vum->coalesce_frames)
- vhost_user_send_call (vm, vui, rxvq);
- }
-}
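The AVAIL/USED flag handling above follows the virtio 1.1 packed-ring rule: the device marks a descriptor used by setting both flag bits equal to its current used wrap counter, and the head descriptor's flags are written last so the guest never observes a partially returned chain. A small sketch of just the flag computation:

#include <stdint.h>
#include <stdio.h>

#define VRING_DESC_F_AVAIL (1 << 7)
#define VRING_DESC_F_USED  (1 << 15)

/* Flags the device writes back for a used descriptor, given its
 * current used wrap counter (virtio 1.1 packed ring). */
static uint16_t
used_flags (uint16_t old_flags, int used_wrap_counter)
{
  if (used_wrap_counter)
    return old_flags | (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
  return old_flags & ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
}

int main (void)
{
  printf ("wrap=1: 0x%04x  wrap=0: 0x%04x\n",
	  used_flags (0, 1), used_flags (0xffff, 0));
  return 0;
}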
-
-static_always_inline void
-vhost_user_tx_trace_packed (vhost_trace_t * t, vhost_user_intf_t * vui,
- u16 qid, vlib_buffer_t * b,
- vhost_user_vring_t * rxvq)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- u32 last_avail_idx = rxvq->last_avail_idx;
- u32 desc_current = last_avail_idx & rxvq->qsz_mask;
- vring_packed_desc_t *hdr_desc = 0;
- u32 hint = 0;
-
- clib_memset (t, 0, sizeof (*t));
- t->device_index = vui - vum->vhost_user_interfaces;
- t->qid = qid;
-
- hdr_desc = &rxvq->packed_desc[desc_current];
- if (rxvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT)
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
- /* Header is the first here */
- hdr_desc = map_guest_mem (vui, rxvq->packed_desc[desc_current].addr,
- &hint);
- }
- if (rxvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT)
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
- }
- if (!(rxvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT) &&
- !(rxvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT))
- {
- t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
- }
-
- t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
-}
-
-static_always_inline uword
-vhost_user_device_class_packed (vlib_main_t *vm, vlib_node_runtime_t *node,
- vlib_frame_t *frame, vhost_user_intf_t *vui,
- vhost_user_vring_t *rxvq)
-{
- u32 *buffers = vlib_frame_vector_args (frame);
- u32 n_left = frame->n_vectors;
- vhost_user_main_t *vum = &vhost_user_main;
- u32 qid = rxvq->qid;
- u8 error;
- u32 thread_index = vm->thread_index;
- vhost_cpu_t *cpu = &vum->cpus[thread_index];
- u32 map_hint = 0;
- u8 retry = 8;
- u16 copy_len;
- u16 tx_headers_len;
- vring_packed_desc_t *desc_table;
- u32 or_flags;
- u16 desc_head, desc_index, desc_len;
- u16 n_descs_processed;
- u8 indirect, chained;
-
-retry:
- error = VHOST_USER_TX_FUNC_ERROR_NONE;
- tx_headers_len = 0;
- copy_len = 0;
- n_descs_processed = 0;
-
- while (n_left > 0)
- {
- vlib_buffer_t *b0, *current_b0;
- uword buffer_map_addr;
- u32 buffer_len;
- u16 bytes_left;
- u32 total_desc_len = 0;
- u16 n_entries = 0;
-
- indirect = 0;
- chained = 0;
- if (PREDICT_TRUE (n_left > 1))
- vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);
-
- b0 = vlib_get_buffer (vm, buffers[0]);
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- cpu->current_trace = vlib_add_trace (vm, node, b0,
- sizeof (*cpu->current_trace));
- vhost_user_tx_trace_packed (cpu->current_trace, vui, qid / 2, b0,
- rxvq);
- }
-
- desc_table = rxvq->packed_desc;
- desc_head = desc_index = rxvq->last_avail_idx & rxvq->qsz_mask;
- if (PREDICT_FALSE (!vhost_user_packed_desc_available (rxvq, desc_head)))
- {
- error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
- goto done;
- }
- /*
- * Go deeper in case of indirect descriptor.
- * To test it, turn off mrg_rxbuf.
- */
- if (desc_table[desc_head].flags & VRING_DESC_F_INDIRECT)
- {
- indirect = 1;
- if (PREDICT_FALSE (desc_table[desc_head].len <
- sizeof (vring_packed_desc_t)))
- {
- error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
- goto done;
- }
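-	  /* Each indirect table entry is 16 bytes, hence len >> 4 */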
- n_entries = desc_table[desc_head].len >> 4;
- desc_table = map_guest_mem (vui, desc_table[desc_index].addr,
- &map_hint);
- if (PREDICT_FALSE (desc_table == 0))
- {
- error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
- goto done;
- }
- desc_index = 0;
- }
- else if (rxvq->packed_desc[desc_head].flags & VRING_DESC_F_NEXT)
- chained = 1;
-
- desc_len = vui->virtio_net_hdr_sz;
- buffer_map_addr = desc_table[desc_index].addr;
- buffer_len = desc_table[desc_index].len;
-
- /* Get a header from the header array */
- virtio_net_hdr_mrg_rxbuf_t *hdr = &cpu->tx_headers[tx_headers_len];
- tx_headers_len++;
- hdr->hdr.flags = 0;
- hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
- hdr->num_buffers = 1;
-
- or_flags = (b0->flags & VNET_BUFFER_F_OFFLOAD);
-
- /* Guest supports csum offload and buffer requires checksum offload? */
- if (or_flags &&
- (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM)))
- vhost_user_handle_tx_offload (vui, b0, &hdr->hdr);
-
- /* Prepare a copy order executed later for the header */
- ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
- vhost_copy_t *cpy = &cpu->copy[copy_len];
- copy_len++;
- cpy->len = vui->virtio_net_hdr_sz;
- cpy->dst = buffer_map_addr;
- cpy->src = (uword) hdr;
-
- buffer_map_addr += vui->virtio_net_hdr_sz;
- buffer_len -= vui->virtio_net_hdr_sz;
- bytes_left = b0->current_length;
- current_b0 = b0;
- while (1)
- {
- if (buffer_len == 0)
- {
- /* Get new output */
- if (chained)
- {
- /*
- * Next one is chained
- * Test it with both indirect and mrg_rxbuf off
- */
- if (PREDICT_FALSE (!(desc_table[desc_index].flags &
- VRING_DESC_F_NEXT)))
- {
- /*
- * Last descriptor in chain.
- * Dequeue queued descriptors for this packet
- */
- vhost_user_dequeue_chained_descs (rxvq,
- &n_descs_processed);
- error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
- goto done;
- }
- vhost_user_advance_last_avail_idx (rxvq);
- desc_index = rxvq->last_avail_idx & rxvq->qsz_mask;
- n_descs_processed++;
- buffer_map_addr = desc_table[desc_index].addr;
- buffer_len = desc_table[desc_index].len;
- total_desc_len += desc_len;
- desc_len = 0;
- }
- else if (indirect)
- {
- /*
- * Indirect table
-		   * Test it with mrg_rxbuf off
- */
- if (PREDICT_TRUE (n_entries > 0))
- n_entries--;
- else
- {
- /* Dequeue queued descriptors for this packet */
- vhost_user_dequeue_chained_descs (rxvq,
- &n_descs_processed);
- error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
- goto done;
- }
- total_desc_len += desc_len;
- desc_index = (desc_index + 1) & rxvq->qsz_mask;
- buffer_map_addr = desc_table[desc_index].addr;
- buffer_len = desc_table[desc_index].len;
- desc_len = 0;
- }
- else if (vui->virtio_net_hdr_sz == 12)
- {
- /*
- * MRG is available
- * This is the default setting for the guest VM
- */
- virtio_net_hdr_mrg_rxbuf_t *hdr =
- &cpu->tx_headers[tx_headers_len - 1];
-
- desc_table[desc_index].len = desc_len;
- vhost_user_advance_last_avail_idx (rxvq);
- desc_head = desc_index =
- rxvq->last_avail_idx & rxvq->qsz_mask;
- hdr->num_buffers++;
- n_descs_processed++;
- desc_len = 0;
-
- if (PREDICT_FALSE (!vhost_user_packed_desc_available
- (rxvq, desc_index)))
- {
- /* Dequeue queued descriptors for this packet */
- vhost_user_dequeue_descs (rxvq, hdr,
- &n_descs_processed);
- error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
- goto done;
- }
-
- buffer_map_addr = desc_table[desc_index].addr;
- buffer_len = desc_table[desc_index].len;
- }
- else
- {
- error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
- goto done;
- }
- }
-
- ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
- vhost_copy_t *cpy = &cpu->copy[copy_len];
- copy_len++;
- cpy->len = bytes_left;
- cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
- cpy->dst = buffer_map_addr;
- cpy->src = (uword) vlib_buffer_get_current (current_b0) +
- current_b0->current_length - bytes_left;
-
- bytes_left -= cpy->len;
- buffer_len -= cpy->len;
- buffer_map_addr += cpy->len;
- desc_len += cpy->len;
-
- clib_prefetch_load (&rxvq->packed_desc);
-
- /* Check if vlib buffer has more data. If not, get more or break */
- if (PREDICT_TRUE (!bytes_left))
- {
- if (PREDICT_FALSE
- (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT))
- {
- current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
- bytes_left = current_b0->current_length;
- }
- else
- {
- /* End of packet */
- break;
- }
- }
- }
-
- /* Move from available to used ring */
- total_desc_len += desc_len;
- rxvq->packed_desc[desc_head].len = total_desc_len;
-
- vhost_user_advance_last_avail_table_idx (vui, rxvq, chained);
- n_descs_processed++;
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- cpu->current_trace->hdr = cpu->tx_headers[tx_headers_len - 1];
-
- n_left--;
-
- /*
- * Do the copy periodically to prevent
- * cpu->copy array overflow and corrupt memory
- */
- if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD) || chained)
- {
- if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
- &map_hint)))
- vlib_error_count (vm, node->node_index,
- VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
- copy_len = 0;
-
- /* give buffers back to driver */
- vhost_user_mark_desc_available (vm, vui, rxvq, &n_descs_processed,
- chained, frame, n_left);
- }
-
- buffers++;
- }
-
-done:
- if (PREDICT_TRUE (copy_len))
- {
- if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
- &map_hint)))
- vlib_error_count (vm, node->node_index,
- VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
-
- vhost_user_mark_desc_available (vm, vui, rxvq, &n_descs_processed,
- chained, frame, n_left);
- }
-
- /*
- * When n_left is set, error is always set to something too.
- * In case error is due to lack of remaining buffers, we go back up and
- * retry.
- * The idea is that it is better to waste some time on packets
- * that have already been processed than to drop them and fetch
- * fresh packets with a good likelihood that they will be dropped too.
- * This technique also gives the VM driver more time to pick up packets.
- * When traffic flows from physical to virtual interfaces, it ends up
- * leveraging the physical NIC buffers to absorb the VM's CPU jitter.
- */
- if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
- {
- retry--;
- goto retry;
- }
-
- clib_spinlock_unlock (&rxvq->vring_lock);
-
- if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
- {
- vlib_error_count (vm, node->node_index, error, n_left);
- vlib_increment_simple_counter
- (vnet_main.interface_main.sw_if_counters +
- VNET_INTERFACE_COUNTER_DROP, thread_index, vui->sw_if_index, n_left);
- }
-
- vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
- return frame->n_vectors;
-}
-
-VNET_DEVICE_CLASS_TX_FN (vhost_user_device_class) (vlib_main_t * vm,
- vlib_node_runtime_t *
- node, vlib_frame_t * frame)
-{
- u32 *buffers = vlib_frame_vector_args (frame);
- u32 n_left = frame->n_vectors;
- vhost_user_main_t *vum = &vhost_user_main;
- vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
- vhost_user_intf_t *vui =
- pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance);
- u32 qid;
- vhost_user_vring_t *rxvq;
- u8 error;
- u32 thread_index = vm->thread_index;
- vhost_cpu_t *cpu = &vum->cpus[thread_index];
- u32 map_hint = 0;
- u8 retry = 8;
- u16 copy_len;
- u16 tx_headers_len;
- u32 or_flags;
- vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
-
- if (PREDICT_FALSE (!vui->admin_up))
- {
- error = VHOST_USER_TX_FUNC_ERROR_DOWN;
- goto done3;
- }
-
- if (PREDICT_FALSE (!vui->is_ready))
- {
- error = VHOST_USER_TX_FUNC_ERROR_NOT_READY;
- goto done3;
- }
-
- qid = VHOST_VRING_IDX_RX (tf->queue_id);
- rxvq = &vui->vrings[qid];
- ASSERT (tf->queue_id == rxvq->qid);
-
- if (PREDICT_FALSE (rxvq->avail == 0))
- {
- error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
- goto done3;
- }
- if (tf->shared_queue)
- clib_spinlock_lock (&rxvq->vring_lock);
-
- if (vhost_user_is_packed_ring_supported (vui))
- return (vhost_user_device_class_packed (vm, node, frame, vui, rxvq));
-
-retry:
- error = VHOST_USER_TX_FUNC_ERROR_NONE;
- tx_headers_len = 0;
- copy_len = 0;
- while (n_left > 0)
- {
- vlib_buffer_t *b0, *current_b0;
- u16 desc_head, desc_index, desc_len;
- vring_desc_t *desc_table;
- uword buffer_map_addr;
- u32 buffer_len;
- u16 bytes_left;
-
- if (PREDICT_TRUE (n_left > 1))
- vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);
-
- b0 = vlib_get_buffer (vm, buffers[0]);
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- cpu->current_trace = vlib_add_trace (vm, node, b0,
- sizeof (*cpu->current_trace));
- vhost_user_tx_trace (cpu->current_trace, vui, qid / 2, b0, rxvq);
- }
-
- if (PREDICT_FALSE (rxvq->last_avail_idx == rxvq->avail->idx))
- {
- error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
- goto done;
- }
-
- desc_table = rxvq->desc;
- desc_head = desc_index =
- rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];
-
-      /* Go deeper in case of an indirect descriptor.
-       * I don't know of any driver providing indirect for RX. */
- if (PREDICT_FALSE (rxvq->desc[desc_head].flags & VRING_DESC_F_INDIRECT))
- {
- if (PREDICT_FALSE
- (rxvq->desc[desc_head].len < sizeof (vring_desc_t)))
- {
- error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
- goto done;
- }
- if (PREDICT_FALSE
- (!(desc_table =
- map_guest_mem (vui, rxvq->desc[desc_index].addr,
- &map_hint))))
- {
- error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
- goto done;
- }
- desc_index = 0;
- }
-
- desc_len = vui->virtio_net_hdr_sz;
- buffer_map_addr = desc_table[desc_index].addr;
- buffer_len = desc_table[desc_index].len;
-
- {
- // Get a header from the header array
- virtio_net_hdr_mrg_rxbuf_t *hdr = &cpu->tx_headers[tx_headers_len];
- tx_headers_len++;
- hdr->hdr.flags = 0;
- hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
- hdr->num_buffers = 1; //This is local, no need to check
-
- or_flags = (b0->flags & VNET_BUFFER_F_OFFLOAD);
-
- /* Guest supports csum offload and buffer requires checksum offload? */
- if (or_flags
- && (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM)))
- vhost_user_handle_tx_offload (vui, b0, &hdr->hdr);
-
- // Prepare a copy order executed later for the header
- ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
- vhost_copy_t *cpy = &cpu->copy[copy_len];
- copy_len++;
- cpy->len = vui->virtio_net_hdr_sz;
- cpy->dst = buffer_map_addr;
- cpy->src = (uword) hdr;
- }
-
- buffer_map_addr += vui->virtio_net_hdr_sz;
- buffer_len -= vui->virtio_net_hdr_sz;
- bytes_left = b0->current_length;
- current_b0 = b0;
- while (1)
- {
- if (buffer_len == 0)
- { //Get new output
- if (desc_table[desc_index].flags & VRING_DESC_F_NEXT)
- {
- //Next one is chained
- desc_index = desc_table[desc_index].next;
- buffer_map_addr = desc_table[desc_index].addr;
- buffer_len = desc_table[desc_index].len;
- }
- else if (vui->virtio_net_hdr_sz == 12) //MRG is available
- {
- virtio_net_hdr_mrg_rxbuf_t *hdr =
- &cpu->tx_headers[tx_headers_len - 1];
-
- //Move from available to used buffer
- rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id =
- desc_head;
- rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len =
- desc_len;
- vhost_user_log_dirty_ring (vui, rxvq,
- ring[rxvq->last_used_idx &
- rxvq->qsz_mask]);
-
- rxvq->last_avail_idx++;
- rxvq->last_used_idx++;
- hdr->num_buffers++;
- desc_len = 0;
-
- if (PREDICT_FALSE
- (rxvq->last_avail_idx == rxvq->avail->idx))
- {
- //Dequeue queued descriptors for this packet
- rxvq->last_used_idx -= hdr->num_buffers - 1;
- rxvq->last_avail_idx -= hdr->num_buffers - 1;
- error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
- goto done;
- }
-
- desc_table = rxvq->desc;
- desc_head = desc_index =
- rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];
- if (PREDICT_FALSE
- (rxvq->desc[desc_head].flags & VRING_DESC_F_INDIRECT))
- {
-		  // It is seriously unlikely that a driver will put an
-		  // indirect descriptor after a non-indirect one.
- if (PREDICT_FALSE
- (rxvq->desc[desc_head].len < sizeof (vring_desc_t)))
- {
- error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
- goto done;
- }
- if (PREDICT_FALSE
- (!(desc_table =
- map_guest_mem (vui,
- rxvq->desc[desc_index].addr,
- &map_hint))))
- {
- error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
- goto done;
- }
- desc_index = 0;
- }
- buffer_map_addr = desc_table[desc_index].addr;
- buffer_len = desc_table[desc_index].len;
- }
- else
- {
- error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
- goto done;
- }
- }
-
- {
- ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
- vhost_copy_t *cpy = &cpu->copy[copy_len];
- copy_len++;
- cpy->len = bytes_left;
- cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
- cpy->dst = buffer_map_addr;
- cpy->src = (uword) vlib_buffer_get_current (current_b0) +
- current_b0->current_length - bytes_left;
-
- bytes_left -= cpy->len;
- buffer_len -= cpy->len;
- buffer_map_addr += cpy->len;
- desc_len += cpy->len;
-
- clib_prefetch_load (&rxvq->desc);
- }
-
- // Check if vlib buffer has more data. If not, get more or break.
- if (PREDICT_TRUE (!bytes_left))
- {
- if (PREDICT_FALSE
- (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT))
- {
- current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
- bytes_left = current_b0->current_length;
- }
- else
- {
- //End of packet
- break;
- }
- }
- }
-
- //Move from available to used ring
- rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id = desc_head;
- rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len = desc_len;
- vhost_user_log_dirty_ring (vui, rxvq,
- ring[rxvq->last_used_idx & rxvq->qsz_mask]);
- rxvq->last_avail_idx++;
- rxvq->last_used_idx++;
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- cpu->current_trace->hdr = cpu->tx_headers[tx_headers_len - 1];
- }
-
-      n_left--; // Decremented last so error counting is right when 'goto done' is taken
-
- /*
- * Do the copy periodically to prevent
- * cpu->copy array overflow and corrupt memory
- */
- if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD))
- {
- if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
- &map_hint)))
- {
- vlib_error_count (vm, node->node_index,
- VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
- }
- copy_len = 0;
-
- /* give buffers back to driver */
- CLIB_MEMORY_BARRIER ();
- rxvq->used->idx = rxvq->last_used_idx;
- vhost_user_log_dirty_ring (vui, rxvq, idx);
- }
- buffers++;
- }
-
-done:
- //Do the memory copies
- if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
- &map_hint)))
- {
- vlib_error_count (vm, node->node_index,
- VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
- }
-
- CLIB_MEMORY_BARRIER ();
- rxvq->used->idx = rxvq->last_used_idx;
- vhost_user_log_dirty_ring (vui, rxvq, idx);
-
- /*
- * When n_left is set, error is always set to something too.
- * In case error is due to lack of remaining buffers, we go back up and
- * retry.
- * The idea is that it is better to waste some time on packets
- * that have already been processed than to drop them and fetch
- * fresh packets with a good likelihood that they will be dropped too.
- * This technique also gives the VM driver more time to pick up packets.
- * When traffic flows from physical to virtual interfaces, it ends up
- * leveraging the physical NIC buffers to absorb the VM's CPU jitter.
- */
- if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
- {
- retry--;
- goto retry;
- }
-
- /* interrupt (call) handling */
- if ((rxvq->callfd_idx != ~0) &&
- !(rxvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
- {
- rxvq->n_since_last_int += frame->n_vectors - n_left;
-
- if (rxvq->n_since_last_int > vum->coalesce_frames)
- vhost_user_send_call (vm, vui, rxvq);
- }
-
- clib_spinlock_unlock (&rxvq->vring_lock);
-
-done3:
- if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
- {
- vlib_error_count (vm, node->node_index, error, n_left);
- vlib_increment_simple_counter
- (vnet_main.interface_main.sw_if_counters
- + VNET_INTERFACE_COUNTER_DROP,
- thread_index, vui->sw_if_index, n_left);
- }
-
- vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
- return frame->n_vectors;
-}
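The split-ring completion path above publishes used descriptors with a fixed ordering: fill the used ring entries, issue a memory barrier, then bump used->idx. That ordering is what makes the update safe against a concurrently polling guest. A compacted standalone sketch of the protocol, with CLIB_MEMORY_BARRIER reduced to a full GCC fence:

#include <stdint.h>

typedef struct { uint32_t id, len; } used_elem_t;
typedef struct
{
  uint16_t flags;
  volatile uint16_t idx; /* the guest polls this */
  used_elem_t ring[256];
} used_ring_t;

/* Publish n completed descriptors: entries first, index last. */
static void
publish_used (used_ring_t *used, uint16_t last_used_idx,
	      const used_elem_t *done, uint16_t n)
{
  uint16_t mask = 256 - 1;

  for (uint16_t i = 0; i < n; i++)
    used->ring[(last_used_idx + i) & mask] = done[i];

  /* Entries must be globally visible before the index moves
   * (CLIB_MEMORY_BARRIER in the original); a full fence is the
   * conservative stand-in here. */
  __sync_synchronize ();
  used->idx = last_used_idx + n;
}

int main (void)
{
  static used_ring_t used;
  used_elem_t done[2] = { { 3, 1514 }, { 9, 60 } };
  publish_used (&used, 0, done, 2);
  return 0;
}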
-
-static __clib_unused clib_error_t *
-vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index,
- u32 qid, vnet_hw_if_rx_mode mode)
-{
- vlib_main_t *vm = vnm->vlib_main;
- vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui =
- pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
- vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
- vhost_cpu_t *cpu;
-
- if (mode == txvq->mode)
- return 0;
-
- if ((mode != VNET_HW_IF_RX_MODE_POLLING) &&
- (mode != VNET_HW_IF_RX_MODE_ADAPTIVE) &&
- (mode != VNET_HW_IF_RX_MODE_INTERRUPT))
- {
- vu_log_err (vui, "unhandled mode %d changed for if %d queue %d", mode,
- hw_if_index, qid);
- return clib_error_return (0, "unsupported");
- }
-
- if (txvq->thread_index == ~0)
- return clib_error_return (0, "Queue initialization is not finished yet");
-
- cpu = vec_elt_at_index (vum->cpus, txvq->thread_index);
- if ((mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
- (mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
- {
- if (txvq->kickfd_idx == ~0)
- {
- // We cannot support interrupt mode if the driver opts out
- return clib_error_return (0, "Driver does not support interrupt");
- }
- if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
- {
- ASSERT (cpu->polling_q_count != 0);
- if (cpu->polling_q_count)
- cpu->polling_q_count--;
- vum->ifq_count++;
- // Start the timer if this is the first encounter on interrupt
- // interface/queue
- if ((vum->ifq_count == 1) &&
- ((vum->coalesce_time > 0.0) || (vum->coalesce_frames > 0)))
- vlib_process_signal_event (vm,
- vhost_user_send_interrupt_node.index,
- VHOST_USER_EVENT_START_TIMER, 0);
- }
- }
- else if (mode == VNET_HW_IF_RX_MODE_POLLING)
- {
- if (((txvq->mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
- (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)) && vum->ifq_count)
- {
- cpu->polling_q_count++;
- vum->ifq_count--;
- // Stop the timer if there is no more interrupt interface/queue
- if (vum->ifq_count == 0)
- vlib_process_signal_event (vm,
- vhost_user_send_interrupt_node.index,
- VHOST_USER_EVENT_STOP_TIMER, 0);
- }
- }
-
- txvq->mode = mode;
- vhost_user_set_operation_mode (vui, txvq);
-
- return 0;
-}
-
-static __clib_unused clib_error_t *
-vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
- u32 flags)
-{
- vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui =
- pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
- u8 link_old, link_new;
-
- link_old = vui_is_link_up (vui);
-
- vui->admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
-
- link_new = vui_is_link_up (vui);
-
- if (link_old != link_new)
- vnet_hw_interface_set_flags (vnm, vui->hw_if_index, link_new ?
- VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
-
- return /* no error */ 0;
-}
-
-/* *INDENT-OFF* */
-VNET_DEVICE_CLASS (vhost_user_device_class) = {
- .name = "vhost-user",
- .tx_function_n_errors = VHOST_USER_TX_FUNC_N_ERROR,
- .tx_function_error_strings = vhost_user_tx_func_error_strings,
- .format_device_name = format_vhost_user_interface_name,
- .name_renumber = vhost_user_name_renumber,
- .admin_up_down_function = vhost_user_interface_admin_up_down,
- .rx_mode_change_function = vhost_user_interface_rx_mode_change,
- .format_tx_trace = format_vhost_trace,
-};
-
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/virtio/virtio.api b/src/vnet/devices/virtio/virtio.api
index bbe2341a001..a11492ec258 100644
--- a/src/vnet/devices/virtio/virtio.api
+++ b/src/vnet/devices/virtio/virtio.api
@@ -56,7 +56,7 @@ define virtio_pci_create_reply
vl_api_interface_index_t sw_if_index;
};
-enum virtio_flags {
+enumflag virtio_flags {
VIRTIO_API_FLAG_GSO = 1, /* enable gso on the interface */
VIRTIO_API_FLAG_CSUM_OFFLOAD = 2, /* enable checksum offload without gso on the interface */
VIRTIO_API_FLAG_GRO_COALESCE = 4, /* enable packet coalescing on tx side, provided gso enabled */
diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c
index fe808f12da4..d2302fa1dc4 100644
--- a/src/vnet/devices/virtio/virtio.c
+++ b/src/vnet/devices/virtio/virtio.c
@@ -19,7 +19,11 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <net/if.h>
+#ifdef __linux__
#include <linux/if_tun.h>
+#elif __FreeBSD__
+#include <net/if_tun.h>
+#endif /* __linux__ */
#include <sys/ioctl.h>
#include <sys/eventfd.h>
@@ -33,6 +37,7 @@
#include <vnet/devices/virtio/virtio_inline.h>
#include <vnet/devices/virtio/pci.h>
#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
virtio_main_t virtio_main;
@@ -59,7 +64,7 @@ call_read_ready (clib_file_t * uf)
clib_error_t *
virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz)
{
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
int i;
if (!is_pow2 (sz))
@@ -73,12 +78,10 @@ virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz)
if (idx % 2)
{
- vlib_thread_main_t *thm = vlib_get_thread_main ();
vec_validate_aligned (vif->txq_vrings, TX_QUEUE_ACCESS (idx),
CLIB_CACHE_LINE_BYTES);
vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (idx));
- if (thm->n_vlib_mains > vif->num_txqs)
- clib_spinlock_init (&vring->lockp);
+ clib_spinlock_init (&vring->lockp);
}
else
{
@@ -86,19 +89,20 @@ virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz)
CLIB_CACHE_LINE_BYTES);
vring = vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS (idx));
}
- i = sizeof (vring_desc_t) * sz;
+ i = sizeof (vnet_virtio_vring_desc_t) * sz;
i = round_pow2 (i, CLIB_CACHE_LINE_BYTES);
vring->desc = clib_mem_alloc_aligned (i, CLIB_CACHE_LINE_BYTES);
clib_memset (vring->desc, 0, i);
- i = sizeof (vring_avail_t) + sz * sizeof (vring->avail->ring[0]);
+ i = sizeof (vnet_virtio_vring_avail_t) + sz * sizeof (vring->avail->ring[0]);
i = round_pow2 (i, CLIB_CACHE_LINE_BYTES);
vring->avail = clib_mem_alloc_aligned (i, CLIB_CACHE_LINE_BYTES);
clib_memset (vring->avail, 0, i);
// tell kernel that we don't need interrupt
vring->avail->flags = VRING_AVAIL_F_NO_INTERRUPT;
- i = sizeof (vring_used_t) + sz * sizeof (vring_used_elem_t);
+ i = sizeof (vnet_virtio_vring_used_t) +
+ sz * sizeof (vnet_virtio_vring_used_elem_t);
i = round_pow2 (i, CLIB_CACHE_LINE_BYTES);
vring->used = clib_mem_alloc_aligned (i, CLIB_CACHE_LINE_BYTES);
clib_memset (vring->used, 0, i);
@@ -116,20 +120,21 @@ virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz)
else
vring->call_fd = eventfd (0, EFD_NONBLOCK | EFD_CLOEXEC);
- vring->size = sz;
+ vring->total_packets = 0;
+ vring->queue_size = sz;
vring->kick_fd = eventfd (0, EFD_NONBLOCK | EFD_CLOEXEC);
virtio_log_debug (vif, "vring %u size %u call_fd %d kick_fd %d", idx,
- vring->size, vring->call_fd, vring->kick_fd);
+ vring->queue_size, vring->call_fd, vring->kick_fd);
return 0;
}
inline void
-virtio_free_buffers (vlib_main_t * vm, virtio_vring_t * vring)
+virtio_free_buffers (vlib_main_t *vm, vnet_virtio_vring_t *vring)
{
u16 used = vring->desc_in_use;
u16 last = vring->last_used_idx;
- u16 mask = vring->size - 1;
+ u16 mask = vring->queue_size - 1;
while (used)
{
@@ -142,7 +147,7 @@ virtio_free_buffers (vlib_main_t * vm, virtio_vring_t * vring)
clib_error_t *
virtio_vring_free_rx (vlib_main_t * vm, virtio_if_t * vif, u32 idx)
{
- virtio_vring_t *vring =
+ vnet_virtio_vring_t *vring =
vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS (idx));
clib_file_del_by_index (&file_main, vring->call_file_index);
@@ -164,7 +169,7 @@ virtio_vring_free_rx (vlib_main_t * vm, virtio_if_t * vif, u32 idx)
clib_error_t *
virtio_vring_free_tx (vlib_main_t * vm, virtio_if_t * vif, u32 idx)
{
- virtio_vring_t *vring =
+ vnet_virtio_vring_t *vring =
vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (idx));
close (vring->kick_fd);
@@ -189,7 +194,7 @@ virtio_set_packet_coalesce (virtio_if_t * vif)
{
vnet_main_t *vnm = vnet_get_main ();
vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
vif->packet_coalesce = 1;
vec_foreach (vring, vif->txq_vrings)
{
@@ -204,9 +209,8 @@ virtio_set_packet_buffering (virtio_if_t * vif, u16 buffering_size)
{
vnet_main_t *vnm = vnet_get_main ();
vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
clib_error_t *error = 0;
- vif->packet_buffering = 1;
vec_foreach (vring, vif->txq_vrings)
{
@@ -222,7 +226,8 @@ virtio_set_packet_buffering (virtio_if_t * vif, u16 buffering_size)
}
static void
-virtio_vring_fill (vlib_main_t *vm, virtio_if_t *vif, virtio_vring_t *vring)
+virtio_vring_fill (vlib_main_t *vm, virtio_if_t *vif,
+ vnet_virtio_vring_t *vring)
{
if (vif->is_packed)
virtio_refill_vring_packed (vm, vif, vif->type, vring,
@@ -238,7 +243,7 @@ void
virtio_vring_set_rx_queues (vlib_main_t *vm, virtio_if_t *vif)
{
vnet_main_t *vnm = vnet_get_main ();
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
u32 i = 0;
vnet_hw_if_set_input_node (vnm, vif->hw_if_index, virtio_input_node.index);
@@ -284,14 +289,42 @@ virtio_vring_set_rx_queues (vlib_main_t *vm, virtio_if_t *vif)
vnet_hw_if_update_runtime_data (vnm, vif->hw_if_index);
}
+void
+virtio_vring_set_tx_queues (vlib_main_t *vm, virtio_if_t *vif)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_virtio_vring_t *vring;
+
+ vec_foreach (vring, vif->txq_vrings)
+ {
+ vring->queue_index = vnet_hw_if_register_tx_queue (
+ vnm, vif->hw_if_index, TX_QUEUE_ACCESS (vring->queue_id));
+ }
+
+ if (vif->num_txqs == 0)
+ {
+ virtio_log_error (vif, "Interface %U has 0 txq",
+ format_vnet_hw_if_index_name, vnm, vif->hw_if_index);
+ return;
+ }
+
+ for (u32 j = 0; j < vlib_get_n_threads (); j++)
+ {
+ u32 qi = vif->txq_vrings[j % vif->num_txqs].queue_index;
+ vnet_hw_if_tx_queue_assign_thread (vnm, qi, j);
+ }
+
+ vnet_hw_if_update_runtime_data (vnm, vif->hw_if_index);
+}
+
inline void
virtio_set_net_hdr_size (virtio_if_t * vif)
{
if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF) ||
vif->features & VIRTIO_FEATURE (VIRTIO_F_VERSION_1))
- vif->virtio_net_hdr_sz = sizeof (virtio_net_hdr_v1_t);
+ vif->virtio_net_hdr_sz = sizeof (vnet_virtio_net_hdr_v1_t);
else
- vif->virtio_net_hdr_sz = sizeof (virtio_net_hdr_t);
+ vif->virtio_net_hdr_sz = sizeof (vnet_virtio_net_hdr_t);
}
inline void
@@ -302,7 +335,7 @@ virtio_show (vlib_main_t *vm, u32 *hw_if_indices, u8 show_descr,
virtio_if_t *vif;
vnet_main_t *vnm = &vnet_main;
virtio_main_t *mm = &virtio_main;
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
struct feat_struct
{
u8 bit;
@@ -415,10 +448,10 @@ virtio_show (vlib_main_t *vm, u32 *hw_if_indices, u8 show_descr,
{
vring = vec_elt_at_index (vif->rxq_vrings, i);
vlib_cli_output (vm, " Virtqueue (RX) %d", vring->queue_id);
- vlib_cli_output (vm,
- " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d",
- vring->size, vring->last_used_idx, vring->desc_next,
- vring->desc_in_use);
+ vlib_cli_output (
+ vm, " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d",
+ vring->queue_size, vring->last_used_idx, vring->desc_next,
+ vring->desc_in_use);
if (vif->is_packed)
{
vlib_cli_output (vm,
@@ -449,11 +482,12 @@ virtio_show (vlib_main_t *vm, u32 *hw_if_indices, u8 show_descr,
" id addr len flags next/id user_addr\n");
vlib_cli_output (vm,
" ===== ================== ===== ====== ======= ==================\n");
- for (j = 0; j < vring->size; j++)
+ for (j = 0; j < vring->queue_size; j++)
{
if (vif->is_packed)
{
- vring_packed_desc_t *desc = &vring->packed_desc[j];
+ vnet_virtio_vring_packed_desc_t *desc =
+ &vring->packed_desc[j];
vlib_cli_output (vm,
" %-5d 0x%016lx %-5d 0x%04x %-8d 0x%016lx\n",
j, desc->addr,
@@ -462,7 +496,7 @@ virtio_show (vlib_main_t *vm, u32 *hw_if_indices, u8 show_descr,
}
else
{
- vring_desc_t *desc = &vring->desc[j];
+ vnet_virtio_vring_desc_t *desc = &vring->desc[j];
vlib_cli_output (vm,
" %-5d 0x%016lx %-5d 0x%04x %-8d 0x%016lx\n",
j, desc->addr,
@@ -476,10 +510,10 @@ virtio_show (vlib_main_t *vm, u32 *hw_if_indices, u8 show_descr,
{
vring = vec_elt_at_index (vif->txq_vrings, i);
vlib_cli_output (vm, " Virtqueue (TX) %d", vring->queue_id);
- vlib_cli_output (vm,
- " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d",
- vring->size, vring->last_used_idx, vring->desc_next,
- vring->desc_in_use);
+ vlib_cli_output (
+ vm, " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d",
+ vring->queue_size, vring->last_used_idx, vring->desc_next,
+ vring->desc_in_use);
if (vif->is_packed)
{
vlib_cli_output (vm,
@@ -520,11 +554,12 @@ virtio_show (vlib_main_t *vm, u32 *hw_if_indices, u8 show_descr,
" id addr len flags next/id user_addr\n");
vlib_cli_output (vm,
" ===== ================== ===== ====== ======== ==================\n");
- for (j = 0; j < vring->size; j++)
+ for (j = 0; j < vring->queue_size; j++)
{
if (vif->is_packed)
{
- vring_packed_desc_t *desc = &vring->packed_desc[j];
+ vnet_virtio_vring_packed_desc_t *desc =
+ &vring->packed_desc[j];
vlib_cli_output (vm,
" %-5d 0x%016lx %-5d 0x%04x %-8d 0x%016lx\n",
j, desc->addr,
@@ -533,7 +568,7 @@ virtio_show (vlib_main_t *vm, u32 *hw_if_indices, u8 show_descr,
}
else
{
- vring_desc_t *desc = &vring->desc[j];
+ vnet_virtio_vring_desc_t *desc = &vring->desc[j];
vlib_cli_output (vm,
" %-5d 0x%016lx %-5d 0x%04x %-8d 0x%016lx\n",
j, desc->addr,
@@ -548,10 +583,10 @@ virtio_show (vlib_main_t *vm, u32 *hw_if_indices, u8 show_descr,
{
vring = vif->cxq_vring;
vlib_cli_output (vm, " Virtqueue (CTRL) %d", vring->queue_id);
- vlib_cli_output (vm,
- " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d",
- vring->size, vring->last_used_idx,
- vring->desc_next, vring->desc_in_use);
+ vlib_cli_output (
+ vm, " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d",
+ vring->queue_size, vring->last_used_idx, vring->desc_next,
+ vring->desc_in_use);
if (vif->is_packed)
{
vlib_cli_output (vm,
@@ -579,11 +614,12 @@ virtio_show (vlib_main_t *vm, u32 *hw_if_indices, u8 show_descr,
" id addr len flags next/id user_addr\n");
vlib_cli_output (vm,
" ===== ================== ===== ====== ======== ==================\n");
- for (j = 0; j < vring->size; j++)
+ for (j = 0; j < vring->queue_size; j++)
{
if (vif->is_packed)
{
- vring_packed_desc_t *desc = &vring->packed_desc[j];
+ vnet_virtio_vring_packed_desc_t *desc =
+ &vring->packed_desc[j];
vlib_cli_output (vm,
" %-5d 0x%016lx %-5d 0x%04x %-8d 0x%016lx\n",
j, desc->addr,
@@ -592,7 +628,7 @@ virtio_show (vlib_main_t *vm, u32 *hw_if_indices, u8 show_descr,
}
else
{
- vring_desc_t *desc = &vring->desc[j];
+ vnet_virtio_vring_desc_t *desc = &vring->desc[j];
vlib_cli_output (vm,
" %-5d 0x%016lx %-5d 0x%04x %-8d 0x%016lx\n",
j, desc->addr,
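The per-thread tx-queue assignment added above in virtio_vring_set_tx_queues() is a plain round-robin: thread j gets queue j % num_txqs, so queues end up shared once there are more threads than tx queues. A minimal standalone illustration of that mapping (the counts are made up; this is not VPP API):

/* Round-robin txq-to-thread mapping as in virtio_vring_set_tx_queues(). */
#include <stdio.h>

int
main (void)
{
  unsigned n_threads = 4, num_txqs = 2; /* illustrative counts */
  for (unsigned j = 0; j < n_threads; j++)
    printf ("thread %u -> txq %u\n", j, j % num_txqs); /* 0, 1, 0, 1 */
  return 0;
}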
diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h
index 87ecfcbdf71..431b1d25c26 100644
--- a/src/vnet/devices/virtio/virtio.h
+++ b/src/vnet/devices/virtio/virtio.h
@@ -22,6 +22,7 @@
#include <vnet/devices/virtio/vhost_std.h>
#include <vnet/devices/virtio/virtio_buffering.h>
#include <vnet/gso/gro.h>
+#include <vnet/interface.h>
#define foreach_virtio_if_flag \
_(0, ADMIN_UP, "admin-up") \
@@ -68,19 +69,19 @@ typedef struct
{
struct
{
- vring_desc_t *desc;
- vring_used_t *used;
- vring_avail_t *avail;
+ vnet_virtio_vring_desc_t *desc;
+ vnet_virtio_vring_used_t *used;
+ vnet_virtio_vring_avail_t *avail;
};
struct
{
- vring_packed_desc_t *packed_desc;
- vring_desc_event_t *driver_event;
- vring_desc_event_t *device_event;
+ vnet_virtio_vring_packed_desc_t *packed_desc;
+ vnet_virtio_vring_desc_event_t *driver_event;
+ vnet_virtio_vring_desc_event_t *device_event;
};
};
u32 *buffers;
- u16 size;
+ u16 queue_size;
u16 queue_id;
u32 queue_index;
u16 desc_in_use;
@@ -103,12 +104,14 @@ typedef struct
};
};
#define VRING_TX_OUT_OF_ORDER 1
+#define VRING_TX_SCHEDULED 2
u16 flags;
u8 buffer_pool_index;
vnet_hw_if_rx_mode mode;
virtio_vring_buffering_t *buffering;
gro_flow_table_t *flow_table;
-} virtio_vring_t;
+ u64 total_packets;
+} vnet_virtio_vring_t;
typedef union
{
@@ -133,8 +136,8 @@ typedef struct
u32 per_interface_next_index;
u16 num_rxqs;
u16 num_txqs;
- virtio_vring_t *rxq_vrings;
- virtio_vring_t *txq_vrings;
+ vnet_virtio_vring_t *rxq_vrings;
+ vnet_virtio_vring_t *txq_vrings;
int gso_enabled;
int csum_offload_enabled;
union
@@ -192,7 +195,7 @@ typedef struct
struct /* native virtio */
{
void *bar;
- virtio_vring_t *cxq_vring;
+ vnet_virtio_vring_t *cxq_vring;
pci_addr_t pci_addr;
u32 bar_id;
u32 notify_off_multiplier;
@@ -213,7 +216,7 @@ typedef struct
typedef struct
{
- u32 interrupt_queues_count;
+ u32 gro_or_buffering_if_count;
/* logging */
vlib_log_class_t log_default;
@@ -224,7 +227,6 @@ typedef struct
extern virtio_main_t virtio_main;
extern vnet_device_class_t virtio_device_class;
extern vlib_node_registration_t virtio_input_node;
-extern vlib_node_registration_t virtio_send_interrupt_node;
clib_error_t *virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx,
u16 sz);
@@ -233,7 +235,8 @@ clib_error_t *virtio_vring_free_rx (vlib_main_t * vm, virtio_if_t * vif,
clib_error_t *virtio_vring_free_tx (vlib_main_t * vm, virtio_if_t * vif,
u32 idx);
void virtio_vring_set_rx_queues (vlib_main_t *vm, virtio_if_t *vif);
-extern void virtio_free_buffers (vlib_main_t * vm, virtio_vring_t * vring);
+void virtio_vring_set_tx_queues (vlib_main_t *vm, virtio_if_t *vif);
+extern void virtio_free_buffers (vlib_main_t *vm, vnet_virtio_vring_t *vring);
extern void virtio_set_net_hdr_size (virtio_if_t * vif);
extern void virtio_show (vlib_main_t *vm, u32 *hw_if_indices, u8 show_descr,
virtio_if_type_t type);
@@ -245,11 +248,14 @@ extern void virtio_pci_legacy_notify_queue (vlib_main_t * vm,
extern void virtio_pci_modern_notify_queue (vlib_main_t * vm,
virtio_if_t * vif, u16 queue_id,
u16 queue_notify_offset);
+extern void virtio_pre_input_node_enable (vlib_main_t *vm, virtio_if_t *vif);
+extern void virtio_pre_input_node_disable (vlib_main_t *vm, virtio_if_t *vif);
+
format_function_t format_virtio_device_name;
format_function_t format_virtio_log_name;
static_always_inline void
-virtio_kick (vlib_main_t * vm, virtio_vring_t * vring, virtio_if_t * vif)
+virtio_kick (vlib_main_t *vm, vnet_virtio_vring_t *vring, virtio_if_t *vif)
{
if (vif->type == VIRTIO_IF_TYPE_PCI)
{
@@ -270,6 +276,56 @@ virtio_kick (vlib_main_t * vm, virtio_vring_t * vring, virtio_if_t * vif)
}
}
+static_always_inline u8
+virtio_txq_is_scheduled (vnet_virtio_vring_t *vring)
+{
+ if (vring)
+ return (vring->flags & VRING_TX_SCHEDULED);
+ return 1;
+}
+
+static_always_inline void
+virtio_txq_set_scheduled (vnet_virtio_vring_t *vring)
+{
+ if (vring)
+ vring->flags |= VRING_TX_SCHEDULED;
+}
+
+static_always_inline void
+virtio_txq_clear_scheduled (vnet_virtio_vring_t *vring)
+{
+ if (vring)
+ vring->flags &= ~VRING_TX_SCHEDULED;
+}
+
+static_always_inline void
+vnet_virtio_vring_init (vnet_virtio_vring_t *vring, u16 queue_size, void *p,
+ u32 align)
+{
+ vring->queue_size = queue_size;
+ vring->desc = p;
+ vring->avail =
+ (vnet_virtio_vring_avail_t *) ((char *) p +
+ queue_size *
+ sizeof (vnet_virtio_vring_desc_t));
+ vring->used =
+ (vnet_virtio_vring_used_t
+ *) ((char *) p + ((sizeof (vnet_virtio_vring_desc_t) * queue_size +
+ sizeof (u16) * (3 + queue_size) + align - 1) &
+ ~(align - 1)));
+ vring->avail->flags = VIRTIO_RING_FLAG_MASK_INT;
+}
+
+static_always_inline u16
+vnet_virtio_vring_size (u16 queue_size, u32 align)
+{
+ return ((sizeof (vnet_virtio_vring_desc_t) * queue_size +
+ sizeof (u16) * (3 + queue_size) + align - 1) &
+ ~(align - 1)) +
+ sizeof (u16) * 3 +
+ sizeof (vnet_virtio_vring_used_elem_t) * queue_size;
+}
+
#define virtio_log_debug(vif, f, ...) \
{ \
vlib_log(VLIB_LOG_LEVEL_DEBUG, virtio_main.log_default, \
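The new vnet_virtio_vring_init() and vnet_virtio_vring_size() helpers above reproduce the classic split-ring layout: a 16-byte descriptor table, a u16-based avail ring, then the used ring (8-byte elements) starting at the next aligned offset. A standalone sketch of that arithmetic, with the structure sizes hard-coded to the virtio values used above (illustrative, not VPP API):

/* Split-vring size: desc table + avail ring, aligned up, + used ring. */
#include <assert.h>
#include <stdio.h>

static unsigned
vring_bytes (unsigned qsz, unsigned align)
{
  unsigned desc_avail = 16 * qsz + 2 * (3 + qsz); /* descs + avail ring */
  unsigned aligned = (desc_avail + align - 1) & ~(align - 1);
  return aligned + 2 * 3 + 8 * qsz; /* used-ring header + elements */
}

int
main (void)
{
  /* qsz 256, 4 KiB align: 4096 + 518 -> 8192, plus 2054 = 10246 bytes */
  assert (vring_bytes (256, 4096) == 10246);
  printf ("%u\n", vring_bytes (256, 4096));
  return 0;
}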
diff --git a/src/vnet/devices/virtio/virtio_api.c b/src/vnet/devices/virtio/virtio_api.c
index 11514c75c59..3197a2fab6d 100644
--- a/src/vnet/devices/virtio/virtio_api.c
+++ b/src/vnet/devices/virtio/virtio_api.c
@@ -193,10 +193,10 @@ virtio_pci_send_sw_interface_details (vpe_api_main_t * am,
pci_address_encode ((vlib_pci_addr_t *) & vif->pci_addr.as_u32,
&mp->pci_addr);
mp->sw_if_index = htonl (vif->sw_if_index);
- virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, 0);
- mp->rx_ring_sz = htons (vring->size);
+ vnet_virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, 0);
+ mp->rx_ring_sz = htons (vring->queue_size);
vring = vec_elt_at_index (vif->txq_vrings, 0);
- mp->tx_ring_sz = htons (vring->size);
+ mp->tx_ring_sz = htons (vring->queue_size);
clib_memcpy (mp->mac_addr, vif->mac_addr, 6);
mp->features = clib_host_to_net_u64 (vif->features);
diff --git a/src/vnet/devices/virtio/virtio_buffering.h b/src/vnet/devices/virtio/virtio_buffering.h
index ef3d9d27652..6f13a1f5c36 100644
--- a/src/vnet/devices/virtio/virtio_buffering.h
+++ b/src/vnet/devices/virtio/virtio_buffering.h
@@ -18,6 +18,8 @@
#ifndef _VNET_DEVICES_VIRTIO_VIRTIO_BUFFERING_H_
#define _VNET_DEVICES_VIRTIO_VIRTIO_BUFFERING_H_
+#include <vnet/interface.h>
+
#define VIRTIO_BUFFERING_DEFAULT_SIZE 1024
#define VIRTIO_BUFFERING_TIMEOUT 1e-5
@@ -205,15 +207,18 @@ virtio_vring_buffering_read_from_back (virtio_vring_buffering_t * buffering)
}
static_always_inline void
-virtio_vring_buffering_schedule_node_on_dispatcher (vlib_main_t * vm,
- virtio_vring_buffering_t *
- buffering)
+virtio_vring_buffering_schedule_node_on_dispatcher (
+ vlib_main_t *vm, vnet_hw_if_tx_queue_t *txq,
+ virtio_vring_buffering_t *buffering)
{
if (buffering && virtio_vring_buffering_is_timeout (vm, buffering)
&& virtio_vring_n_buffers (buffering))
{
vlib_frame_t *f = vlib_get_frame_to_node (vm, buffering->node_index);
+ vnet_hw_if_tx_frame_t *ft = vlib_frame_scalar_args (f);
u32 *f_to = vlib_frame_vector_args (f);
+ ft->shared_queue = txq->shared_queue;
+ ft->queue_id = txq->queue_id;
f_to[f->n_vectors] = virtio_vring_buffering_read_from_back (buffering);
f->n_vectors++;
vlib_put_frame_to_node (vm, buffering->node_index, f);
diff --git a/src/vnet/devices/virtio/virtio_inline.h b/src/vnet/devices/virtio/virtio_inline.h
index 209817d48c7..41bba755934 100644
--- a/src/vnet/devices/virtio/virtio_inline.h
+++ b/src/vnet/devices/virtio/virtio_inline.h
@@ -17,6 +17,7 @@
#define foreach_virtio_input_error \
_ (BUFFER_ALLOC, "buffer alloc error") \
+ _ (FULL_RX_QUEUE, "full rx queue (driver tx drop)") \
_ (UNKNOWN, "unknown")
typedef enum
@@ -29,11 +30,11 @@ typedef enum
static_always_inline void
virtio_refill_vring_split (vlib_main_t *vm, virtio_if_t *vif,
- virtio_if_type_t type, virtio_vring_t *vring,
+ virtio_if_type_t type, vnet_virtio_vring_t *vring,
const int hdr_sz, u32 node_index)
{
u16 used, next, avail, n_slots, n_refill;
- u16 sz = vring->size;
+ u16 sz = vring->queue_size;
u16 mask = sz - 1;
more:
@@ -47,8 +48,9 @@ more:
next = vring->desc_next;
avail = vring->avail->idx;
- n_slots = vlib_buffer_alloc_to_ring_from_pool (
- vm, vring->buffers, next, vring->size, n_refill, vring->buffer_pool_index);
+ n_slots = vlib_buffer_alloc_to_ring_from_pool (vm, vring->buffers, next,
+ vring->queue_size, n_refill,
+ vring->buffer_pool_index);
if (PREDICT_FALSE (n_slots != n_refill))
{
@@ -60,7 +62,7 @@ more:
while (n_slots)
{
- vring_desc_t *d = &vring->desc[next];
+ vnet_virtio_vring_desc_t *d = &vring->desc[next];
;
vlib_buffer_t *b = vlib_get_buffer (vm, vring->buffers[next]);
/*
@@ -94,11 +96,11 @@ more:
static_always_inline void
virtio_refill_vring_packed (vlib_main_t *vm, virtio_if_t *vif,
- virtio_if_type_t type, virtio_vring_t *vring,
+ virtio_if_type_t type, vnet_virtio_vring_t *vring,
const int hdr_sz, u32 node_index)
{
u16 used, next, n_slots, n_refill, flags = 0, first_desc_flags;
- u16 sz = vring->size;
+ u16 sz = vring->queue_size;
more:
used = vring->desc_in_use;
@@ -124,7 +126,7 @@ more:
while (n_slots)
{
- vring_packed_desc_t *d = &vring->packed_desc[next];
+ vnet_virtio_vring_packed_desc_t *d = &vring->packed_desc[next];
vlib_buffer_t *b = vlib_get_buffer (vm, vring->buffers[next]);
/*
* current_data may not be initialized with 0 and may contain
diff --git a/src/vnet/devices/virtio/virtio_pci_legacy.c b/src/vnet/devices/virtio/virtio_pci_legacy.c
index 1426a7035a2..d7a1c982413 100644
--- a/src/vnet/devices/virtio/virtio_pci_legacy.c
+++ b/src/vnet/devices/virtio/virtio_pci_legacy.c
@@ -176,10 +176,11 @@ virtio_pci_legacy_set_queue_num (vlib_main_t * vm, virtio_if_t * vif,
}
static u8
-virtio_pci_legacy_setup_queue (vlib_main_t * vm, virtio_if_t * vif,
- u16 queue_id, void *p)
+virtio_pci_legacy_setup_queue (vlib_main_t *vm, virtio_if_t *vif, u16 queue_id,
+ vnet_virtio_vring_t *vring)
{
- u64 addr = vlib_physmem_get_pa (vm, p) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
+ u64 addr =
+ vlib_physmem_get_pa (vm, vring->desc) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
u32 addr2 = 0, a = (u32) addr;
vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL,
&queue_id);
diff --git a/src/vnet/devices/virtio/virtio_pci_modern.c b/src/vnet/devices/virtio/virtio_pci_modern.c
index 8e090ffed3a..50a7b392367 100644
--- a/src/vnet/devices/virtio/virtio_pci_modern.c
+++ b/src/vnet/devices/virtio/virtio_pci_modern.c
@@ -164,9 +164,7 @@ virtio_pci_modern_set_queue_size (vlib_main_t * vm, virtio_if_t * vif,
return;
}
- if (virtio_pci_modern_get_queue_size (vm, vif, queue_id) > queue_size)
- virtio_pci_reg_write_u16 (vif, VIRTIO_QUEUE_SIZE_OFFSET (vif),
- queue_size);
+ virtio_pci_reg_write_u16 (vif, VIRTIO_QUEUE_SIZE_OFFSET (vif), queue_size);
}
static u16
@@ -265,32 +263,24 @@ virtio_pci_modern_set_queue_device (virtio_if_t * vif, u64 queue_device)
}
static u8
-virtio_pci_modern_setup_queue (vlib_main_t * vm, virtio_if_t * vif,
- u16 queue_id, void *p)
+virtio_pci_modern_setup_queue (vlib_main_t *vm, virtio_if_t *vif, u16 queue_id,
+ vnet_virtio_vring_t *vring)
{
u64 desc, avail, used;
- u16 queue_size = 0;
virtio_pci_modern_set_queue_select (vif, queue_id);
- queue_size = virtio_pci_modern_get_queue_size (vm, vif, queue_id);
if (vif->is_packed)
{
- virtio_vring_t *vring = (virtio_vring_t *) p;
-
desc = vlib_physmem_get_pa (vm, vring->packed_desc);
avail = vlib_physmem_get_pa (vm, vring->driver_event);
used = vlib_physmem_get_pa (vm, vring->device_event);
}
else
{
- vring_t vr;
-
- vring_init (&vr, queue_size, p, VIRTIO_PCI_VRING_ALIGN);
-
- desc = vlib_physmem_get_pa (vm, vr.desc);
- avail = vlib_physmem_get_pa (vm, vr.avail);
- used = vlib_physmem_get_pa (vm, vr.used);
+ desc = vlib_physmem_get_pa (vm, vring->desc);
+ avail = vlib_physmem_get_pa (vm, vring->avail);
+ used = vlib_physmem_get_pa (vm, vring->used);
}
virtio_pci_modern_set_queue_desc (vif, desc);
diff --git a/src/vnet/devices/virtio/virtio_pre_input.c b/src/vnet/devices/virtio/virtio_pre_input.c
new file mode 100644
index 00000000000..80cc8d6edb0
--- /dev/null
+++ b/src/vnet/devices/virtio/virtio_pre_input.c
@@ -0,0 +1,160 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/gso/gro_func.h>
+#include <vnet/interface/tx_queue_funcs.h>
+#include <vnet/devices/virtio/virtio.h>
+#include <vnet/devices/virtio/virtio_inline.h>
+
+static_always_inline uword
+virtio_pre_input_inline (vlib_main_t *vm, vnet_virtio_vring_t *txq_vring,
+ vnet_hw_if_tx_queue_t *txq, u8 packet_coalesce,
+ u8 packet_buffering)
+{
+ if (txq->shared_queue)
+ {
+ if (clib_spinlock_trylock (&txq_vring->lockp))
+ {
+ if (virtio_txq_is_scheduled (txq_vring))
+ goto unlock;
+ if (packet_coalesce)
+ vnet_gro_flow_table_schedule_node_on_dispatcher (
+ vm, txq, txq_vring->flow_table);
+ else if (packet_buffering)
+ virtio_vring_buffering_schedule_node_on_dispatcher (
+ vm, txq, txq_vring->buffering);
+ virtio_txq_set_scheduled (txq_vring);
+ unlock:
+ clib_spinlock_unlock (&txq_vring->lockp);
+ }
+ }
+ else
+ {
+ if (packet_coalesce)
+ vnet_gro_flow_table_schedule_node_on_dispatcher (
+ vm, txq, txq_vring->flow_table);
+ else if (packet_buffering)
+ virtio_vring_buffering_schedule_node_on_dispatcher (
+ vm, txq, txq_vring->buffering);
+ }
+ return 0;
+}
+
+static uword
+virtio_pre_input (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ virtio_main_t *vim = &virtio_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ virtio_if_t *vif;
+
+ pool_foreach (vif, vim->interfaces)
+ {
+ if (vif->packet_coalesce || vif->packet_buffering)
+ {
+ vnet_virtio_vring_t *txq_vring;
+ vec_foreach (txq_vring, vif->txq_vrings)
+ {
+ vnet_hw_if_tx_queue_t *txq =
+ vnet_hw_if_get_tx_queue (vnm, txq_vring->queue_index);
+ if (clib_bitmap_get (txq->threads, vm->thread_index) == 1)
+ virtio_pre_input_inline (vm, txq_vring, txq,
+ vif->packet_coalesce,
+ vif->packet_buffering);
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * virtio interfaces support packet coalescing and buffering, both of
+ * which depend on timer expiry to flush stored packets periodically.
+ * Previously, the virtio input node checked timer expiry and scheduled
+ * the tx queue accordingly.
+ *
+ * In poll mode, timer expiry was handled naturally, since the input
+ * node runs periodically. In interrupt mode, the virtio input node
+ * depended on interrupts sent from the backend; stored packets could
+ * starve if no interrupts reached the input node.
+ *
+ * This problem was first solved by a dedicated process node which
+ * periodically sent an interrupt to the virtio input node whenever
+ * coalescing or buffering was enabled on an interface.
+ *
+ * But that approach had the following limitation:
+ * 1) each VPP thread needed at least one rx queue of an interface
+ * with buffering enabled, and rxqs and txqs had to be placed on the
+ * same thread.
+ *
+ * The new design solves the above problems without that limitation: a
+ * dedicated pre-input node runs on each VPP thread whenever at least
+ * one virtio interface has coalescing or buffering enabled.
+ */
+VLIB_REGISTER_NODE (virtio_pre_input_node) = {
+ .function = virtio_pre_input,
+ .type = VLIB_NODE_TYPE_PRE_INPUT,
+ .name = "virtio-pre-input",
+ .state = VLIB_NODE_STATE_DISABLED,
+};
+
+void
+virtio_pre_input_node_enable (vlib_main_t *vm, virtio_if_t *vif)
+{
+ virtio_main_t *vim = &virtio_main;
+ if (vif->packet_coalesce || vif->packet_buffering)
+ {
+ vim->gro_or_buffering_if_count++;
+ if (vim->gro_or_buffering_if_count == 1)
+ {
+ foreach_vlib_main ()
+ {
+ vlib_node_set_state (this_vlib_main, virtio_pre_input_node.index,
+ VLIB_NODE_STATE_POLLING);
+ }
+ }
+ }
+}
+
+void
+virtio_pre_input_node_disable (vlib_main_t *vm, virtio_if_t *vif)
+{
+ virtio_main_t *vim = &virtio_main;
+ if (vif->packet_coalesce || vif->packet_buffering)
+ {
+ if (vim->gro_or_buffering_if_count > 0)
+ vim->gro_or_buffering_if_count--;
+ if (vim->gro_or_buffering_if_count == 0)
+ {
+ foreach_vlib_main ()
+ {
+ vlib_node_set_state (this_vlib_main, virtio_pre_input_node.index,
+ VLIB_NODE_STATE_DISABLED);
+ }
+ }
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
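The enable/disable pair above refcounts interfaces via gro_or_buffering_if_count and flips the pre-input node state only on the 0-to-1 and 1-to-0 transitions. A standalone sketch of just that refcounting logic (the names mirror the code above; the rest is illustrative):

#include <assert.h>

static unsigned gro_or_buffering_if_count;
static int node_polling;

static void
enable (void)
{
  if (++gro_or_buffering_if_count == 1)
    node_polling = 1; /* first interface: start polling on all threads */
}

static void
disable (void)
{
  if (gro_or_buffering_if_count > 0 && --gro_or_buffering_if_count == 0)
    node_polling = 0; /* last interface gone: disable the node again */
}

int
main (void)
{
  enable ();
  enable ();
  disable ();
  assert (node_polling); /* one interface still needs the node */
  disable ();
  assert (!node_polling);
  return 0;
}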
diff --git a/src/vnet/devices/virtio/virtio_process.c b/src/vnet/devices/virtio/virtio_process.c
index f347ef2ab57..13ba590659c 100644
--- a/src/vnet/devices/virtio/virtio_process.c
+++ b/src/vnet/devices/virtio/virtio_process.c
@@ -50,7 +50,7 @@ virtio_send_interrupt_process (vlib_main_t * vm,
{
if (vif->packet_coalesce || vif->packet_buffering)
{
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
vec_foreach (vring, vif->rxq_vrings)
{
if (vring->mode == VNET_HW_IF_RX_MODE_INTERRUPT ||
@@ -70,13 +70,11 @@ virtio_send_interrupt_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (virtio_send_interrupt_node) = {
.function = virtio_send_interrupt_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "virtio-send-interrupt-process",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/devices/virtio/virtio_std.h b/src/vnet/devices/virtio/virtio_std.h
index 619dd66d5ed..ec988c08dbb 100644
--- a/src/vnet/devices/virtio/virtio_std.h
+++ b/src/vnet/devices/virtio/virtio_std.h
@@ -64,7 +64,7 @@ typedef enum
#define _(f,n) f = n,
foreach_virtio_net_features
#undef _
-} virtio_net_feature_t;
+} vnet_virtio_net_feature_t;
#define VIRTIO_FEATURE(X) (1ULL << X)
@@ -87,7 +87,7 @@ typedef enum
#define _(f,n) f = n,
foreach_virtio_event_idx_flags
#undef _
-} virtio_event_idx_flags_t;
+} vnet_virtio_event_idx_flags_t;
#define VRING_USED_F_NO_NOTIFY 1
#define VRING_AVAIL_F_NO_INTERRUPT 1
@@ -98,7 +98,7 @@ typedef struct
u32 len;
u16 flags;
u16 next;
-} vring_desc_t;
+} vnet_virtio_vring_desc_t;
typedef struct
{
@@ -106,38 +106,35 @@ typedef struct
u16 idx;
u16 ring[0];
/* u16 used_event; */
-} vring_avail_t;
+} vnet_virtio_vring_avail_t;
typedef struct
{
u32 id;
u32 len;
-} vring_used_elem_t;
+} vnet_virtio_vring_used_elem_t;
typedef struct
{
u16 flags;
u16 idx;
- vring_used_elem_t ring[0];
+ vnet_virtio_vring_used_elem_t ring[0];
/* u16 avail_event; */
-} vring_used_t;
+} vnet_virtio_vring_used_t;
-/* *INDENT-OFF* */
-typedef CLIB_PACKED (struct
-{
- u64 addr; // packet data buffer address
- u32 len; // packet data buffer size
- u16 id; // buffer id
- u16 flags; // flags
-}) vring_packed_desc_t;
+typedef CLIB_PACKED (struct {
+ u64 addr; // packet data buffer address
+ u32 len; // packet data buffer size
+ u16 id; // buffer id
+ u16 flags; // flags
+}) vnet_virtio_vring_packed_desc_t;
-STATIC_ASSERT_SIZEOF (vring_packed_desc_t, 16);
+STATIC_ASSERT_SIZEOF (vnet_virtio_vring_packed_desc_t, 16);
-typedef CLIB_PACKED (struct
-{
+typedef CLIB_PACKED (struct {
u16 off_wrap;
u16 flags;
-}) vring_desc_event_t;
+}) vnet_virtio_vring_desc_event_t;
#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */
#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
@@ -148,8 +145,7 @@ typedef CLIB_PACKED (struct
#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */
#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */
-typedef CLIB_PACKED (struct
-{
+typedef CLIB_PACKED (struct {
u8 flags;
u8 gso_type;
u16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */
@@ -157,54 +153,22 @@ typedef CLIB_PACKED (struct
u16 csum_start; /* Position to start checksumming from */
u16 csum_offset; /* Offset after that to place checksum */
u16 num_buffers; /* Number of merged rx buffers */
-}) virtio_net_hdr_v1_t;
+}) vnet_virtio_net_hdr_v1_t;
-typedef CLIB_PACKED (struct
-{
+typedef CLIB_PACKED (struct {
u8 flags;
u8 gso_type;
u16 hdr_len;
u16 gso_size;
u16 csum_start;
u16 csum_offset;
-}) virtio_net_hdr_t;
+}) vnet_virtio_net_hdr_t;
-typedef CLIB_PACKED (struct
-{
- virtio_net_hdr_t hdr;
+typedef CLIB_PACKED (struct {
+ vnet_virtio_net_hdr_t hdr;
u16 num_buffers;
-}) virtio_net_hdr_mrg_rxbuf_t;
+}) vnet_virtio_net_hdr_mrg_rxbuf_t;
-/* *INDENT-ON* */
-
-typedef struct
-{
- u16 num;
- vring_desc_t *desc;
- vring_avail_t *avail;
- vring_used_t *used;
-} vring_t;
-
-static_always_inline void
-vring_init (vring_t * vr, u32 num, void *p, u32 align)
-{
- vr->num = num;
- vr->desc = p;
- vr->avail = (vring_avail_t *) ((char *) p + num * sizeof (vring_desc_t));
- vr->used =
- (vring_used_t *) ((char *) p +
- ((sizeof (vring_desc_t) * num +
- sizeof (u16) * (3 + num) + align - 1) & ~(align -
- 1)));
-}
-
-static_always_inline u16
-vring_size (u32 num, u32 align)
-{
- return ((sizeof (vring_desc_t) * num + sizeof (u16) * (3 + num)
- + align - 1) & ~(align - 1))
- + sizeof (u16) * 3 + sizeof (vring_used_elem_t) * num;
-}
#endif
/*
diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c
index d8342ff17ae..fc789ae0a7f 100644
--- a/src/vnet/dpo/dpo.c
+++ b/src/vnet/dpo/dpo.c
@@ -613,12 +613,10 @@ dpo_module_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION(dpo_module_init) =
{
.runs_before = VLIB_INITS ("ip_main_init"),
};
-/* *INDENT-ON* */
static clib_error_t *
dpo_memory_show (vlib_main_t * vm,
@@ -640,7 +638,6 @@ dpo_memory_show (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
/*?
* The '<em>sh dpo memory </em>' command displays the memory usage for each
* data-plane object type.
@@ -662,6 +659,5 @@ VLIB_CLI_COMMAND (show_fib_memory, static) = {
.function = dpo_memory_show,
.short_help = "show dpo memory",
};
-/* *INDENT-ON* */
// clang-format on
diff --git a/src/vnet/dpo/dpo.h b/src/vnet/dpo/dpo.h
index e9976c2dd87..470359df95c 100644
--- a/src/vnet/dpo/dpo.h
+++ b/src/vnet/dpo/dpo.h
@@ -543,7 +543,7 @@ dpo_get_next_node_by_type_and_proto (dpo_type_t child_type,
#define dpo_pool_barrier_sync(VM,P,YESNO) \
do { \
- pool_get_aligned_will_expand ((P), YESNO, CLIB_CACHE_LINE_BYTES); \
+ YESNO = pool_get_will_expand (P); \
\
if (YESNO) \
{ \
diff --git a/src/vnet/dpo/dvr_dpo.c b/src/vnet/dpo/dvr_dpo.c
index 5db9c803145..2b66467837c 100644
--- a/src/vnet/dpo/dvr_dpo.c
+++ b/src/vnet/dpo/dvr_dpo.c
@@ -206,12 +206,9 @@ format_dvr_dpo (u8* s, va_list *ap)
vnet_main_t * vnm = vnet_get_main();
dvr_dpo_t *dd = dvr_dpo_get(index);
- return (format(s, "%U-dvr-%U-dpo %U",
- format_dpo_proto, dd->dd_proto,
- format_vnet_sw_interface_name,
- vnm,
- vnet_get_sw_interface(vnm, dd->dd_sw_if_index),
- format_dvr_reinject, dd->dd_reinject));
+ return format (s, "%U-dvr-%U-dpo %U", format_dpo_proto, dd->dd_proto,
+ format_vnet_sw_if_index_name, vnm, dd->dd_sw_if_index,
+ format_dvr_reinject, dd->dd_reinject);
}
static void
diff --git a/src/vnet/dpo/interface_rx_dpo.c b/src/vnet/dpo/interface_rx_dpo.c
index d3615d0ce76..5a519d344c1 100644
--- a/src/vnet/dpo/interface_rx_dpo.c
+++ b/src/vnet/dpo/interface_rx_dpo.c
@@ -160,11 +160,8 @@ format_interface_rx_dpo (u8* s, va_list *ap)
vnet_main_t * vnm = vnet_get_main();
interface_rx_dpo_t *ido = interface_rx_dpo_get(index);
- return (format(s, "%U-rx-dpo: %U",
- format_vnet_sw_interface_name,
- vnm,
- vnet_get_sw_interface(vnm, ido->ido_sw_if_index),
- format_dpo_proto, ido->ido_proto));
+ return format (s, "%U-rx-dpo: %U", format_vnet_sw_if_index_name, vnm,
+ ido->ido_sw_if_index, format_dpo_proto, ido->ido_proto);
}
static void
diff --git a/src/vnet/dpo/interface_tx_dpo.c b/src/vnet/dpo/interface_tx_dpo.c
index 870579884a0..73f4e906268 100644
--- a/src/vnet/dpo/interface_tx_dpo.c
+++ b/src/vnet/dpo/interface_tx_dpo.c
@@ -50,10 +50,7 @@ format_interface_tx_dpo (u8* s, va_list *ap)
CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
vnet_main_t * vnm = vnet_get_main();
- return (format(s, "%U-tx-dpo:",
- format_vnet_sw_interface_name,
- vnm,
- vnet_get_sw_interface(vnm, index)));
+ return format (s, "%U-tx-dpo:", format_vnet_sw_if_index_name, vnm, index);
}
static void
diff --git a/src/vnet/dpo/ip6_ll_dpo.c b/src/vnet/dpo/ip6_ll_dpo.c
index f86472c16c5..86908efbc04 100644
--- a/src/vnet/dpo/ip6_ll_dpo.c
+++ b/src/vnet/dpo/ip6_ll_dpo.c
@@ -97,6 +97,11 @@ typedef enum ip6_ll_next_t_
IP6_LL_NEXT_NUM,
} ip6_ll_next_t;
+typedef enum ip6_ll_error_t_
+{
+ IP6_LL_ERROR_NO_TABLE,
+} ip6_ll_error_t;
+
always_inline uword
ip6_ll_dpo_inline (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * frame)
@@ -131,10 +136,19 @@ ip6_ll_dpo_inline (vlib_main_t * vm,
/* use the packet's RX interface to pick the link-local FIB */
fib_index0 =
ip6_ll_fib_get (vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+
+ if (~0 == fib_index0)
+ {
+ next0 = IP6_LL_NEXT_DROP;
+ p0->error = node->errors[IP6_LL_ERROR_NO_TABLE];
+ goto trace0;
+ }
+
/* write that fib index into the packet so it's used in the
* lookup node next */
vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0;
+ trace0:
if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
{
ip6_ll_dpo_trace_t *tr = vlib_add_trace (vm, node, p0,
@@ -170,23 +184,27 @@ ip6_ll_dpo_switch (vlib_main_t * vm,
return (ip6_ll_dpo_inline (vm, node, frame));
}
+static char *ip6_ll_dpo_error_strings[] = {
+ [IP6_LL_ERROR_NO_TABLE] = "Interface is not mapped to an IP6-LL table",
+};
+
/**
* @brief
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_ll_dpo_node) =
{
.function = ip6_ll_dpo_switch,
.name = "ip6-link-local",
.vector_size = sizeof (u32),
.format_trace = format_ip6_ll_dpo_trace,
+ .n_errors = ARRAY_LEN (ip6_ll_dpo_error_strings),
+ .error_strings = ip6_ll_dpo_error_strings,
.n_next_nodes = IP6_LL_NEXT_NUM,
.next_nodes = {
[IP6_LL_NEXT_DROP] = "ip6-drop",
[IP6_LL_NEXT_LOOKUP] = "ip6-lookup",
},
};
-/* *INDENT-ON* */
void
ip6_ll_dpo_module_init (void)
diff --git a/src/vnet/dpo/l3_proxy_dpo.c b/src/vnet/dpo/l3_proxy_dpo.c
index 41156301a0e..f89554d775f 100644
--- a/src/vnet/dpo/l3_proxy_dpo.c
+++ b/src/vnet/dpo/l3_proxy_dpo.c
@@ -116,9 +116,8 @@ format_l3_proxy_dpo (u8 *s, va_list *ap)
if (~0 != l3p->l3p_sw_if_index)
{
- return (format(s, "dpo-l3_proxy: %U",
- format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface(vnm, l3p->l3p_sw_if_index)));
+ return (format (s, "dpo-l3_proxy: %U", format_vnet_sw_if_index_name, vnm,
+ l3p->l3p_sw_if_index));
}
else
{
diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c
index a212532dffd..8f2a0de6ea8 100644
--- a/src/vnet/dpo/load_balance.c
+++ b/src/vnet/dpo/load_balance.c
@@ -100,8 +100,8 @@ load_balance_alloc_i (void)
vlib_main_t *vm = vlib_get_main();
ASSERT (vm->thread_index == 0);
- pool_get_aligned_will_expand (load_balance_pool, need_barrier_sync,
- CLIB_CACHE_LINE_BYTES);
+ need_barrier_sync = pool_get_will_expand (load_balance_pool);
+
if (need_barrier_sync)
vlib_worker_thread_barrier_sync (vm);
@@ -149,7 +149,13 @@ load_balance_format (index_t lbi,
dpo_id_t *buckets;
u32 i;
- lb = load_balance_get(lbi);
+ lb = load_balance_get_or_null(lbi);
+ if (lb == NULL)
+ {
+ s = format(s, "DELETED lb:%u", lbi);
+ return (s);
+ }
+
vlib_get_combined_counter(&(load_balance_main.lbm_to_counters), lbi, &to);
vlib_get_combined_counter(&(load_balance_main.lbm_via_counters), lbi, &via);
buckets = load_balance_get_buckets(lb);
@@ -244,6 +250,8 @@ load_balance_create_i (u32 num_buckets,
{
load_balance_t *lb;
+ ASSERT (num_buckets <= LB_MAX_BUCKETS);
+
lb = load_balance_alloc_i();
lb->lb_hash_config = fhc;
lb->lb_n_buckets = num_buckets;
@@ -408,7 +416,7 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
{
nhs[0] = raw_next_hops[0];
nhs[0].path_weight = 1;
- _vec_len (nhs) = 1;
+ vec_set_len (nhs, 1);
sum_weight = 1;
goto done;
}
@@ -425,7 +433,7 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
if (nhs[0].path_weight == nhs[1].path_weight)
{
nhs[0].path_weight = nhs[1].path_weight = 1;
- _vec_len (nhs) = 2;
+ vec_set_len (nhs, 2);
sum_weight = 2;
goto done;
}
@@ -455,8 +463,9 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
/* Try larger and larger power of 2 sized adjacency blocks until we
find one where traffic flows to within 1% of specified weights. */
- for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
+ for (n_adj = clib_min(max_pow2 (n_nhs), LB_MAX_BUCKETS); ; n_adj *= 2)
{
+ ASSERT (n_adj <= LB_MAX_BUCKETS);
error = 0;
norm = n_adj / ((f64) sum_weight);
@@ -487,12 +496,22 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
nhs[0].path_weight += n_adj_left;
- /* Less than 5% average error per adjacency with this size adjacency block? */
- if (error <= multipath_next_hop_error_tolerance*n_adj)
+ /* Less than 1% average error per adjacency with this size adjacency block,
+ * or have we reached the maximum number of buckets we support? */
+ if (error <= multipath_next_hop_error_tolerance*n_adj ||
+ n_adj >= LB_MAX_BUCKETS)
{
- /* Truncate any next hops with zero weight. */
- _vec_len (nhs) = i;
- break;
+ if (i < n_nhs)
+ {
+ /* Truncate any next hops in excess */
+ vlib_log_err(load_balance_logger,
+ "Too many paths for load-balance, truncating %d -> %d",
+ n_nhs, i);
+ for (int j = i; j < n_nhs; j++)
+ dpo_reset (&vec_elt(nhs, j).path_dpo);
+ }
+ vec_set_len (nhs, i);
+ break;
}
}
@@ -592,6 +611,7 @@ load_balance_fill_buckets_sticky (load_balance_t *lb,
{
/* fill the bucks from the next up path */
load_balance_set_bucket_i(lb, bucket++, buckets, &fwding_paths[fpath].path_dpo);
+ ASSERT(vec_len(fwding_paths) > 0);
fpath = (fpath + 1) % vec_len(fwding_paths);
}
}
@@ -621,6 +641,7 @@ static inline void
load_balance_set_n_buckets (load_balance_t *lb,
u32 n_buckets)
{
+ ASSERT (n_buckets <= LB_MAX_BUCKETS);
lb->lb_n_buckets = n_buckets;
lb->lb_n_buckets_minus_1 = n_buckets-1;
}
@@ -650,8 +671,6 @@ load_balance_multipath_update (const dpo_id_t *dpo,
&sum_of_weights,
multipath_next_hop_error_tolerance);
- ASSERT (n_buckets >= vec_len (raw_nhs));
-
/*
* Save the old load-balance map used, and get a new one if required.
*/
diff --git a/src/vnet/dpo/load_balance.h b/src/vnet/dpo/load_balance.h
index 5428e20e981..eee073f5892 100644
--- a/src/vnet/dpo/load_balance.h
+++ b/src/vnet/dpo/load_balance.h
@@ -50,6 +50,12 @@ typedef struct load_balance_main_t_
extern load_balance_main_t load_balance_main;
/**
+ * The maximum number of buckets that a load-balance object can have
+ * This must not overflow the lb_n_buckets field
+ */
+#define LB_MAX_BUCKETS 8192
+
+/**
* The number of buckets that a load-balance object can have and still
* fit in one cache-line
*/
@@ -176,6 +182,10 @@ typedef struct load_balance_t_ {
STATIC_ASSERT(sizeof(load_balance_t) <= CLIB_CACHE_LINE_BYTES,
"A load_balance object size exceeds one cacheline");
+STATIC_ASSERT (LB_MAX_BUCKETS <= CLIB_U16_MAX,
+ "Too many buckets for load_balance object");
+STATIC_ASSERT (LB_MAX_BUCKETS && !(LB_MAX_BUCKETS & (LB_MAX_BUCKETS - 1)),
+ "LB_MAX_BUCKETS must be a power of 2");
/**
* Flags controlling load-balance formatting/display
@@ -222,6 +232,14 @@ load_balance_get (index_t lbi)
return (pool_elt_at_index(load_balance_pool, lbi));
}
+static inline load_balance_t *
+load_balance_get_or_null (index_t lbi)
+{
+ if (pool_is_free_index (load_balance_pool, lbi))
+ return 0;
+ return (pool_elt_at_index (load_balance_pool, lbi));
+}
+
#define LB_HAS_INLINE_BUCKETS(_lb) \
((_lb)->lb_n_buckets <= LB_NUM_INLINE_BUCKETS)
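The new static asserts rely on the classic bit trick: x is a power of two iff x is non-zero and x & (x - 1) is zero, since decrementing a power of two flips all bits below its single set bit. A quick standalone check (not VPP code):

#include <assert.h>

static int
is_pow2 (unsigned x)
{
  return x && !(x & (x - 1)); /* exactly one bit set */
}

int
main (void)
{
  assert (is_pow2 (8192)); /* LB_MAX_BUCKETS */
  assert (!is_pow2 (8190));
  assert (!is_pow2 (0));
  return 0;
}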
diff --git a/src/vnet/dpo/load_balance_map.c b/src/vnet/dpo/load_balance_map.c
index 55249747e5d..765cd856608 100644
--- a/src/vnet/dpo/load_balance_map.c
+++ b/src/vnet/dpo/load_balance_map.c
@@ -317,7 +317,7 @@ load_balance_map_fill (load_balance_map_t *lbm)
bucket += lbmp->lbmp_weight;
}
}
- _vec_len(tmp_buckets) = jj;
+ vec_set_len (tmp_buckets, jj);
/*
* If the number of temporaries written is as many as we need, implying
diff --git a/src/vnet/dpo/mpls_disposition.c b/src/vnet/dpo/mpls_disposition.c
index 7bc2cb65f87..2f996727a2d 100644
--- a/src/vnet/dpo/mpls_disposition.c
+++ b/src/vnet/dpo/mpls_disposition.c
@@ -431,14 +431,14 @@ VLIB_NODE_FN (ip4_mpls_label_disposition_pipe_node) (vlib_main_t * vm,
FIB_MPLS_LSP_MODE_PIPE));
}
-VLIB_REGISTER_NODE(ip4_mpls_label_disposition_pipe_node) = {
- .name = "ip4-mpls-label-disposition-pipe",
- .vector_size = sizeof(u32),
-
- .format_trace = format_mpls_label_disposition_trace,
- .sibling_of = "ip4-input",
- .n_errors = IP4_N_ERROR,
- .error_strings = ip4_error_strings,
+VLIB_REGISTER_NODE (ip4_mpls_label_disposition_pipe_node) = {
+ .name = "ip4-mpls-label-disposition-pipe",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_mpls_label_disposition_trace,
+ .sibling_of = "ip4-input",
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
};
VLIB_NODE_FN (ip6_mpls_label_disposition_pipe_node) (vlib_main_t * vm,
@@ -449,14 +449,14 @@ VLIB_NODE_FN (ip6_mpls_label_disposition_pipe_node) (vlib_main_t * vm,
FIB_MPLS_LSP_MODE_PIPE));
}
-VLIB_REGISTER_NODE(ip6_mpls_label_disposition_pipe_node) = {
- .name = "ip6-mpls-label-disposition-pipe",
- .vector_size = sizeof(u32),
+VLIB_REGISTER_NODE (ip6_mpls_label_disposition_pipe_node) = {
+ .name = "ip6-mpls-label-disposition-pipe",
+ .vector_size = sizeof (u32),
- .format_trace = format_mpls_label_disposition_trace,
- .sibling_of = "ip6-input",
- .n_errors = IP6_N_ERROR,
- .error_strings = ip6_error_strings,
+ .format_trace = format_mpls_label_disposition_trace,
+ .sibling_of = "ip6-input",
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
};
VLIB_NODE_FN (ip4_mpls_label_disposition_uniform_node) (vlib_main_t * vm,
@@ -467,14 +467,14 @@ VLIB_NODE_FN (ip4_mpls_label_disposition_uniform_node) (vlib_main_t * vm,
FIB_MPLS_LSP_MODE_UNIFORM));
}
-VLIB_REGISTER_NODE(ip4_mpls_label_disposition_uniform_node) = {
- .name = "ip4-mpls-label-disposition-uniform",
- .vector_size = sizeof(u32),
+VLIB_REGISTER_NODE (ip4_mpls_label_disposition_uniform_node) = {
+ .name = "ip4-mpls-label-disposition-uniform",
+ .vector_size = sizeof (u32),
- .format_trace = format_mpls_label_disposition_trace,
- .sibling_of = "ip4-input",
- .n_errors = IP4_N_ERROR,
- .error_strings = ip4_error_strings,
+ .format_trace = format_mpls_label_disposition_trace,
+ .sibling_of = "ip4-input",
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
};
VLIB_NODE_FN (ip6_mpls_label_disposition_uniform_node) (vlib_main_t * vm,
@@ -485,14 +485,14 @@ VLIB_NODE_FN (ip6_mpls_label_disposition_uniform_node) (vlib_main_t * vm,
FIB_MPLS_LSP_MODE_UNIFORM));
}
-VLIB_REGISTER_NODE(ip6_mpls_label_disposition_uniform_node) = {
- .name = "ip6-mpls-label-disposition-uniform",
- .vector_size = sizeof(u32),
+VLIB_REGISTER_NODE (ip6_mpls_label_disposition_uniform_node) = {
+ .name = "ip6-mpls-label-disposition-uniform",
+ .vector_size = sizeof (u32),
- .format_trace = format_mpls_label_disposition_trace,
- .sibling_of = "ip6-input",
- .n_errors = IP6_N_ERROR,
- .error_strings = ip6_error_strings,
+ .format_trace = format_mpls_label_disposition_trace,
+ .sibling_of = "ip6-input",
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
};
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c
index 7856f050cb0..872577dfbe1 100644
--- a/src/vnet/dpo/mpls_label_dpo.c
+++ b/src/vnet/dpo/mpls_label_dpo.c
@@ -84,6 +84,7 @@ mpls_label_dpo_create (fib_mpls_label_t *label_stack,
mld = mpls_label_dpo_alloc();
mld->mld_flags = flags;
+ mld->mld_payload_proto = payload_proto;
dtype = mpls_label_dpo_types[flags];
if (MPLS_LABEL_DPO_MAX_N_LABELS < vec_len(label_stack))
@@ -92,13 +93,12 @@ mpls_label_dpo_create (fib_mpls_label_t *label_stack,
dpo_stack(dtype,
mld->mld_payload_proto,
&mld->mld_dpo,
- drop_dpo_get(DPO_PROTO_MPLS));
+ drop_dpo_get(mld->mld_payload_proto));
}
else
{
mld->mld_n_labels = vec_len(label_stack);
mld->mld_n_hdr_bytes = mld->mld_n_labels * sizeof(mld->mld_hdr[0]);
- mld->mld_payload_proto = payload_proto;
/*
* construct label rewrite headers for each value passed.
@@ -398,22 +398,22 @@ mpls_label_imposition_inline (vlib_main_t * vm,
/* Prefetch next iteration. */
{
- vlib_buffer_t * p2, * p3, *p4, *p5;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
- p4 = vlib_get_buffer (vm, from[4]);
- p5 = vlib_get_buffer (vm, from[5]);
-
- vlib_prefetch_buffer_header (p2, STORE);
- vlib_prefetch_buffer_header (p3, STORE);
- vlib_prefetch_buffer_header (p4, STORE);
- vlib_prefetch_buffer_header (p5, STORE);
-
- CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE);
- CLIB_PREFETCH (p3->data, sizeof (hdr0[0]), STORE);
- CLIB_PREFETCH (p4->data, sizeof (hdr0[0]), STORE);
- CLIB_PREFETCH (p5->data, sizeof (hdr0[0]), STORE);
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ vlib_prefetch_buffer_header (p4, STORE);
+ vlib_prefetch_buffer_header (p5, STORE);
+ vlib_prefetch_buffer_header (p6, STORE);
+ vlib_prefetch_buffer_header (p7, STORE);
+
+ CLIB_PREFETCH (p4->data, sizeof (hdr0[0]), STORE);
+ CLIB_PREFETCH (p5->data, sizeof (hdr0[0]), STORE);
+ CLIB_PREFETCH (p6->data, sizeof (hdr0[0]), STORE);
+ CLIB_PREFETCH (p7->data, sizeof (hdr0[0]), STORE);
}
from += 4;
diff --git a/src/vnet/dpo/receive_dpo.c b/src/vnet/dpo/receive_dpo.c
index 0a97e1d373b..413c3ae5b47 100644
--- a/src/vnet/dpo/receive_dpo.c
+++ b/src/vnet/dpo/receive_dpo.c
@@ -122,10 +122,9 @@ format_receive_dpo (u8 *s, va_list *ap)
if (~0 != rd->rd_sw_if_index)
{
- return (format(s, "dpo-receive: %U on %U",
- format_ip46_address, &rd->rd_addr, IP46_TYPE_ANY,
- format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface(vnm, rd->rd_sw_if_index)));
+ return (format (s, "dpo-receive: %U on %U", format_ip46_address,
+ &rd->rd_addr, IP46_TYPE_ANY,
+ format_vnet_sw_if_index_name, vnm, rd->rd_sw_if_index));
}
else
{
diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c
index 5f88f12b910..0474fd82984 100644
--- a/src/vnet/dpo/replicate_dpo.c
+++ b/src/vnet/dpo/replicate_dpo.c
@@ -172,6 +172,8 @@ replicate_create_i (u32 num_buckets,
{
replicate_t *rep;
+ ASSERT (num_buckets <= REP_MAX_BUCKETS);
+
rep = replicate_alloc_i();
rep->rep_n_buckets = num_buckets;
rep->rep_proto = rep_proto;
@@ -311,7 +313,8 @@ static inline void
replicate_set_n_buckets (replicate_t *rep,
u32 n_buckets)
{
- rep->rep_n_buckets = n_buckets;
+ ASSERT (n_buckets <= REP_MAX_BUCKETS);
+ rep->rep_n_buckets = n_buckets;
}
void
@@ -331,6 +334,17 @@ replicate_multipath_update (const dpo_id_t *dpo,
rep->rep_proto);
n_buckets = vec_len(nhs);
+ if (n_buckets > REP_MAX_BUCKETS)
+ {
+ vlib_log_err (replicate_logger,
+ "Too many paths for replicate, truncating %d -> %d",
+ n_buckets, REP_MAX_BUCKETS);
+ for (int i = REP_MAX_BUCKETS; i < n_buckets; i++)
+ dpo_reset (&vec_elt (nhs, i).path_dpo);
+ vec_set_len (nhs, REP_MAX_BUCKETS);
+ n_buckets = REP_MAX_BUCKETS;
+ }
+
if (0 == rep->rep_n_buckets)
{
/*
diff --git a/src/vnet/dpo/replicate_dpo.h b/src/vnet/dpo/replicate_dpo.h
index 908c20c1d56..d21f52a4833 100644
--- a/src/vnet/dpo/replicate_dpo.h
+++ b/src/vnet/dpo/replicate_dpo.h
@@ -41,6 +41,12 @@ typedef struct replicate_main_t_
extern replicate_main_t replicate_main;
/**
+ * The maximum number of buckets that a replicate object can have
+ * This must not overflow the rep_n_buckets field
+ */
+#define REP_MAX_BUCKETS 1024
+
+/**
* The number of buckets that a load-balance object can have and still
* fit in one cache-line
*/
@@ -108,6 +114,8 @@ typedef struct replicate_t_ {
STATIC_ASSERT(sizeof(replicate_t) <= CLIB_CACHE_LINE_BYTES,
"A replicate object size exceeds one cacheline");
+STATIC_ASSERT (REP_MAX_BUCKETS <= CLIB_U16_MAX,
+ "Too many buckets for replicate object");
/**
* Flags controlling load-balance formatting/display
diff --git a/src/vnet/error.c b/src/vnet/error.c
new file mode 100644
index 00000000000..473d11135f1
--- /dev/null
+++ b/src/vnet/error.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/error.h>
+#include <vnet/api_errno.h>
+
+static char *error_strings[] = {
+#define _(a, b, c) [-(b)] = c,
+ foreach_vnet_error
+#undef _
+};
+
+clib_error_t *
+vnet_error (vnet_error_t rv, char *fmt, ...)
+{
+ clib_error_t *e, *err = 0;
+ va_list va;
+ vec_add2 (err, e, 1);
+ e->what = format (e->what, "%s", error_strings[-rv]);
+
+ if (fmt)
+ {
+ vec_add1 (e->what, ' ');
+ vec_add1 (e->what, '(');
+ va_start (va, fmt);
+ e->what = va_format (e->what, fmt, &va);
+ vec_add1 (e->what, ')');
+ va_end (va);
+ }
+
+ e->code = rv;
+ return err;
+}
+
+u8 *
+format_vnet_api_errno (u8 *s, va_list *args)
+{
+ vnet_api_error_t api_error = va_arg (*args, vnet_api_error_t);
+#ifdef _
+#undef _
+#endif
+#define _(a, b, c) \
+ case b: \
+ s = format (s, "%s", c); \
+ break;
+ switch (api_error)
+ {
+ foreach_vnet_error default : s = format (s, "UNKNOWN");
+ break;
+ }
+ return s;
+#undef _
+}
diff --git a/src/vnet/error.h b/src/vnet/error.h
new file mode 100644
index 00000000000..fa1337538c4
--- /dev/null
+++ b/src/vnet/error.h
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+#ifndef included_vnet_error_h
+#define included_vnet_error_h
+
+#include <stdarg.h>
+#include <vppinfra/types.h>
+#include <vppinfra/format.h>
+
+#define foreach_vnet_error \
+ _ (UNSPECIFIED, -1, "Unspecified Error") \
+ _ (INVALID_SW_IF_INDEX, -2, "Invalid sw_if_index") \
+ _ (NO_SUCH_FIB, -3, "No such FIB / VRF") \
+ _ (NO_SUCH_INNER_FIB, -4, "No such inner FIB / VRF") \
+ _ (NO_SUCH_LABEL, -5, "No such label") \
+ _ (NO_SUCH_ENTRY, -6, "No such entry") \
+ _ (INVALID_VALUE, -7, "Invalid value") \
+ _ (INVALID_VALUE_2, -8, "Invalid value #2") \
+ _ (UNIMPLEMENTED, -9, "Unimplemented") \
+ _ (INVALID_SW_IF_INDEX_2, -10, "Invalid sw_if_index #2") \
+ _ (SYSCALL_ERROR_1, -11, "System call error #1") \
+ _ (SYSCALL_ERROR_2, -12, "System call error #2") \
+ _ (SYSCALL_ERROR_3, -13, "System call error #3") \
+ _ (SYSCALL_ERROR_4, -14, "System call error #4") \
+ _ (SYSCALL_ERROR_5, -15, "System call error #5") \
+ _ (SYSCALL_ERROR_6, -16, "System call error #6") \
+ _ (SYSCALL_ERROR_7, -17, "System call error #7") \
+ _ (SYSCALL_ERROR_8, -18, "System call error #8") \
+ _ (SYSCALL_ERROR_9, -19, "System call error #9") \
+ _ (SYSCALL_ERROR_10, -20, "System call error #10") \
+ _ (FEATURE_DISABLED, -30, "Feature disabled by configuration") \
+ _ (INVALID_REGISTRATION, -31, "Invalid registration") \
+ _ (NEXT_HOP_NOT_IN_FIB, -50, "Next hop not in FIB") \
+ _ (UNKNOWN_DESTINATION, -51, "Unknown destination") \
+ _ (NO_PATHS_IN_ROUTE, -52, "No paths specified in route") \
+ _ (NEXT_HOP_NOT_FOUND_MP, -53, "Next hop not found (multipath)") \
+ _ (NO_MATCHING_INTERFACE, -54, "No matching interface for probe") \
+ _ (INVALID_VLAN, -55, "Invalid VLAN") \
+ _ (VLAN_ALREADY_EXISTS, -56, "VLAN subif already exists") \
+ _ (INVALID_SRC_ADDRESS, -57, "Invalid src address") \
+ _ (INVALID_DST_ADDRESS, -58, "Invalid dst address") \
+ _ (ADDRESS_LENGTH_MISMATCH, -59, "Address length mismatch") \
+ _ (ADDRESS_NOT_FOUND_FOR_INTERFACE, -60, "Address not found for interface") \
+ _ (ADDRESS_NOT_DELETABLE, -61, "Address not deletable") \
+ _ (IP6_NOT_ENABLED, -62, "ip6 not enabled") \
+ _ (NO_SUCH_NODE, -63, "No such graph node") \
+ _ (NO_SUCH_NODE2, -64, "No such graph node #2") \
+ _ (NO_SUCH_TABLE, -65, "No such table") \
+ _ (NO_SUCH_TABLE2, -66, "No such table #2") \
+ _ (NO_SUCH_TABLE3, -67, "No such table #3") \
+ _ (SUBIF_ALREADY_EXISTS, -68, "Subinterface already exists") \
+ _ (SUBIF_CREATE_FAILED, -69, "Subinterface creation failed") \
+ _ (INVALID_MEMORY_SIZE, -70, "Invalid memory size requested") \
+ _ (INVALID_INTERFACE, -71, "Invalid interface") \
+ _ (INVALID_VLAN_TAG_COUNT, -72, \
+ "Invalid number of tags for requested operation") \
+ _ (INVALID_ARGUMENT, -73, "Invalid argument") \
+ _ (UNEXPECTED_INTF_STATE, -74, "Unexpected interface state") \
+ _ (TUNNEL_EXIST, -75, "Tunnel already exists") \
+ _ (INVALID_DECAP_NEXT, -76, "Invalid decap-next") \
+ _ (RESPONSE_NOT_READY, -77, "Response not ready") \
+ _ (NOT_CONNECTED, -78, "Not connected to the data plane") \
+ _ (IF_ALREADY_EXISTS, -79, "Interface already exists") \
+ _ (BOND_SLAVE_NOT_ALLOWED, -80, \
+ "Operation not allowed on slave of BondEthernet") \
+ _ (VALUE_EXIST, -81, "Value already exists") \
+ _ (SAME_SRC_DST, -82, "Source and destination are the same") \
+ _ (IP6_MULTICAST_ADDRESS_NOT_PRESENT, -83, \
+ "IP6 multicast address required") \
+ _ (SR_POLICY_NAME_NOT_PRESENT, -84, "Segment routing policy name required") \
+ _ (NOT_RUNNING_AS_ROOT, -85, "Not running as root") \
+ _ (ALREADY_CONNECTED, -86, "Connection to the data plane already exists") \
+ _ (UNSUPPORTED_JNI_VERSION, -87, "Unsupported JNI version") \
+ _ (IP_PREFIX_INVALID, -88, "IP prefix invalid (masked bits set in address)") \
+ _ (INVALID_WORKER, -89, "Invalid worker thread") \
+ _ (LISP_DISABLED, -90, "LISP is disabled") \
+ _ (CLASSIFY_TABLE_NOT_FOUND, -91, "Classify table not found") \
+ _ (INVALID_EID_TYPE, -92, "Unsupported LISP EID type") \
+ _ (CANNOT_CREATE_PCAP_FILE, -93, "Cannot create pcap file") \
+ _ (INCORRECT_ADJACENCY_TYPE, -94, \
+ "Invalid adjacency type for this operation") \
+ _ (EXCEEDED_NUMBER_OF_RANGES_CAPACITY, -95, \
+ "Operation would exceed configured capacity of ranges") \
+ _ (EXCEEDED_NUMBER_OF_PORTS_CAPACITY, -96, \
+ "Operation would exceed capacity of number of ports") \
+ _ (INVALID_ADDRESS_FAMILY, -97, "Invalid address family") \
+ _ (INVALID_SUB_SW_IF_INDEX, -98, "Invalid sub-interface sw_if_index") \
+ _ (TABLE_TOO_BIG, -99, "Table too big") \
+ _ (CANNOT_ENABLE_DISABLE_FEATURE, -100, "Cannot enable/disable feature") \
+ _ (BFD_EEXIST, -101, "Duplicate BFD object") \
+ _ (BFD_ENOENT, -102, "No such BFD object") \
+ _ (BFD_EINUSE, -103, "BFD object in use") \
+ _ (BFD_NOTSUPP, -104, "BFD feature not supported") \
+ _ (ADDRESS_IN_USE, -105, "Address in use") \
+ _ (ADDRESS_NOT_IN_USE, -106, "Address not in use") \
+ _ (QUEUE_FULL, -107, "Queue full") \
+ _ (APP_UNSUPPORTED_CFG, -108, "Unsupported application config") \
+ _ (URI_FIFO_CREATE_FAILED, -109, "URI FIFO segment create failed") \
+ _ (LISP_RLOC_LOCAL, -110, "RLOC address is local") \
+ _ (BFD_EAGAIN, -111, "BFD object cannot be manipulated at this time") \
+ _ (INVALID_GPE_MODE, -112, "Invalid GPE mode") \
+ _ (LISP_GPE_ENTRIES_PRESENT, -113, "LISP GPE entries are present") \
+ _ (ADDRESS_FOUND_FOR_INTERFACE, -114, "Address found for interface") \
+ _ (SESSION_CONNECT, -115, "Session failed to connect") \
+ _ (ENTRY_ALREADY_EXISTS, -116, "Entry already exists") \
+ _ (SVM_SEGMENT_CREATE_FAIL, -117, "Svm segment create fail") \
+ _ (APPLICATION_NOT_ATTACHED, -118, "Application not attached") \
+ _ (BD_ALREADY_EXISTS, -119, "Bridge domain already exists") \
+ _ (BD_IN_USE, -120, "Bridge domain has member interfaces") \
+ _ (BD_NOT_MODIFIABLE, -121, "Bridge domain 0 can't be deleted/modified") \
+ _ (BD_ID_EXCEED_MAX, -122, "Bridge domain ID exceeds 16M limit") \
+ _ (SUBIF_DOESNT_EXIST, -123, "Subinterface doesn't exist") \
+ _ (L2_MACS_EVENT_CLINET_PRESENT, -124, \
+ "Client already exist for L2 MACs events") \
+ _ (INVALID_QUEUE, -125, "Invalid queue") \
+ _ (UNSUPPORTED, -126, "Unsupported") \
+ _ (DUPLICATE_IF_ADDRESS, -127, \
+ "Address already present on another interface") \
+ _ (APP_INVALID_NS, -128, "Invalid application namespace") \
+ _ (APP_WRONG_NS_SECRET, -129, "Wrong app namespace secret") \
+ _ (APP_CONNECT_SCOPE, -130, "Connect scope") \
+ _ (APP_ALREADY_ATTACHED, -131, "App already attached") \
+ _ (SESSION_REDIRECT, -132, "Redirect failed") \
+ _ (ILLEGAL_NAME, -133, "Illegal name") \
+ _ (NO_NAME_SERVERS, -134, "No name servers configured") \
+ _ (NAME_SERVER_NOT_FOUND, -135, "Name server not found") \
+ _ (NAME_RESOLUTION_NOT_ENABLED, -136, "Name resolution not enabled") \
+ _ (NAME_SERVER_FORMAT_ERROR, -137, "Server format error (bug!)") \
+ _ (NAME_SERVER_NO_SUCH_NAME, -138, "No such name") \
+ _ (NAME_SERVER_NO_ADDRESSES, -139, "No addresses available") \
+ _ (NAME_SERVER_NEXT_SERVER, -140, "Retry with new server") \
+ _ (APP_CONNECT_FILTERED, -141, "Connect was filtered") \
+ _ (ACL_IN_USE_INBOUND, -142, "Inbound ACL in use") \
+ _ (ACL_IN_USE_OUTBOUND, -143, "Outbound ACL in use") \
+ _ (INIT_FAILED, -144, "Initialization Failed") \
+ _ (NETLINK_ERROR, -145, "Netlink error") \
+ _ (BIER_BSL_UNSUP, -146, "BIER bit-string-length unsupported") \
+ _ (INSTANCE_IN_USE, -147, "Instance in use") \
+ _ (INVALID_SESSION_ID, -148, "Session ID out of range") \
+ _ (ACL_IN_USE_BY_LOOKUP_CONTEXT, -149, "ACL in use by a lookup context") \
+ _ (INVALID_VALUE_3, -150, "Invalid value #3") \
+ _ (NON_ETHERNET, -151, "Interface is not an Ethernet interface") \
+ _ (BD_ALREADY_HAS_BVI, -152, "Bridge domain already has a BVI interface") \
+ _ (INVALID_PROTOCOL, -153, "Invalid Protocol") \
+ _ (INVALID_ALGORITHM, -154, "Invalid Algorithm") \
+ _ (RSRC_IN_USE, -155, "Resource In Use") \
+ _ (KEY_LENGTH, -156, "invalid Key Length") \
+ _ (FIB_PATH_UNSUPPORTED_NH_PROTO, -157, "Unsupported FIB Path protocol") \
+ _ (API_ENDIAN_FAILED, -159, "Endian mismatch detected") \
+ _ (NO_CHANGE, -160, "No change in table") \
+ _ (MISSING_CERT_KEY, -161, "Missing certificate or key") \
+ _ (LIMIT_EXCEEDED, -162, "limit exceeded") \
+ _ (IKE_NO_PORT, -163, "port not managed by IKE") \
+ _ (UDP_PORT_TAKEN, -164, "UDP port already taken") \
+ _ (EAGAIN, -165, "Retry stream call with cursor") \
+ _ (INVALID_VALUE_4, -166, "Invalid value #4") \
+ _ (BUSY, -167, "Busy") \
+ _ (BUG, -168, "Bug") \
+ _ (FEATURE_ALREADY_DISABLED, -169, "Feature already disabled") \
+ _ (FEATURE_ALREADY_ENABLED, -170, "Feature already enabled") \
+ _ (INVALID_PREFIX_LENGTH, -171, "Invalid prefix length")
+
+typedef enum
+{
+#define _(a, b, c) VNET_ERR_##a = (b),
+ foreach_vnet_error
+#undef _
+ VNET_N_ERROR,
+} vnet_error_t;
+
+clib_error_t __clib_warn_unused_result *vnet_error (vnet_error_t code,
+ char *fmt, ...);
+
+format_function_t format_vnet_api_errno;
+
+#endif
diff --git a/src/vnet/ethernet/arp_packet.h b/src/vnet/ethernet/arp_packet.h
index 6b4dfa6ab3d..9a9df680853 100644
--- a/src/vnet/ethernet/arp_packet.h
+++ b/src/vnet/ethernet/arp_packet.h
@@ -110,22 +110,10 @@ typedef enum
IP4_ARP_N_NEXT,
} ip4_arp_next_t;
-typedef enum
-{
- IP4_ARP_ERROR_THROTTLED,
- IP4_ARP_ERROR_RESOLVED,
- IP4_ARP_ERROR_NO_BUFFERS,
- IP4_ARP_ERROR_REQUEST_SENT,
- IP4_ARP_ERROR_NON_ARP_ADJ,
- IP4_ARP_ERROR_NO_SOURCE_ADDRESS,
-} ip4_arp_error_t;
-
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
mac_address_t mac;
ip4_address_t ip4;
}) ethernet_arp_ip4_over_ethernet_address_t;
-/* *INDENT-ON* */
STATIC_ASSERT (sizeof (ethernet_arp_ip4_over_ethernet_address_t) == 10,
"Packet ethernet address and IP4 address too big");
diff --git a/src/vnet/ethernet/ethernet.h b/src/vnet/ethernet/ethernet.h
index f3dd1a24a4f..858400d08d8 100644
--- a/src/vnet/ethernet/ethernet.h
+++ b/src/vnet/ethernet/ethernet.h
@@ -128,6 +128,15 @@ struct vnet_hw_interface_t;
typedef u32 (ethernet_flag_change_function_t)
(vnet_main_t * vnm, struct vnet_hw_interface_t * hi, u32 flags);
+typedef struct
+{
+ /* ethernet interface flags change */
+ ethernet_flag_change_function_t *flag_change;
+
+ /* set Max Frame Size callback */
+ vnet_interface_set_max_frame_size_function_t *set_max_frame_size;
+} vnet_eth_if_callbacks_t;
+
#define ETHERNET_MIN_PACKET_BYTES 64
#define ETHERNET_MAX_PACKET_BYTES 9216
@@ -161,11 +170,8 @@ typedef struct ethernet_interface
/* Set interface to accept all packets (promiscuous mode). */
#define ETHERNET_INTERFACE_FLAG_ACCEPT_ALL 1
- /* Change MTU on interface from hw interface structure */
-#define ETHERNET_INTERFACE_FLAG_MTU 2
-
/* Callback, e.g. to turn on/off promiscuous mode */
- ethernet_flag_change_function_t *flag_change;
+ vnet_eth_if_callbacks_t cb;
u32 driver_instance;
@@ -353,14 +359,6 @@ mac_address_t *ethernet_interface_add_del_address (ethernet_main_t * em,
const u8 * address,
u8 is_add);
-clib_error_t *ethernet_register_interface (vnet_main_t * vnm,
- u32 dev_class_index,
- u32 dev_instance,
- const u8 * address,
- u32 * hw_if_index_return,
- ethernet_flag_change_function_t
- flag_change);
-
void ethernet_delete_interface (vnet_main_t * vnm, u32 hw_if_index);
/* Register given node index to take input for given ethernet type. */
@@ -574,6 +572,18 @@ vnet_get_ethernet_main (void)
return &ethernet_main;
}
+typedef struct
+{
+ u32 dev_class_index;
+ u32 dev_instance;
+ u16 max_frame_size;
+ u16 frame_overhead;
+ vnet_eth_if_callbacks_t cb;
+ const u8 *address;
+} vnet_eth_interface_registration_t;
+
+u32 vnet_eth_register_interface (vnet_main_t *vnm,
+ vnet_eth_interface_registration_t *r);
void ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai);
u8 *ethernet_build_rewrite (vnet_main_t * vnm,
u32 sw_if_index,
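Driver-side, the new registration struct replaces the removed ethernet_register_interface() call. A hedged sketch of the intended usage (device class, instance, MAC, and callback names are placeholders for whatever the driver provides):

vnet_eth_interface_registration_t eir = {
  .dev_class_index = my_device_class.index, /* placeholder device class */
  .dev_instance = my_instance,
  .address = my_mac, /* 6-byte MAC */
  .cb = {
    .flag_change = my_flag_change, /* e.g. promiscuous-mode toggle */
    .set_max_frame_size = my_set_max_frame_size, /* optional */
  },
  /* .max_frame_size and .frame_overhead fall back to ethernet
     defaults when left 0, per vnet_eth_register_interface() above */
};
u32 hw_if_index = vnet_eth_register_interface (vnm, &eir);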
diff --git a/src/vnet/ethernet/init.c b/src/vnet/ethernet/init.c
index f78b65c7cc0..3921e1ec0e6 100644
--- a/src/vnet/ethernet/init.c
+++ b/src/vnet/ethernet/init.c
@@ -62,7 +62,6 @@ add_type (ethernet_main_t * em, ethernet_type_t type, char *type_name)
}
/* Built-in ip4 tx feature path definition */
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ethernet_output, static) =
{
.arc_name = "ethernet-output",
@@ -77,7 +76,6 @@ VNET_FEATURE_INIT (ethernet_tx_drop, static) =
.node_name = "error-drop",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
static clib_error_t *
ethernet_init (vlib_main_t * vm)
@@ -107,7 +105,6 @@ ethernet_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ethernet_init) =
{
/*
@@ -119,7 +116,6 @@ VLIB_INIT_FUNCTION (ethernet_init) =
"llc_init",
"vnet_feature_init"),
};
-/* *INDENT-ON* */
ethernet_main_t *
ethernet_get_main (vlib_main_t * vm)
diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c
index d287748c0e6..f1bb6b81070 100644
--- a/src/vnet/ethernet/interface.c
+++ b/src/vnet/ethernet/interface.c
@@ -303,16 +303,40 @@ ethernet_mac_change (vnet_hw_interface_t * hi,
{
ethernet_address_change_ctx_t *cb;
+ u32 id, sw_if_index;
vec_foreach (cb, em->address_change_callbacks)
- cb->function (em, hi->sw_if_index, cb->function_opaque);
+ {
+ cb->function (em, hi->sw_if_index, cb->function_opaque);
+ /* clang-format off */
+ hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
+ ({
+ cb->function (em, sw_if_index, cb->function_opaque);
+ }));
+ /* clang-format on */
+ }
}
return (NULL);
}
-/* *INDENT-OFF* */
+static clib_error_t *
+ethernet_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hi,
+ u32 frame_size)
+{
+ ethernet_interface_t *ei =
+ pool_elt_at_index (ethernet_main.interfaces, hi->hw_instance);
+
+ if (ei->cb.set_max_frame_size)
+ return ei->cb.set_max_frame_size (vnm, hi, frame_size);
+
+ return vnet_error (
+ VNET_ERR_UNSUPPORTED,
+ "underlying driver doesn't support changing Max Frame Size");
+}
+
VNET_HW_INTERFACE_CLASS (ethernet_hw_interface_class) = {
.name = "Ethernet",
+ .tx_hash_fn_type = VNET_HASH_FN_TYPE_ETHERNET,
.format_address = format_ethernet_address,
.format_header = format_ethernet_header_with_length,
.unformat_hw_address = unformat_ethernet_address,
@@ -320,8 +344,8 @@ VNET_HW_INTERFACE_CLASS (ethernet_hw_interface_class) = {
.build_rewrite = ethernet_build_rewrite,
.update_adjacency = ethernet_update_adjacency,
.mac_addr_change_function = ethernet_mac_change,
+ .set_max_frame_size = ethernet_set_max_frame_size,
};
-/* *INDENT-ON* */
uword
unformat_ethernet_interface (unformat_input_t * input, va_list * args)
@@ -344,49 +368,41 @@ unformat_ethernet_interface (unformat_input_t * input, va_list * args)
return 0;
}
-clib_error_t *
-ethernet_register_interface (vnet_main_t * vnm,
- u32 dev_class_index,
- u32 dev_instance,
- const u8 * address,
- u32 * hw_if_index_return,
- ethernet_flag_change_function_t flag_change)
+u32
+vnet_eth_register_interface (vnet_main_t *vnm,
+ vnet_eth_interface_registration_t *r)
{
ethernet_main_t *em = &ethernet_main;
ethernet_interface_t *ei;
vnet_hw_interface_t *hi;
- clib_error_t *error = 0;
u32 hw_if_index;
pool_get (em->interfaces, ei);
- ei->flag_change = flag_change;
+ clib_memcpy (&ei->cb, &r->cb, sizeof (vnet_eth_if_callbacks_t));
- hw_if_index = vnet_register_interface
- (vnm,
- dev_class_index, dev_instance,
- ethernet_hw_interface_class.index, ei - em->interfaces);
- *hw_if_index_return = hw_if_index;
+ hw_if_index = vnet_register_interface (
+ vnm, r->dev_class_index, r->dev_instance,
+ ethernet_hw_interface_class.index, ei - em->interfaces);
hi = vnet_get_hw_interface (vnm, hw_if_index);
ethernet_setup_node (vnm->vlib_main, hi->output_node_index);
- hi->min_packet_bytes = hi->min_supported_packet_bytes =
- ETHERNET_MIN_PACKET_BYTES;
- hi->max_packet_bytes = hi->max_supported_packet_bytes =
- ETHERNET_MAX_PACKET_BYTES;
+ hi->min_frame_size = ETHERNET_MIN_PACKET_BYTES;
+ hi->frame_overhead =
+ r->frame_overhead ?
+ r->frame_overhead :
+ sizeof (ethernet_header_t) + 2 * sizeof (ethernet_vlan_header_t);
+ hi->max_frame_size = r->max_frame_size ?
+ r->max_frame_size :
+                         em->default_mtu + hi->frame_overhead;
  /* Default ethernet MTU, 9000 unless set by ethernet_config (see below) */
vnet_sw_interface_set_mtu (vnm, hi->sw_if_index, em->default_mtu);
- ethernet_set_mac (hi, ei, address);
-
- if (error)
- {
- pool_put (em->interfaces, ei);
- return error;
- }
- return error;
+ ethernet_set_mac (hi, ei, r->address);
+ return hw_if_index;
}
void
@@ -454,14 +470,14 @@ ethernet_set_flags (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
/* preserve status bits and update last set operation bits */
ei->flags = (ei->flags & ETHERNET_INTERFACE_FLAGS_STATUS_MASK) | opn_flags;
- if (ei->flag_change)
+ if (ei->cb.flag_change)
{
switch (opn_flags)
{
case ETHERNET_INTERFACE_FLAG_DEFAULT_L3:
- if (hi->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_MAC_FILTER)
+ if (hi->caps & VNET_HW_IF_CAP_MAC_FILTER)
{
- if (ei->flag_change (vnm, hi, opn_flags) != ~0)
+ if (ei->cb.flag_change (vnm, hi, opn_flags) != ~0)
{
ei->flags |= ETHERNET_INTERFACE_FLAG_STATUS_L3;
return 0;
@@ -472,9 +488,7 @@ ethernet_set_flags (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
/* fall through */
case ETHERNET_INTERFACE_FLAG_ACCEPT_ALL:
ei->flags &= ~ETHERNET_INTERFACE_FLAG_STATUS_L3;
- /* fall through */
- case ETHERNET_INTERFACE_FLAG_MTU:
- return ei->flag_change (vnm, hi, opn_flags);
+ return ei->cb.flag_change (vnm, hi, opn_flags);
default:
return ~0;
}
@@ -520,7 +534,7 @@ simulated_ethernet_interface_tx (vlib_main_t * vm,
while (n_left_from >= 4)
{
u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3;
- u32 not_all_match_config;
+ u32x4 xor_ifx4;
/* Prefetch next iteration. */
if (PREDICT_TRUE (n_left_from >= 8))
@@ -537,12 +551,11 @@ simulated_ethernet_interface_tx (vlib_main_t * vm,
sw_if_index2 = vnet_buffer (b[2])->sw_if_index[VLIB_TX];
sw_if_index3 = vnet_buffer (b[3])->sw_if_index[VLIB_TX];
- not_all_match_config = (sw_if_index0 ^ sw_if_index1)
- ^ (sw_if_index2 ^ sw_if_index3);
- not_all_match_config += sw_if_index0 ^ new_rx_sw_if_index;
+ xor_ifx4 = u32x4_gather (&sw_if_index0, &sw_if_index1, &sw_if_index2,
+ &sw_if_index3);
/* Speed path / expected case: all pkts on the same intfc */
- if (PREDICT_TRUE (not_all_match_config == 0))
+ if (PREDICT_TRUE (u32x4_is_all_equal (xor_ifx4, new_rx_sw_if_index)))
{
next[0] = next_index;
next[1] = next_index;
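
The vector test above is equivalent to this scalar check (a sketch for
clarity only; the vector form tests all four lanes at once):

    /* fast path iff every TX sw_if_index in the quad equals the
     * loopback's new RX interface */
    int all_same = sw_if_index0 == new_rx_sw_if_index &&
                   sw_if_index1 == new_rx_sw_if_index &&
                   sw_if_index2 == new_rx_sw_if_index &&
                   sw_if_index3 == new_rx_sw_if_index;
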
@@ -745,7 +758,6 @@ simulated_ethernet_mac_change (vnet_hw_interface_t * hi,
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (ethernet_simulated_device_class) = {
.name = "Loopback",
.format_device_name = format_simulated_ethernet_name,
@@ -753,7 +765,6 @@ VNET_DEVICE_CLASS (ethernet_simulated_device_class) = {
.admin_up_down_function = simulated_ethernet_admin_up_down,
.mac_addr_change_function = simulated_ethernet_mac_change,
};
-/* *INDENT-ON* */
/*
* Maintain a bitmap of allocated loopback instance numbers.
@@ -833,13 +844,11 @@ vnet_create_loopback_interface (u32 * sw_if_indexp, u8 * mac_address,
{
vnet_main_t *vnm = vnet_get_main ();
vlib_main_t *vm = vlib_get_main ();
- clib_error_t *error;
u32 instance;
u8 address[6];
u32 hw_if_index;
vnet_hw_interface_t *hw_if;
u32 slot;
- int rv = 0;
ASSERT (sw_if_indexp);
@@ -871,18 +880,11 @@ vnet_create_loopback_interface (u32 * sw_if_indexp, u8 * mac_address,
address[5] = instance;
}
- error = ethernet_register_interface
- (vnm,
- ethernet_simulated_device_class.index, instance, address, &hw_if_index,
- /* flag change */ 0);
-
- if (error)
- {
- rv = VNET_API_ERROR_INVALID_REGISTRATION;
- clib_error_report (error);
- return rv;
- }
-
+ vnet_eth_interface_registration_t eir = {};
+ eir.dev_class_index = ethernet_simulated_device_class.index;
+ eir.dev_instance = instance;
+ eir.address = address;
+ hw_if_index = vnet_eth_register_interface (vnm, &eir);
hw_if = vnet_get_hw_interface (vnm, hw_if_index);
slot = vlib_node_add_named_next_with_slot
(vm, hw_if->tx_node_index,
@@ -951,13 +953,11 @@ create_simulated_ethernet_interfaces (vlib_main_t * vm,
* Example of how to create a loopback interface:
* @cliexcmd{loopback create-interface}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_simulated_ethernet_interface_command, static) = {
.path = "loopback create-interface",
.short_help = "loopback create-interface [mac <mac-addr>] [instance <instance>]",
.function = create_simulated_ethernet_interfaces,
};
-/* *INDENT-ON* */
/*?
* Create a loopback interface. Optionally, a MAC Address can be
@@ -970,13 +970,11 @@ VLIB_CLI_COMMAND (create_simulated_ethernet_interface_command, static) = {
* Example of how to create a loopback interface:
* @cliexcmd{create loopback interface}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_loopback_interface_command, static) = {
.path = "create loopback interface",
.short_help = "create loopback interface [mac <mac-addr>] [instance <instance>]",
.function = create_simulated_ethernet_interfaces,
};
-/* *INDENT-ON* */
ethernet_interface_t *
ethernet_get_interface (ethernet_main_t * em, u32 hw_if_index)
@@ -1187,13 +1185,11 @@ delete_sub_interface (vlib_main_t * vm,
* Example of how to delete a loopback interface:
* @cliexcmd{loopback delete-interface intfc loop0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (delete_simulated_ethernet_interface_command, static) = {
.path = "loopback delete-interface",
.short_help = "loopback delete-interface intfc <interface>",
.function = delete_simulated_ethernet_interfaces,
};
-/* *INDENT-ON* */
/*?
* Delete a loopback interface.
@@ -1205,13 +1201,11 @@ VLIB_CLI_COMMAND (delete_simulated_ethernet_interface_command, static) = {
* Example of how to delete a loopback interface:
* @cliexcmd{delete loopback interface intfc loop0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (delete_loopback_interface_command, static) = {
.path = "delete loopback interface",
.short_help = "delete loopback interface intfc <interface>",
.function = delete_simulated_ethernet_interfaces,
};
-/* *INDENT-ON* */
/*?
* Delete a sub-interface.
@@ -1220,13 +1214,11 @@ VLIB_CLI_COMMAND (delete_loopback_interface_command, static) = {
* Example of how to delete a sub-interface:
* @cliexcmd{delete sub-interface GigabitEthernet0/8/0.200}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (delete_sub_interface_command, static) = {
.path = "delete sub-interface",
.short_help = "delete sub-interface <interface>",
.function = delete_sub_interface,
};
-/* *INDENT-ON* */
/* ethernet { ... } configuration. */
/*?
diff --git a/src/vnet/ethernet/mac_address.c b/src/vnet/ethernet/mac_address.c
index b7981299700..098b3ce19c1 100644
--- a/src/vnet/ethernet/mac_address.c
+++ b/src/vnet/ethernet/mac_address.c
@@ -15,13 +15,11 @@
#include <vnet/ethernet/mac_address.h>
-/* *INDENT-OFF* */
const mac_address_t ZERO_MAC_ADDRESS = {
.bytes = {
0, 0, 0, 0, 0, 0,
},
};
-/* *INDENT-ON* */
u8 *
format_mac_address_t (u8 * s, va_list * args)
@@ -66,9 +64,9 @@ mac_address_increment (mac_address_t * mac)
{
u64 a;
- a = mac_address_as_u64 (mac);
+ a = ethernet_mac_address_u64 (mac->bytes);
a++;
- mac_address_from_u64 (mac, a);
+ ethernet_mac_address_from_u64 (a, mac->bytes);
}
/*
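
A worked example of the increment, assuming the helpers round-trip the
48-bit address through a u64 (values illustrative):

    mac_address_t mac = { .bytes = { 0x2, 0xfe, 0x0, 0x0, 0x44, 0xff } };
    mac_address_increment (&mac);
    /* carry propagates: 02:fe:00:00:44:ff -> 02:fe:00:00:45:00 */
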
diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c
index 214e68809cc..03cbdde1c2b 100644
--- a/src/vnet/ethernet/node.c
+++ b/src/vnet/ethernet/node.c
@@ -225,25 +225,24 @@ identify_subint (ethernet_main_t * em,
// A unicast packet arriving on an L3 interface must have a dmac
// matching the interface mac. If interface has STATUS_L3 bit set
// mac filter is already done.
- if (!(*is_l2 || (ei->flags & ETHERNET_INTERFACE_FLAG_STATUS_L3)))
+ if ((!*is_l2) && ei &&
+ (!(ei->flags & ETHERNET_INTERFACE_FLAG_STATUS_L3)))
{
u64 dmacs[2];
u8 dmacs_bad[2];
ethernet_header_t *e0;
- ethernet_interface_t *ei0;
e0 = (void *) (b0->data + vnet_buffer (b0)->l2_hdr_offset);
dmacs[0] = *(u64 *) e0;
- ei0 = ethernet_get_interface (&ethernet_main, hi->hw_if_index);
- if (ei0 && vec_len (ei0->secondary_addrs))
+ if (vec_len (ei->secondary_addrs))
ethernet_input_inline_dmac_check (hi, dmacs, dmacs_bad,
- 1 /* n_packets */ , ei0,
- 1 /* have_sec_dmac */ );
+ 1 /* n_packets */, ei,
+ 1 /* have_sec_dmac */);
else
ethernet_input_inline_dmac_check (hi, dmacs, dmacs_bad,
- 1 /* n_packets */ , ei0,
- 0 /* have_sec_dmac */ );
+ 1 /* n_packets */, ei,
+ 0 /* have_sec_dmac */);
if (dmacs_bad[0])
*error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
}
@@ -983,8 +982,31 @@ eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
else
{
for (int j = 0; j < 16; j++)
- if (next[j] == 0)
- slowpath_indices[n_slowpath++] = i + j;
+ {
+ if (next[j] == 0)
+ slowpath_indices[n_slowpath++] = i + j;
+ else if (dmac_check && main_is_l3 && dmacs_bad[i + j])
+ {
+ next[j] = 0;
+ slowpath_indices[n_slowpath++] = i + j;
+ }
+ }
+ }
+ }
+ else
+ {
+ if (dmac_check && main_is_l3)
+ {
+ u8x16 dmac_bad = u8x16_load_unaligned (&dmacs_bad[i]);
+ if (!u8x16_is_all_zero (dmac_bad))
+ {
+ for (int j = 0; j < 16; j++)
+ if (dmacs_bad[i + j])
+ {
+ next[j] = 0;
+ slowpath_indices[n_slowpath++] = i + j;
+ }
+ }
}
}
@@ -995,7 +1017,12 @@ eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
continue;
}
#endif
- if (main_is_l3 && etype[0] == et_ip4)
+ if (dmac_check && main_is_l3 && dmacs_bad[i])
+ {
+ next[0] = 0;
+ slowpath_indices[n_slowpath++] = i;
+ }
+ else if (main_is_l3 && etype[0] == et_ip4)
next[0] = next_ip4;
else if (main_is_l3 && etype[0] == et_ip6)
next[0] = next_ip6;
@@ -1053,7 +1080,7 @@ eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
}
else
{
- /* untagged packet with not well known etyertype */
+	  /* untagged packet without a well-known ethertype */
if (last_unknown_etype != etype)
{
last_unknown_etype = etype;
@@ -1564,21 +1591,20 @@ ethernet_input_inline (vlib_main_t * vm,
dmacs[0] = *(u64 *) e0;
- if (ei && vec_len (ei->secondary_addrs))
- ethernet_input_inline_dmac_check (hi, dmacs,
- dmacs_bad,
- 1 /* n_packets */ ,
- ei,
- 1 /* have_sec_dmac */ );
- else
- ethernet_input_inline_dmac_check (hi, dmacs,
- dmacs_bad,
- 1 /* n_packets */ ,
- ei,
- 0 /* have_sec_dmac */ );
-
- if (dmacs_bad[0])
- error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
+ if (ei)
+ {
+ if (vec_len (ei->secondary_addrs))
+ ethernet_input_inline_dmac_check (
+ hi, dmacs, dmacs_bad, 1 /* n_packets */, ei,
+ 1 /* have_sec_dmac */);
+ else
+ ethernet_input_inline_dmac_check (
+ hi, dmacs, dmacs_bad, 1 /* n_packets */, ei,
+ 0 /* have_sec_dmac */);
+
+ if (dmacs_bad[0])
+ error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
+ }
skip_dmac_check0:
vlib_buffer_advance (b0, sizeof (ethernet_header_t));
@@ -2100,7 +2126,6 @@ static char *ethernet_error_strings[] = {
#undef ethernet_error
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ethernet_input_node) = {
.name = "ethernet-input",
/* Takes a vector of packets. */
@@ -2142,7 +2167,6 @@ VLIB_REGISTER_NODE (ethernet_input_not_l2_node) = {
#undef _
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
void
diff --git a/src/vnet/ethernet/p2p_ethernet.c b/src/vnet/ethernet/p2p_ethernet.c
index ddf23901419..0ece84fd9cc 100644
--- a/src/vnet/ethernet/p2p_ethernet.c
+++ b/src/vnet/ethernet/p2p_ethernet.c
@@ -146,6 +146,8 @@ p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index,
vnet_feature_enable_disable ("device-input",
"p2p-ethernet-input",
parent_if_index, 1, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth", "p2p-ethernet-input",
+ parent_if_index, 1, 0, 0);
/* Set promiscuous mode on the l2 interface */
ethernet_set_flags (vnm, parent_if_index,
ETHERNET_INTERFACE_FLAG_ACCEPT_ALL);
@@ -153,7 +155,7 @@ p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index,
}
p2pm->p2p_ethernet_by_sw_if_index[parent_if_index]++;
/* set the interface mode */
- set_int_l2_mode (vm, vnm, MODE_L3, p2pe_subif_id, 0,
+ set_int_l2_mode (vm, vnm, MODE_L3, p2pe_sw_if_index, 0,
L2_BD_PORT_TYPE_NORMAL, 0, 0);
return 0;
}
@@ -176,6 +178,9 @@ p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index,
vnet_feature_enable_disable ("device-input",
"p2p-ethernet-input",
parent_if_index, 0, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth",
+ "p2p-ethernet-input",
+ parent_if_index, 0, 0, 0);
/* Disable promiscuous mode on the l2 interface */
ethernet_set_flags (vnm, parent_if_index, 0);
}
@@ -248,10 +253,11 @@ vnet_p2p_ethernet_add_del (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-VLIB_CLI_COMMAND (p2p_ethernet_add_del_command, static) =
-{
-.path = "p2p_ethernet ",.function = vnet_p2p_ethernet_add_del,.short_help =
- "p2p_ethernet <intfc> <mac-address> [sub-id <id> | del]",};
+VLIB_CLI_COMMAND (p2p_ethernet_add_del_command, static) = {
+ .path = "p2p_ethernet",
+ .function = vnet_p2p_ethernet_add_del,
+ .short_help = "p2p_ethernet <intfc> <mac-address> [sub-id <id>|del]",
+};
static clib_error_t *
p2p_ethernet_init (vlib_main_t * vm)
diff --git a/src/vnet/ethernet/p2p_ethernet_api.c b/src/vnet/ethernet/p2p_ethernet_api.c
index a9a8cc0a444..903678ce445 100644
--- a/src/vnet/ethernet/p2p_ethernet_api.c
+++ b/src/vnet/ethernet/p2p_ethernet_api.c
@@ -58,14 +58,12 @@ vl_api_p2p_ethernet_add_t_handler (vl_api_p2p_ethernet_add_t * mp)
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_P2P_ETHERNET_ADD_REPLY,
({
rmp->sw_if_index = htonl(p2pe_if_index);
}));
- /* *INDENT-ON* */
}
void
diff --git a/src/vnet/ethernet/p2p_ethernet_input.c b/src/vnet/ethernet/p2p_ethernet_input.c
index 3e9589e0e19..3d81e99cff2 100644
--- a/src/vnet/ethernet/p2p_ethernet_input.c
+++ b/src/vnet/ethernet/p2p_ethernet_input.c
@@ -235,7 +235,6 @@ VLIB_NODE_FN (p2p_ethernet_input_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (p2p_ethernet_input_node) = {
.name = "p2p-ethernet-input",
.vector_size = sizeof (u32),
@@ -253,7 +252,6 @@ VLIB_REGISTER_NODE (p2p_ethernet_input_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ethernet/packet.h b/src/vnet/ethernet/packet.h
index e1e42badd06..007f93596f3 100644
--- a/src/vnet/ethernet/packet.h
+++ b/src/vnet/ethernet/packet.h
@@ -184,7 +184,6 @@ typedef struct
#define ETHERNET_N_PBB (1 << 24)
} ethernet_pbb_header_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
/* Backbone source/destination address. */
@@ -201,7 +200,6 @@ typedef CLIB_PACKED (struct
/* 3 bit priority, 1 bit DEI, 1 bit UCA, 3 bit RES and 24 bit I_SID (service identifier) */
u32 priority_dei_uca_res_sid;
}) ethernet_pbb_header_packed_t;
-/* *INDENT-ON* */
#endif /* included_ethernet_packet_h */
diff --git a/src/vnet/feature/feature.c b/src/vnet/feature/feature.c
index c93f586c349..a7246fbb16a 100644
--- a/src/vnet/feature/feature.c
+++ b/src/vnet/feature/feature.c
@@ -293,9 +293,10 @@ vnet_feature_enable_disable_with_index (u8 arc_index, u32 feature_index,
fm->sw_if_index_has_features[arc_index] =
clib_bitmap_set (fm->sw_if_index_has_features[arc_index], sw_if_index,
(feature_count > 0));
+ fm->feature_count_by_sw_if_index[arc_index][sw_if_index] = feature_count;
+
vnet_feature_reg_invoke (sw_if_index, arc_index, (feature_count > 0));
- fm->feature_count_by_sw_if_index[arc_index][sw_if_index] = feature_count;
return 0;
}
@@ -375,6 +376,52 @@ vnet_feature_is_enabled (const char *arc_name, const char *feature_node_name,
return 0;
}
+u32
+vnet_feature_get_end_node (u8 arc_index, u32 sw_if_index)
+{
+ vnet_feature_main_t *fm = &feature_main;
+ vnet_feature_config_main_t *cm;
+ u32 ci;
+
+ if (arc_index == (u8) ~0)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ cm = &fm->feature_config_mains[arc_index];
+ vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
+ ci = cm->config_index_by_sw_if_index[sw_if_index];
+
+ return (vnet_config_get_end_node (vlib_get_main (), &cm->config_main, ci));
+}
+
+u32
+vnet_feature_reset_end_node (u8 arc_index, u32 sw_if_index)
+{
+ vnet_feature_main_t *fm = &feature_main;
+ vnet_feature_config_main_t *cm;
+ u32 ci;
+
+ cm = &fm->feature_config_mains[arc_index];
+ vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
+ ci = cm->config_index_by_sw_if_index[sw_if_index];
+
+ ci = vnet_config_reset_end_node (vlib_get_main (), &cm->config_main, ci);
+
+ if (ci != ~0)
+ cm->config_index_by_sw_if_index[sw_if_index] = ci;
+
+ i16 feature_count;
+
+ if (NULL == fm->feature_count_by_sw_if_index ||
+ vec_len (fm->feature_count_by_sw_if_index) <= arc_index ||
+ vec_len (fm->feature_count_by_sw_if_index[arc_index]) <= sw_if_index)
+ feature_count = 0;
+ else
+ feature_count = fm->feature_count_by_sw_if_index[arc_index][sw_if_index];
+
+ vnet_feature_reg_invoke (sw_if_index, arc_index, (feature_count > 0));
+
+ return ci;
+}
u32
vnet_feature_modify_end_node (u8 arc_index,
@@ -400,6 +447,17 @@ vnet_feature_modify_end_node (u8 arc_index,
if (ci != ~0)
cm->config_index_by_sw_if_index[sw_if_index] = ci;
+ i16 feature_count;
+
+ if (NULL == fm->feature_count_by_sw_if_index ||
+ vec_len (fm->feature_count_by_sw_if_index) <= arc_index ||
+ vec_len (fm->feature_count_by_sw_if_index[arc_index]) <= sw_if_index)
+ feature_count = 0;
+ else
+ feature_count = fm->feature_count_by_sw_if_index[arc_index][sw_if_index];
+
+ vnet_feature_reg_invoke (sw_if_index, arc_index, (feature_count > 0));
+
return ci;
}
@@ -475,13 +533,11 @@ show_features_command_fn (vlib_main_t * vm,
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_features_command, static) = {
.path = "show features",
.short_help = "show features [verbose]",
.function = show_features_command_fn,
};
-/* *INDENT-ON* */
/** Display the set of driver features configured on a specific interface
* Called by "show interface" handler
@@ -642,14 +698,12 @@ done:
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_feature_command, static) = {
.path = "set interface feature",
.short_help = "set interface feature <intfc> <feature_name> arc <arc_name> "
"[disable]",
.function = set_interface_features_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
vnet_feature_add_del_sw_interface (vnet_main_t * vnm, u32 sw_if_index,
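
A hypothetical caller of the new end-node accessors; the arc-lookup
helper, arc name and node index are assumptions for illustration:

    u8 arc = vnet_get_feature_arc_index ("device-input"); /* assumed arc */
    u32 saved = vnet_feature_get_end_node (arc, sw_if_index);
    vnet_feature_modify_end_node (arc, sw_if_index, my_node_index);
    /* ... later, put the arc's default end node back ... */
    vnet_feature_reset_end_node (arc, sw_if_index);
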
diff --git a/src/vnet/feature/feature.h b/src/vnet/feature/feature.h
index 9aa32182ef6..b1917e8df13 100644
--- a/src/vnet/feature/feature.h
+++ b/src/vnet/feature/feature.h
@@ -222,6 +222,10 @@ vnet_feature_enable_disable (const char *arc_name, const char *node_name,
u32
vnet_feature_modify_end_node (u8 arc_index, u32 sw_if_index, u32 node_index);
+u32 vnet_feature_get_end_node (u8 arc_index, u32 sw_if_index);
+
+u32 vnet_feature_reset_end_node (u8 arc_index, u32 sw_if_index);
+
static_always_inline u32
vnet_get_feature_count (u8 arc, u32 sw_if_index)
{
@@ -340,83 +344,8 @@ vnet_device_input_have_features (u32 sw_if_index)
}
static_always_inline void
-vnet_feature_start_device_input_x1 (u32 sw_if_index, u32 * next0,
- vlib_buffer_t * b0)
-{
- vnet_feature_main_t *fm = &feature_main;
- vnet_feature_config_main_t *cm;
- u8 feature_arc_index = fm->device_input_feature_arc_index;
- cm = &fm->feature_config_mains[feature_arc_index];
-
- if (PREDICT_FALSE
- (clib_bitmap_get
- (fm->sw_if_index_has_features[feature_arc_index], sw_if_index)))
- {
- /*
- * Save next0 so that the last feature in the chain
- * can skip ethernet-input if indicated...
- */
- u16 adv;
-
- adv = device_input_next_node_advance[*next0];
- vlib_buffer_advance (b0, -adv);
-
- vnet_buffer (b0)->feature_arc_index = feature_arc_index;
- b0->current_config_index =
- vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
- vnet_get_config_data (&cm->config_main, &b0->current_config_index,
- next0, /* # bytes of config data */ 0);
- }
-}
-
-static_always_inline void
-vnet_feature_start_device_input_x2 (u32 sw_if_index,
- u32 * next0,
- u32 * next1,
- vlib_buffer_t * b0, vlib_buffer_t * b1)
-{
- vnet_feature_main_t *fm = &feature_main;
- vnet_feature_config_main_t *cm;
- u8 feature_arc_index = fm->device_input_feature_arc_index;
- cm = &fm->feature_config_mains[feature_arc_index];
-
- if (PREDICT_FALSE
- (clib_bitmap_get
- (fm->sw_if_index_has_features[feature_arc_index], sw_if_index)))
- {
- /*
- * Save next0 so that the last feature in the chain
- * can skip ethernet-input if indicated...
- */
- u16 adv;
-
- adv = device_input_next_node_advance[*next0];
- vlib_buffer_advance (b0, -adv);
-
- adv = device_input_next_node_advance[*next1];
- vlib_buffer_advance (b1, -adv);
-
- vnet_buffer (b0)->feature_arc_index = feature_arc_index;
- vnet_buffer (b1)->feature_arc_index = feature_arc_index;
- b0->current_config_index =
- vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
- b1->current_config_index = b0->current_config_index;
- vnet_get_config_data (&cm->config_main, &b0->current_config_index,
- next0, /* # bytes of config data */ 0);
- vnet_get_config_data (&cm->config_main, &b1->current_config_index,
- next1, /* # bytes of config data */ 0);
- }
-}
-
-static_always_inline void
-vnet_feature_start_device_input_x4 (u32 sw_if_index,
- u32 * next0,
- u32 * next1,
- u32 * next2,
- u32 * next3,
- vlib_buffer_t * b0,
- vlib_buffer_t * b1,
- vlib_buffer_t * b2, vlib_buffer_t * b3)
+vnet_feature_start_device_input (u32 sw_if_index, u32 *next0,
+ vlib_buffer_t *b0)
{
vnet_feature_main_t *fm = &feature_main;
vnet_feature_config_main_t *cm;
@@ -427,43 +356,11 @@ vnet_feature_start_device_input_x4 (u32 sw_if_index,
(clib_bitmap_get
(fm->sw_if_index_has_features[feature_arc_index], sw_if_index)))
{
- /*
- * Save next0 so that the last feature in the chain
- * can skip ethernet-input if indicated...
- */
- u16 adv;
-
- adv = device_input_next_node_advance[*next0];
- vlib_buffer_advance (b0, -adv);
-
- adv = device_input_next_node_advance[*next1];
- vlib_buffer_advance (b1, -adv);
-
- adv = device_input_next_node_advance[*next2];
- vlib_buffer_advance (b2, -adv);
-
- adv = device_input_next_node_advance[*next3];
- vlib_buffer_advance (b3, -adv);
-
vnet_buffer (b0)->feature_arc_index = feature_arc_index;
- vnet_buffer (b1)->feature_arc_index = feature_arc_index;
- vnet_buffer (b2)->feature_arc_index = feature_arc_index;
- vnet_buffer (b3)->feature_arc_index = feature_arc_index;
-
b0->current_config_index =
vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
- b1->current_config_index = b0->current_config_index;
- b2->current_config_index = b0->current_config_index;
- b3->current_config_index = b0->current_config_index;
-
vnet_get_config_data (&cm->config_main, &b0->current_config_index,
next0, /* # bytes of config data */ 0);
- vnet_get_config_data (&cm->config_main, &b1->current_config_index,
- next1, /* # bytes of config data */ 0);
- vnet_get_config_data (&cm->config_main, &b2->current_config_index,
- next2, /* # bytes of config data */ 0);
- vnet_get_config_data (&cm->config_main, &b3->current_config_index,
- next3, /* # bytes of config data */ 0);
}
}
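
With the x2/x4 variants gone, a driver RX loop reduces to one call per
buffer; a sketch (buffer and next arrays assumed):

    for (u32 i = 0; i < n_rx_packets; i++)
      vnet_feature_start_device_input (sw_if_index, &nexts[i], bufs[i]);
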
diff --git a/src/vnet/feature/registration.c b/src/vnet/feature/registration.c
index 537a4ada6e4..bc20412b9cf 100644
--- a/src/vnet/feature/registration.c
+++ b/src/vnet/feature/registration.c
@@ -351,12 +351,10 @@ again:
*in_feature_nodes = feature_nodes;
/* Finally, clean up all the shit we allocated */
- /* *INDENT-OFF* */
hash_foreach_pair (hp, index_by_name,
({
vec_add1 (keys_to_delete, (u8 *)hp->key);
}));
- /* *INDENT-ON* */
hash_free (index_by_name);
for (i = 0; i < vec_len (keys_to_delete); i++)
vec_free (keys_to_delete[i]);
diff --git a/src/vnet/fib/fib.c b/src/vnet/fib/fib.c
index ddfa830bb0f..cce03b4b49c 100644
--- a/src/vnet/fib/fib.c
+++ b/src/vnet/fib/fib.c
@@ -32,9 +32,7 @@ fib_module_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (fib_module_init) =
{
.runs_after = VLIB_INITS("dpo_module_init", "adj_module_init"),
};
-/* *INDENT-ON* */
diff --git a/src/vnet/fib/fib_api.c b/src/vnet/fib/fib_api.c
index 75a17cfca02..1b1c0d113c0 100644
--- a/src/vnet/fib/fib_api.c
+++ b/src/vnet/fib/fib_api.c
@@ -69,7 +69,7 @@ fib_api_next_hop_decode (const vl_api_fib_path_t *in,
*out = to_ip46 (FIB_API_PATH_NH_PROTO_IP6 == in->proto, (void *)&in->nh.address);
}
-static vl_api_fib_path_nh_proto_t
+vl_api_fib_path_nh_proto_t
fib_api_path_dpo_proto_to_nh (dpo_proto_t dproto)
{
switch (dproto)
@@ -108,7 +108,7 @@ fib_api_next_hop_encode (const fib_route_path_t *rpath,
sizeof (rpath->frp_addr.ip6));
}
-static int
+int
fib_api_path_nh_proto_to_dpo (vl_api_fib_path_nh_proto_t pp,
dpo_proto_t *dproto)
{
@@ -190,6 +190,7 @@ fib_api_path_decode (vl_api_fib_path_t *in,
break;
case FIB_API_PATH_TYPE_DROP:
out->frp_flags |= FIB_ROUTE_PATH_DROP;
+ out->frp_sw_if_index = ntohl(in->sw_if_index);
break;
case FIB_API_PATH_TYPE_LOCAL:
out->frp_flags |= FIB_ROUTE_PATH_LOCAL;
@@ -448,6 +449,9 @@ fib_api_route_add_del (u8 is_add,
fib_entry_flag_t entry_flags,
fib_route_path_t *rpaths)
{
+    if (!fib_prefix_validate(prefix))
+    {
+        return (VNET_API_ERROR_INVALID_PREFIX_LENGTH);
+    }
if (is_multipath)
{
if (vec_len(rpaths) == 0)
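
A sketch of the range check fib_prefix_validate() is assumed to perform;
the helper's body is not part of this hunk, so this is illustrative only:

    static int
    fib_prefix_validate_sketch (const fib_prefix_t *prefix)
    {
      /* prefix length must not exceed the address-family maximum */
      switch (prefix->fp_proto)
        {
        case FIB_PROTOCOL_IP4:
          return (prefix->fp_len <= 32);
        case FIB_PROTOCOL_IP6:
          return (prefix->fp_len <= 128);
        case FIB_PROTOCOL_MPLS:
          return (1);
        }
      return (0);
    }
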
diff --git a/src/vnet/fib/fib_api.h b/src/vnet/fib/fib_api.h
index 7fd7d16cb33..0c59531b438 100644
--- a/src/vnet/fib/fib_api.h
+++ b/src/vnet/fib/fib_api.h
@@ -29,6 +29,8 @@ struct _vl_api_fib_prefix;
/**
* Encode and decode functions from the API types to internal types
*/
+extern vl_api_fib_path_nh_proto_t fib_api_path_dpo_proto_to_nh (dpo_proto_t dproto);
+extern int fib_api_path_nh_proto_to_dpo (vl_api_fib_path_nh_proto_t pp, dpo_proto_t *dproto);
extern void fib_api_path_encode(const fib_route_path_t * api_rpath,
vl_api_fib_path_t *out);
extern int fib_api_path_decode(vl_api_fib_path_t *in,
diff --git a/src/vnet/fib/fib_attached_export.c b/src/vnet/fib/fib_attached_export.c
index 206d10e7140..c6ba0575a04 100644
--- a/src/vnet/fib/fib_attached_export.c
+++ b/src/vnet/fib/fib_attached_export.c
@@ -378,6 +378,7 @@ fib_attached_export_purge (fib_entry_t *fib_entry)
*/
if (0 == --export->faee_locks)
{
+ vec_free (export->faee_importers);
pool_put(fib_ae_export_pool, export);
fib_entry_delegate_remove(export_entry,
FIB_ENTRY_DELEGATE_ATTACHED_EXPORT);
diff --git a/src/vnet/fib/fib_bfd.c b/src/vnet/fib/fib_bfd.c
index b02fbc67a63..6bfd29ae2cc 100644
--- a/src/vnet/fib/fib_bfd.c
+++ b/src/vnet/fib/fib_bfd.c
@@ -188,9 +188,7 @@ fib_bfd_main_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (fib_bfd_main_init) =
{
.runs_after = VLIB_INITS("bfd_main_init"),
};
-/* *INDENT-ON* */
diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c
index dfa0cb285b4..b78346ce45a 100644
--- a/src/vnet/fib/fib_entry.c
+++ b/src/vnet/fib/fib_entry.c
@@ -293,58 +293,6 @@ fib_entry_get_flags (fib_node_index_t fib_entry_index)
return (fib_entry_get_flags_i(fib_entry_get(fib_entry_index)));
}
-/*
- * fib_entry_back_walk_notify
- *
- * A back walk has reach this entry.
- */
-static fib_node_back_walk_rc_t
-fib_entry_back_walk_notify (fib_node_t *node,
- fib_node_back_walk_ctx_t *ctx)
-{
- fib_entry_t *fib_entry;
-
- fib_entry = fib_entry_from_fib_node(node);
-
- if (FIB_NODE_BW_REASON_FLAG_EVALUATE & ctx->fnbw_reason ||
- FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason ||
- FIB_NODE_BW_REASON_FLAG_ADJ_DOWN & ctx->fnbw_reason ||
- FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason ||
- FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason ||
- FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
- {
- fib_entry_src_action_reactivate(fib_entry,
- fib_entry_get_best_source(
- fib_entry_get_index(fib_entry)));
- }
-
- /*
- * all other walk types can be reclassifed to a re-evaluate to
- * all recursive dependents.
- * By reclassifying we ensure that should any of these walk types meet
- * they can be merged.
- */
- ctx->fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE;
-
- /*
- * ... and nothing is forced sync from now on.
- */
- ctx->fnbw_flags &= ~FIB_NODE_BW_FLAG_FORCE_SYNC;
-
- FIB_ENTRY_DBG(fib_entry, "bw:%U",
- format_fib_node_bw_reason, ctx->fnbw_reason);
-
- /*
- * propagate the backwalk further if we haven't already reached the
- * maximum depth.
- */
- fib_walk_sync(FIB_NODE_TYPE_ENTRY,
- fib_entry_get_index(fib_entry),
- ctx);
-
- return (FIB_NODE_BACK_WALK_CONTINUE);
-}
-
static void
fib_entry_show_memory (void)
{
@@ -373,16 +321,6 @@ fib_entry_show_memory (void)
sizeof(fib_path_ext_t));
}
-/*
- * The FIB path-list's graph node virtual function table
- */
-static const fib_node_vft_t fib_entry_vft = {
- .fnv_get = fib_entry_get_node,
- .fnv_last_lock = fib_entry_last_lock_gone,
- .fnv_back_walk = fib_entry_back_walk_notify,
- .fnv_mem_show = fib_entry_show_memory,
-};
-
/**
* @brief Contribute the set of Adjacencies that this entry forwards with
* to build the uRPF list of its children
@@ -599,11 +537,10 @@ fib_entry_alloc (u32 fib_index,
{
fib_entry_t *fib_entry;
fib_prefix_t *fep;
- u8 need_barrier_sync = 0;
+ u8 need_barrier_sync = pool_get_will_expand (fib_entry_pool);
vlib_main_t *vm = vlib_get_main();
ASSERT (vm->thread_index == 0);
- pool_get_will_expand (fib_entry_pool, need_barrier_sync );
if (need_barrier_sync)
vlib_worker_thread_barrier_sync (vm);
@@ -645,7 +582,8 @@ fib_entry_alloc (u32 fib_index,
static fib_entry_t*
fib_entry_post_flag_update_actions (fib_entry_t *fib_entry,
- fib_entry_flag_t old_flags)
+ fib_entry_flag_t old_flags,
+ u32 new_fib_index)
{
fib_node_index_t fei;
@@ -670,12 +608,14 @@ fib_entry_post_flag_update_actions (fib_entry_t *fib_entry,
* there is an assumption here that the entry resolves via only
* one interface and that it is the cross VRF interface.
*/
- u32 sw_if_index = fib_path_list_get_resolving_interface(fib_entry->fe_parent);
-
- fib_attached_export_import(fib_entry,
- fib_table_get_index_for_sw_if_index(
- fib_entry_get_proto(fib_entry),
- sw_if_index));
+ if (~0 == new_fib_index)
+ {
+ u32 sw_if_index = fib_path_list_get_resolving_interface(fib_entry->fe_parent);
+ new_fib_index = fib_table_get_index_for_sw_if_index(
+ fib_entry_get_proto(fib_entry),
+ sw_if_index);
+ }
+ fib_attached_export_import(fib_entry, new_fib_index);
}
else if (was_import && !is_import)
{
@@ -684,6 +624,14 @@ fib_entry_post_flag_update_actions (fib_entry_t *fib_entry,
*/
fib_attached_export_purge(fib_entry);
}
+ else if (was_import && is_import && ~0 != new_fib_index)
+ {
+ /*
+ * transition from export from one table to another
+ */
+ fib_attached_export_purge(fib_entry);
+ fib_attached_export_import(fib_entry, new_fib_index);
+ }
/*
* else
* no change. nothing to do.
@@ -717,8 +665,7 @@ fib_entry_post_install_actions (fib_entry_t *fib_entry,
fib_source_t source,
fib_entry_flag_t old_flags)
{
- fib_entry = fib_entry_post_flag_update_actions(fib_entry,
- old_flags);
+ fib_entry = fib_entry_post_flag_update_actions(fib_entry, old_flags, ~0);
fib_entry = fib_entry_src_action_installed(fib_entry, source);
return (fib_entry);
@@ -990,7 +937,7 @@ fib_entry_source_removed (fib_entry_t *fib_entry,
/*
* no more sources left. this entry is toast.
*/
- fib_entry = fib_entry_post_flag_update_actions(fib_entry, old_flags);
+ fib_entry = fib_entry_post_flag_update_actions(fib_entry, old_flags, ~0);
fib_entry_src_action_uninstall(fib_entry);
return (FIB_ENTRY_SRC_FLAG_NONE);
@@ -1164,7 +1111,7 @@ fib_entry_special_remove (fib_node_index_t fib_entry_index,
/*
* no more sources left. this entry is toast.
*/
- fib_entry = fib_entry_post_flag_update_actions(fib_entry, bflags);
+ fib_entry = fib_entry_post_flag_update_actions(fib_entry, bflags, ~0);
fib_entry_src_action_uninstall(fib_entry);
return (FIB_ENTRY_SRC_FLAG_NONE);
}
@@ -1481,6 +1428,126 @@ fib_entry_recursive_loop_detect (fib_node_index_t entry_index,
return (is_looped);
}
+/*
+ * fib_entry_attached_cross_table
+ *
+ * Return true if the route is attached via an interface that
+ * is not in the same table as the route
+ */
+static int
+fib_entry_attached_cross_table (const fib_entry_t *fib_entry,
+ u32 fib_index)
+{
+ const fib_prefix_t *pfx = &fib_entry->fe_prefix;
+
+ switch (pfx->fp_proto)
+ {
+ case FIB_PROTOCOL_MPLS:
+ /* MPLS routes are never imported/exported */
+ return (0);
+ case FIB_PROTOCOL_IP6:
+    /* Ignore link-local addresses; these also can't be imported/exported */
+ if (ip6_address_is_link_local_unicast (&pfx->fp_addr.ip6))
+ {
+ return (0);
+ }
+ break;
+ case FIB_PROTOCOL_IP4:
+ break;
+ }
+
+ return (fib_entry->fe_fib_index != fib_index);
+}
+
+/*
+ * fib_entry_back_walk_notify
+ *
+ * A back walk has reached this entry.
+ */
+static fib_node_back_walk_rc_t
+fib_entry_back_walk_notify (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ fib_source_t best_source;
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *bsrc;
+
+ fib_entry = fib_entry_from_fib_node(node);
+ bsrc = fib_entry_get_best_src_i(fib_entry);
+ best_source = fib_entry_src_get_source(bsrc);
+
+ if (FIB_NODE_BW_REASON_FLAG_INTERFACE_BIND & ctx->fnbw_reason)
+ {
+ fib_entry_flag_t bflags;
+
+ bflags = fib_entry_src_get_flags(bsrc);
+
+ fib_entry_src_action_reactivate(fib_entry, best_source);
+
+ /* re-evaluate whether the prefix is cross table */
+ if (fib_entry_attached_cross_table(
+ fib_entry, ctx->interface_bind.fnbw_to_fib_index) &&
+ !(bsrc->fes_entry_flags & FIB_ENTRY_FLAG_NO_ATTACHED_EXPORT))
+ {
+ bsrc->fes_entry_flags |= FIB_ENTRY_FLAG_IMPORT;
+ }
+ else
+ {
+ bsrc->fes_entry_flags &= ~FIB_ENTRY_FLAG_IMPORT;
+ }
+
+ fib_entry = fib_entry_post_flag_update_actions(
+ fib_entry, bflags,
+ ctx->interface_bind.fnbw_to_fib_index);
+ }
+ else if (FIB_NODE_BW_REASON_FLAG_EVALUATE & ctx->fnbw_reason ||
+ FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason ||
+ FIB_NODE_BW_REASON_FLAG_ADJ_DOWN & ctx->fnbw_reason ||
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason ||
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason ||
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_BIND & ctx->fnbw_reason ||
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
+ {
+ fib_entry_src_action_reactivate(fib_entry, best_source);
+ }
+
+ /*
+     * all other walk types can be reclassified as a re-evaluate of
+     * all recursive dependents.
+     * By reclassifying we ensure that, should any of these walk types meet,
+     * they can be merged.
+ */
+ ctx->fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE;
+
+ /*
+ * ... and nothing is forced sync from now on.
+ */
+ ctx->fnbw_flags &= ~FIB_NODE_BW_FLAG_FORCE_SYNC;
+
+ FIB_ENTRY_DBG(fib_entry, "bw:%U",
+ format_fib_node_bw_reason, ctx->fnbw_reason);
+
+ /*
+ * propagate the backwalk further if we haven't already reached the
+ * maximum depth.
+ */
+ fib_walk_sync(FIB_NODE_TYPE_ENTRY,
+ fib_entry_get_index(fib_entry),
+ ctx);
+
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/*
+ * The FIB path-list's graph node virtual function table
+ */
+static const fib_node_vft_t fib_entry_vft = {
+ .fnv_get = fib_entry_get_node,
+ .fnv_last_lock = fib_entry_last_lock_gone,
+ .fnv_back_walk = fib_entry_back_walk_notify,
+ .fnv_mem_show = fib_entry_show_memory,
+};
+
u32
fib_entry_get_resolving_interface (fib_node_index_t entry_index)
{
diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h
index 4053ff65181..7331f803ec4 100644
--- a/src/vnet/fib/fib_entry.h
+++ b/src/vnet/fib/fib_entry.h
@@ -154,9 +154,13 @@ typedef enum fib_entry_src_attribute_t_ {
*/
FIB_ENTRY_SRC_ATTRIBUTE_INHERITED,
/**
+ * the source is currently used as glean src address
+ */
+ FIB_ENTRY_SRC_ATTRIBUTE_PROVIDES_GLEAN,
+ /**
* Marker. add new entries before this one.
*/
- FIB_ENTRY_SRC_ATTRIBUTE_LAST = FIB_ENTRY_SRC_ATTRIBUTE_INHERITED,
+ FIB_ENTRY_SRC_ATTRIBUTE_LAST = FIB_ENTRY_SRC_ATTRIBUTE_PROVIDES_GLEAN,
} fib_entry_src_attribute_t;
@@ -166,6 +170,7 @@ typedef enum fib_entry_src_attribute_t_ {
[FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE] = "active", \
[FIB_ENTRY_SRC_ATTRIBUTE_STALE] = "stale", \
[FIB_ENTRY_SRC_ATTRIBUTE_INHERITED] = "inherited", \
+ [FIB_ENTRY_SRC_ATTRIBUTE_PROVIDES_GLEAN] = "provides-glean", \
}
#define FOR_EACH_FIB_SRC_ATTRIBUTE(_item) \
@@ -180,6 +185,7 @@ typedef enum fib_entry_src_flag_t_ {
FIB_ENTRY_SRC_FLAG_ACTIVE = (1 << FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE),
FIB_ENTRY_SRC_FLAG_STALE = (1 << FIB_ENTRY_SRC_ATTRIBUTE_STALE),
FIB_ENTRY_SRC_FLAG_INHERITED = (1 << FIB_ENTRY_SRC_ATTRIBUTE_INHERITED),
+ FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN = (1 << FIB_ENTRY_SRC_ATTRIBUTE_PROVIDES_GLEAN),
} __attribute__ ((packed)) fib_entry_src_flag_t;
extern u8 * format_fib_entry_src_flags(u8 *s, va_list *args);
@@ -421,6 +427,9 @@ extern const int fib_entry_get_dpo_for_source (
fib_node_index_t fib_entry_index,
fib_source_t source,
dpo_id_t *dpo);
+extern fib_node_index_t fib_entry_get_path_list_for_source (
+ fib_node_index_t fib_entry_index,
+ fib_source_t source);
extern adj_index_t fib_entry_get_adj(fib_node_index_t fib_entry_index);
diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c
index a4a4f1ae0b5..c79b745b5b5 100644
--- a/src/vnet/fib/fib_entry_src.c
+++ b/src/vnet/fib/fib_entry_src.c
@@ -46,6 +46,7 @@ fib_entry_src_get_vft (const fib_entry_src_t *esrc)
return (&fib_entry_src_bh_vft[FIB_SOURCE_BH_INTERPOSE]);
}
+ ASSERT(bh < FIB_SOURCE_BH_MAX);
return (&fib_entry_src_bh_vft[bh]);
}
@@ -257,6 +258,7 @@ typedef struct fib_entry_src_collect_forwarding_ctx_t_
fib_forward_chain_type_t fct;
int n_recursive_constrained;
u16 preference;
+ dpo_proto_t payload_proto;
} fib_entry_src_collect_forwarding_ctx_t;
/**
@@ -289,47 +291,6 @@ fib_entry_src_valid_out_label (mpls_label_t label)
MPLS_IETF_IMPLICIT_NULL_LABEL == label));
}
-/**
- * @brief Turn the chain type requested by the client into the one they
- * really wanted
- */
-fib_forward_chain_type_t
-fib_entry_chain_type_fixup (const fib_entry_t *entry,
- fib_forward_chain_type_t fct)
-{
- /*
- * The EOS chain is a tricky since one cannot know the adjacency
- * to link to without knowing what the packets payload protocol
- * will be once the label is popped.
- */
- fib_forward_chain_type_t dfct;
-
- if (FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct)
- {
- return (fct);
- }
-
- dfct = fib_entry_get_default_chain_type(entry);
-
- if (FIB_FORW_CHAIN_TYPE_MPLS_EOS == dfct)
- {
- /*
- * If the entry being asked is a eos-MPLS label entry,
- * then use the payload-protocol field, that we stashed there
- * for just this purpose
- */
- return (fib_forw_chain_type_from_dpo_proto(
- entry->fe_prefix.fp_payload_proto));
- }
- /*
- * else give them what this entry would be by default. i.e. if it's a v6
- * entry, then the label its local labelled should be carrying v6 traffic.
- * If it's a non-EOS label entry, then there are more labels and we want
- * a non-eos chain.
- */
- return (dfct);
-}
-
static dpo_proto_t
fib_prefix_get_payload_proto (const fib_prefix_t *pfx)
{
@@ -371,7 +332,8 @@ fib_entry_src_get_path_forwarding (fib_node_index_t path_index,
nh->path_index = path_index;
nh->path_weight = fib_path_get_weight(path_index);
- fib_path_contribute_forwarding(path_index, ctx->fct, &nh->path_dpo);
+ fib_path_contribute_forwarding(path_index, ctx->fct,
+ ctx->payload_proto, &nh->path_dpo);
break;
case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
@@ -384,6 +346,7 @@ fib_entry_src_get_path_forwarding (fib_node_index_t path_index,
nh->path_weight = fib_path_get_weight(path_index);
fib_path_contribute_forwarding(path_index,
FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ ctx->payload_proto,
&nh->path_dpo);
}
break;
@@ -397,11 +360,11 @@ fib_entry_src_get_path_forwarding (fib_node_index_t path_index,
nh->path_index = path_index;
nh->path_weight = fib_path_get_weight(path_index);
fib_path_contribute_forwarding(path_index,
- fib_entry_chain_type_fixup(ctx->fib_entry,
- ctx->fct),
+ ctx->fct,
+ ctx->payload_proto,
&nh->path_dpo);
fib_path_stack_mpls_disp(path_index,
- fib_prefix_get_payload_proto(&ctx->fib_entry->fe_prefix),
+ ctx->payload_proto,
FIB_MPLS_LSP_MODE_PIPE,
&nh->path_dpo);
@@ -480,9 +443,8 @@ fib_entry_src_collect_forwarding (fib_node_index_t pl_index,
*/
ctx->next_hops =
fib_path_ext_stack(path_ext,
+ ctx->payload_proto,
ctx->fct,
- fib_entry_chain_type_fixup(ctx->fib_entry,
- ctx->fct),
ctx->next_hops);
}
else
@@ -609,6 +571,7 @@ fib_entry_src_mk_lb (fib_entry_t *fib_entry,
.preference = 0xffff,
.start_source_index = start,
.end_source_index = end,
+ .payload_proto = fib_prefix_get_payload_proto(&fib_entry->fe_prefix),
};
/*
@@ -794,6 +757,7 @@ fib_entry_src_action_uninstall (fib_entry_t *fib_entry)
&fib_entry->fe_prefix,
&fib_entry->fe_lb);
+ vlib_worker_wait_one_loop();
dpo_reset(&fib_entry->fe_lb);
}
}
@@ -1493,7 +1457,7 @@ fib_entry_src_action_remove (fib_entry_t *fib_entry,
 * Return true if the route is attached via an interface that
* is not in the same table as the route
*/
-static inline int
+static int
fib_route_attached_cross_table (const fib_entry_t *fib_entry,
const fib_route_path_t *rpath)
{
@@ -1508,7 +1472,7 @@ fib_route_attached_cross_table (const fib_entry_t *fib_entry,
    /* Ignore link-local addresses; these also can't be imported/exported */
if (ip6_address_is_link_local_unicast (&pfx->fp_addr.ip6))
{
- return (!0);
+ return (0);
}
break;
case FIB_PROTOCOL_IP4:
@@ -1834,6 +1798,25 @@ fib_entry_get_dpo_for_source (fib_node_index_t fib_entry_index,
return (0);
}
+fib_node_index_t
+fib_entry_get_path_list_for_source (fib_node_index_t fib_entry_index,
+ fib_source_t source)
+{
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *esrc;
+
+ if (FIB_NODE_INDEX_INVALID == fib_entry_index)
+ return FIB_NODE_INDEX_INVALID;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+ esrc = fib_entry_src_find(fib_entry, source);
+
+ if (esrc)
+ return esrc->fes_pl;
+
+ return FIB_NODE_INDEX_INVALID;
+}
+
u32
fib_entry_get_resolving_interface_for_source (fib_node_index_t entry_index,
fib_source_t source)
diff --git a/src/vnet/fib/fib_entry_src.h b/src/vnet/fib/fib_entry_src.h
index ced6b5c42fc..1f348baeacb 100644
--- a/src/vnet/fib/fib_entry_src.h
+++ b/src/vnet/fib/fib_entry_src.h
@@ -326,9 +326,6 @@ extern fib_entry_flag_t fib_entry_get_flags_i(const fib_entry_t *fib_entry);
extern fib_path_list_flags_t fib_entry_src_flags_2_path_list_flags(
fib_entry_flag_t eflags);
-extern fib_forward_chain_type_t fib_entry_chain_type_fixup(const fib_entry_t *entry,
- fib_forward_chain_type_t fct);
-
extern void fib_entry_src_mk_lb (fib_entry_t *fib_entry,
fib_source_t source,
fib_forward_chain_type_t fct,
diff --git a/src/vnet/fib/fib_entry_src_interface.c b/src/vnet/fib/fib_entry_src_interface.c
index 402369d1dfc..c5028dc8798 100644
--- a/src/vnet/fib/fib_entry_src_interface.c
+++ b/src/vnet/fib/fib_entry_src_interface.c
@@ -87,8 +87,16 @@ fib_entry_src_interface_update_glean (fib_entry_t *cover,
if (fib_prefix_is_cover(&adj->sub_type.glean.rx_pfx,
&local->fe_prefix))
{
- adj->sub_type.glean.rx_pfx.fp_addr = local->fe_prefix.fp_addr;
- return (1);
+ fib_entry_src_t *local_src;
+
+ local_src = fib_entry_src_find (local, FIB_SOURCE_INTERFACE);
+ if (local_src != NULL)
+ {
+ adj->sub_type.glean.rx_pfx.fp_addr =
+ local->fe_prefix.fp_addr;
+ local_src->fes_flags |= FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN;
+ return (1);
+ }
}
}
}
@@ -116,6 +124,52 @@ fib_entry_src_interface_path_swap (fib_entry_src_t *src,
src->fes_pl = fib_path_list_create(pl_flags, paths);
}
+typedef struct fesi_find_glean_ctx_t_ {
+ fib_node_index_t glean_node_index;
+} fesi_find_glean_ctx_t;
+
+static walk_rc_t
+fib_entry_src_interface_find_glean_walk (fib_entry_t *cover,
+ fib_node_index_t covered,
+ void *ctx)
+{
+ fesi_find_glean_ctx_t *find_glean_ctx = ctx;
+ fib_entry_t *covered_entry;
+ fib_entry_src_t *covered_src;
+
+ covered_entry = fib_entry_get (covered);
+ covered_src = fib_entry_src_find (covered_entry, FIB_SOURCE_INTERFACE);
+ if ((covered_src != NULL) &&
+ (covered_src->fes_flags & FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN))
+ {
+ find_glean_ctx->glean_node_index = covered;
+ return WALK_STOP;
+ }
+
+ return WALK_CONTINUE;
+}
+
+static fib_entry_t *
+fib_entry_src_interface_find_glean (fib_entry_t *cover)
+{
+ fib_entry_src_t *src;
+
+ src = fib_entry_src_find (cover, FIB_SOURCE_INTERFACE);
+ if (src == NULL)
+ /* the cover is not an interface source */
+ return NULL;
+
+ fesi_find_glean_ctx_t ctx = {
+ .glean_node_index = ~0,
+ };
+
+ fib_entry_cover_walk (cover, fib_entry_src_interface_find_glean_walk,
+ &ctx);
+
+ return (ctx.glean_node_index == ~0) ? NULL :
+ fib_entry_get (ctx.glean_node_index);
+}
+
/*
* Source activate.
 * Called when the source is the new best source on the entry
@@ -128,6 +182,8 @@ fib_entry_src_interface_activate (fib_entry_src_t *src,
if (FIB_ENTRY_FLAG_LOCAL & src->fes_entry_flags)
{
+ u8 update_glean;
+
/*
* Track the covering attached/connected cover. This is so that
* during an attached export of the cover, this local prefix is
@@ -141,10 +197,17 @@ fib_entry_src_interface_activate (fib_entry_src_t *src,
cover = fib_entry_get(src->u.interface.fesi_cover);
+ /*
+ * Before adding as a child of the cover, check whether an existing
+ * child has already been used to populate the glean adjacency. If so,
+ * we don't need to update the adjacency.
+ */
+ update_glean = (fib_entry_src_interface_find_glean (cover) == NULL);
src->u.interface.fesi_sibling =
fib_entry_cover_track(cover, fib_entry_get_index(fib_entry));
- fib_entry_src_interface_update_glean(cover, fib_entry);
+ if (update_glean)
+ fib_entry_src_interface_update_glean(cover, fib_entry);
}
return (!0);
@@ -167,15 +230,19 @@ fib_entry_src_interface_deactivate (fib_entry_src_t *src,
if (FIB_NODE_INDEX_INVALID != src->u.interface.fesi_cover)
{
cover = fib_entry_get(src->u.interface.fesi_cover);
-
fib_entry_cover_untrack(cover, src->u.interface.fesi_sibling);
src->u.interface.fesi_cover = FIB_NODE_INDEX_INVALID;
src->u.interface.fesi_sibling = ~0;
- fib_entry_cover_walk(cover,
- fib_entry_src_interface_update_glean_walk,
- NULL);
+ /* If this was the glean address, find a new one */
+ if (src->fes_flags & FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN)
+ {
+ fib_entry_cover_walk(cover,
+ fib_entry_src_interface_update_glean_walk,
+ NULL);
+ src->fes_flags &= ~FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN;
+ }
}
}
diff --git a/src/vnet/fib/fib_node.c b/src/vnet/fib/fib_node.c
index 1d3abd50a9d..e668c4fc51f 100644
--- a/src/vnet/fib/fib_node.c
+++ b/src/vnet/fib/fib_node.c
@@ -31,23 +31,20 @@ static fib_node_type_t last_new_type = FIB_NODE_TYPE_LAST;
/*
* the node type names
*/
-static const char *fn_type_names[] = FIB_NODE_TYPES;
+static const char *fn_type_builtin_names[] = FIB_NODE_TYPES;
+static const char **fn_type_names;
const char*
fib_node_type_get_name (fib_node_type_t type)
{
- if (type < FIB_NODE_TYPE_LAST)
- return (fn_type_names[type]);
+ if ((type < vec_len(fn_type_names)) &&
+ (NULL != fn_type_names[type]))
+ {
+ return (fn_type_names[type]);
+ }
else
{
- if (NULL != fn_vfts[type].fnv_format)
- {
- return ("fixme");
- }
- else
- {
- return ("unknown");
- }
+ return ("unknown");
}
}
@@ -56,9 +53,10 @@ fib_node_type_get_name (fib_node_type_t type)
*
* Register the function table for a given type
*/
-void
-fib_node_register_type (fib_node_type_t type,
- const fib_node_vft_t *vft)
+static void
+fib_node_register_type_i (fib_node_type_t type,
+ const char *name,
+ const fib_node_vft_t *vft)
{
/*
* assert that one only registration is made per-node type
@@ -74,16 +72,31 @@ fib_node_register_type (fib_node_type_t type,
vec_validate(fn_vfts, type);
fn_vfts[type] = *vft;
+ vec_validate(fn_type_names, type);
+ fn_type_names[type] = name;
+}
+
+/**
+ * fib_node_register_type
+ *
+ * Register the function table for a given type
+ */
+void
+fib_node_register_type (fib_node_type_t type,
+ const fib_node_vft_t *vft)
+{
+ fib_node_register_type_i(type, fn_type_builtin_names[type], vft);
}
fib_node_type_t
-fib_node_register_new_type (const fib_node_vft_t *vft)
+fib_node_register_new_type (const char *name,
+ const fib_node_vft_t *vft)
{
fib_node_type_t new_type;
new_type = ++last_new_type;
- fib_node_register_type(new_type, vft);
+ fib_node_register_type_i(new_type, name, vft);
return (new_type);
}
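
Client registrations now carry a printable name; a sketch (type name and
vft are illustrative):

    static fib_node_type_t example_type;
    example_type = fib_node_register_new_type ("example-tracker",
                                               &example_vft);
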
@@ -255,7 +268,6 @@ fib_memory_show (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
/*?
* The '<em>sh fib memory </em>' command displays the memory usage for each
* FIB object type.
@@ -288,4 +300,3 @@ VLIB_CLI_COMMAND (show_fib_memory, static) = {
.function = fib_memory_show,
.short_help = "show fib memory",
};
-/* *INDENT-ON* */
diff --git a/src/vnet/fib/fib_node.h b/src/vnet/fib/fib_node.h
index 27e67b11c87..6639c39bcd2 100644
--- a/src/vnet/fib/fib_node.h
+++ b/src/vnet/fib/fib_node.h
@@ -53,8 +53,7 @@ typedef enum fib_node_type_t_ {
/**
* Marker. New types before this one. leave the test last.
*/
- FIB_NODE_TYPE_TEST,
- FIB_NODE_TYPE_LAST = FIB_NODE_TYPE_TEST,
+ FIB_NODE_TYPE_LAST = FIB_NODE_TYPE_ENTRY_TRACK,
} __attribute__ ((packed)) fib_node_type_t;
#define FIB_NODE_TYPE_MAX (FIB_NODE_TYPE_LAST + 1)
@@ -110,6 +109,10 @@ typedef enum fib_node_back_walk_reason_t_ {
*/
FIB_NODE_BW_REASON_INTERFACE_DOWN,
/**
+ * A resolving interface has been bound to another table
+ */
+ FIB_NODE_BW_REASON_INTERFACE_BIND,
+ /**
* A resolving interface has been deleted.
*/
FIB_NODE_BW_REASON_INTERFACE_DELETE,
@@ -138,6 +141,7 @@ typedef enum fib_node_back_walk_reason_t_ {
[FIB_NODE_BW_REASON_INTERFACE_UP] = "if-up", \
[FIB_NODE_BW_REASON_INTERFACE_DOWN] = "if-down", \
[FIB_NODE_BW_REASON_INTERFACE_DELETE] = "if-delete", \
+ [FIB_NODE_BW_REASON_INTERFACE_BIND] = "if-bind", \
[FIB_NODE_BW_REASON_ADJ_UPDATE] = "adj-update", \
[FIB_NODE_BW_REASON_ADJ_MTU] = "adj-mtu", \
[FIB_NODE_BW_REASON_ADJ_DOWN] = "adj-down", \
@@ -157,14 +161,15 @@ typedef enum fib_node_bw_reason_flag_t_ {
FIB_NODE_BW_REASON_FLAG_EVALUATE = (1 << FIB_NODE_BW_REASON_EVALUATE),
FIB_NODE_BW_REASON_FLAG_INTERFACE_UP = (1 << FIB_NODE_BW_REASON_INTERFACE_UP),
FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN = (1 << FIB_NODE_BW_REASON_INTERFACE_DOWN),
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_BIND = (1 << FIB_NODE_BW_REASON_INTERFACE_BIND),
FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE = (1 << FIB_NODE_BW_REASON_INTERFACE_DELETE),
FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE = (1 << FIB_NODE_BW_REASON_ADJ_UPDATE),
FIB_NODE_BW_REASON_FLAG_ADJ_MTU = (1 << FIB_NODE_BW_REASON_ADJ_MTU),
FIB_NODE_BW_REASON_FLAG_ADJ_DOWN = (1 << FIB_NODE_BW_REASON_ADJ_DOWN),
} __attribute__ ((packed)) fib_node_bw_reason_flag_t;
-STATIC_ASSERT(sizeof(fib_node_bw_reason_flag_t) < 2,
- "BW Reason enum < 2 byte. Consequences for cover_upd_res_t");
+STATIC_ASSERT(sizeof(fib_node_bw_reason_flag_t) < 3,
+ "BW Reason enum < 2 byte. Consequences for fib_entry_src_cover_res_t");
extern u8 *format_fib_node_bw_reason(u8 *s, va_list *args);
@@ -229,6 +234,17 @@ typedef struct fib_node_back_walk_ctx_t_ {
* in the graph.
*/
u32 fnbw_depth;
+
+ /**
+     * Additional data associated with the reason the walk is occurring
+ */
+ union
+ {
+ struct {
+ u32 fnbw_from_fib_index;
+ u32 fnbw_to_fib_index;
+ } interface_bind;
+ };
} fib_node_back_walk_ctx_t;
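
A sketch of kicking off a table-bind back-walk with the new context
fields (the index values are illustrative):

    fib_node_back_walk_ctx_t ctx = {
        .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_BIND,
        .interface_bind = {
            .fnbw_from_fib_index = old_fib_index,
            .fnbw_to_fib_index = new_fib_index,
        },
    };
    fib_walk_sync (FIB_NODE_TYPE_ENTRY, fib_entry_index, &ctx);
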
/**
@@ -289,7 +305,6 @@ typedef struct fib_node_vft_t_ {
fib_node_get_t fnv_get;
fib_node_last_lock_gone_t fnv_last_lock;
fib_node_back_walk_t fnv_back_walk;
- format_function_t *fnv_format;
fib_node_memory_show_t fnv_mem_show;
} fib_node_vft_t;
@@ -340,12 +355,13 @@ extern void fib_node_register_type (fib_node_type_t ft,
* @brief
* Create a new FIB node type and Register the function table for it.
*
- * @param vft
- * virtual function table
+ * @param name Name of the type (as displayed when printing children)
+ * @param vft virtual function table
*
* @return new FIB node type
*/
-extern fib_node_type_t fib_node_register_new_type (const fib_node_vft_t *vft);
+extern fib_node_type_t fib_node_register_new_type (const char *name,
+ const fib_node_vft_t *vft);
/**
* @brief Show the memory usage for a type
diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c
index 209cf403c6e..95e7cb6ba7d 100644
--- a/src/vnet/fib/fib_path.c
+++ b/src/vnet/fib/fib_path.c
@@ -501,11 +501,9 @@ format_fib_path (u8 * s, va_list * args)
else
{
s = format (s, " %U",
- format_vnet_sw_interface_name,
+ format_vnet_sw_if_index_name,
vnm,
- vnet_get_sw_interface(
- vnm,
- path->attached_next_hop.fp_interface));
+ path->attached_next_hop.fp_interface);
if (vnet_sw_interface_is_p2p(vnet_get_main(),
path->attached_next_hop.fp_interface))
{
@@ -532,11 +530,8 @@ format_fib_path (u8 * s, va_list * args)
else
{
s = format (s, " %U",
- format_vnet_sw_interface_name,
- vnm,
- vnet_get_sw_interface(
- vnm,
- path->attached.fp_interface));
+ format_vnet_sw_if_index_name,
+ vnm, path->attached.fp_interface);
}
break;
case FIB_PATH_TYPE_RECURSIVE:
@@ -587,11 +582,8 @@ format_fib_path (u8 * s, va_list * args)
break;
case FIB_PATH_TYPE_DVR:
s = format (s, " %U",
- format_vnet_sw_interface_name,
- vnm,
- vnet_get_sw_interface(
- vnm,
- path->dvr.fp_interface));
+ format_vnet_sw_if_index_name,
+ vnm, path->dvr.fp_interface);
break;
case FIB_PATH_TYPE_DEAG:
s = format (s, " %sfib-index:%d",
@@ -1161,6 +1153,11 @@ FIXME comment
fib_path_unresolve(path);
path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP;
}
+ if (FIB_NODE_BW_REASON_FLAG_INTERFACE_BIND & ctx->fnbw_reason)
+ {
+          /* bind walks should appear here and pass silently up
+           * to the fib_entry */
+ }
break;
case FIB_PATH_TYPE_UDP_ENCAP:
{
@@ -1360,7 +1357,8 @@ fib_path_create (fib_node_index_t pl_index,
dpo_copy(&path->exclusive.fp_ex_dpo, &rpath->dpo);
}
else if ((path->fp_cfg_flags & FIB_PATH_CFG_FLAG_ICMP_PROHIBIT) ||
- (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_ICMP_UNREACH))
+ (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_ICMP_UNREACH) ||
+ (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_DROP))
{
path->fp_type = FIB_PATH_TYPE_SPECIAL;
}
@@ -1502,6 +1500,12 @@ fib_path_copy (fib_node_index_t path_index,
clib_memset(&path->fp_dpo, 0, sizeof(path->fp_dpo));
dpo_reset(&path->fp_dpo);
+ if (path->fp_type == FIB_PATH_TYPE_EXCLUSIVE)
+ {
+ clib_memset(&path->exclusive.fp_ex_dpo, 0, sizeof(dpo_id_t));
+ dpo_copy(&path->exclusive.fp_ex_dpo, &orig_path->exclusive.fp_ex_dpo);
+ }
+
return (fib_path_get_index(path));
}
@@ -1987,7 +1991,11 @@ fib_path_resolve (fib_node_index_t path_index)
}
else
{
- fib_prefix_from_ip46_addr(&path->recursive.fp_nh.fp_ip, &pfx);
+ ASSERT(!ip46_address_is_zero(&path->recursive.fp_nh.fp_ip));
+
+ fib_protocol_t fp = (ip46_address_is_ip4(&path->recursive.fp_nh.fp_ip) ?
+ FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6);
+ fib_prefix_from_ip46_addr(fp, &path->recursive.fp_nh.fp_ip, &pfx);
}
fib_table_lock(path->recursive.fp_tbl_id,
@@ -2416,6 +2424,7 @@ fib_path_stack_mpls_disp (fib_node_index_t path_index,
void
fib_path_contribute_forwarding (fib_node_index_t path_index,
fib_forward_chain_type_t fct,
+ dpo_proto_t payload_proto,
dpo_id_t *dpo)
{
fib_path_t *path;
@@ -2423,7 +2432,6 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
path = fib_path_get(path_index);
ASSERT(path);
- ASSERT(FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct);
/*
* The DPO stored in the path was created when the path was resolved.
@@ -2441,9 +2449,19 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
switch (fct)
{
+ case FIB_FORW_CHAIN_TYPE_MPLS_EOS: {
+ dpo_id_t tmp = DPO_INVALID;
+ dpo_copy (&tmp, dpo);
+ path = fib_path_attached_next_hop_get_adj(
+ path,
+ dpo_proto_to_link(payload_proto),
+ &tmp);
+ dpo_copy (dpo, &tmp);
+ dpo_reset(&tmp);
+ break;
+ }
case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
- case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
case FIB_FORW_CHAIN_TYPE_ETHERNET:
case FIB_FORW_CHAIN_TYPE_NSH:
@@ -2555,10 +2573,25 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
case FIB_PATH_TYPE_ATTACHED:
switch (fct)
{
+ case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+ /*
+ * End of stack traffic via an attached path (a glean)
+ * must force an IP lookup so that the IP packet can
+ * match against any installed adj-fibs
+ */
+ lookup_dpo_add_or_lock_w_fib_index(
+ fib_table_get_index_for_sw_if_index(
+ dpo_proto_to_fib(payload_proto),
+ path->attached.fp_interface),
+ payload_proto,
+ LOOKUP_UNICAST,
+ LOOKUP_INPUT_DST_ADDR,
+ LOOKUP_TABLE_FROM_CONFIG,
+ dpo);
+ break;
case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
- case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
case FIB_FORW_CHAIN_TYPE_ETHERNET:
case FIB_FORW_CHAIN_TYPE_NSH:
case FIB_FORW_CHAIN_TYPE_BIER:
@@ -2604,8 +2637,8 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
/*
* Create the adj needed for sending IP multicast traffic
*/
- interface_rx_dpo_add_or_lock(fib_forw_chain_type_to_dpo_proto(fct),
- path->attached.fp_interface,
+ interface_rx_dpo_add_or_lock(payload_proto,
+ path->intf_rx.fp_interface,
dpo);
break;
case FIB_PATH_TYPE_UDP_ENCAP:
@@ -2625,6 +2658,7 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
load_balance_path_t *
fib_path_append_nh_for_multipath_hash (fib_node_index_t path_index,
fib_forward_chain_type_t fct,
+ dpo_proto_t payload_proto,
load_balance_path_t *hash_key)
{
load_balance_path_t *mnh;
@@ -2641,7 +2675,7 @@ fib_path_append_nh_for_multipath_hash (fib_node_index_t path_index,
if (fib_path_is_resolved(path_index))
{
- fib_path_contribute_forwarding(path_index, fct, &mnh->path_dpo);
+ fib_path_contribute_forwarding(path_index, fct, payload_proto, &mnh->path_dpo);
}
else
{
diff --git a/src/vnet/fib/fib_path.h b/src/vnet/fib/fib_path.h
index c0f76411390..f3442c23dd6 100644
--- a/src/vnet/fib/fib_path.h
+++ b/src/vnet/fib/fib_path.h
@@ -191,6 +191,7 @@ extern uword fib_path_hash(fib_node_index_t path_index);
extern load_balance_path_t * fib_path_append_nh_for_multipath_hash(
fib_node_index_t path_index,
fib_forward_chain_type_t fct,
+ dpo_proto_t payload_proto,
load_balance_path_t *hash_key);
extern void fib_path_stack_mpls_disp(fib_node_index_t path_index,
dpo_proto_t payload_proto,
@@ -198,6 +199,7 @@ extern void fib_path_stack_mpls_disp(fib_node_index_t path_index,
dpo_id_t *dpo);
extern void fib_path_contribute_forwarding(fib_node_index_t path_index,
fib_forward_chain_type_t type,
+ dpo_proto_t payload_proto,
dpo_id_t *dpo);
extern void fib_path_contribute_urpf(fib_node_index_t path_index,
index_t urpf);
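Callers of fib_path_contribute_forwarding (and of fib_path_append_nh_for_multipath_hash) now name the payload protocol explicitly, since an MPLS-EOS chain no longer identifies it on its own. A caller-side sketch, assuming a resolved path_index carrying IPv4 payloads:

dpo_id_t dpo = DPO_INVALID;

fib_path_contribute_forwarding (path_index,
                                FIB_FORW_CHAIN_TYPE_MPLS_EOS,
                                DPO_PROTO_IP4,
                                &dpo);
/* ... stack the result under a parent object ... */
dpo_reset (&dpo);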
diff --git a/src/vnet/fib/fib_path_ext.c b/src/vnet/fib/fib_path_ext.c
index 209b6273a85..f5611f92271 100644
--- a/src/vnet/fib/fib_path_ext.c
+++ b/src/vnet/fib/fib_path_ext.c
@@ -163,8 +163,8 @@ fib_path_ext_mpls_flags_to_mpls_label (fib_path_ext_mpls_flags_t fpe_flags)
load_balance_path_t *
fib_path_ext_stack (fib_path_ext_t *path_ext,
+ dpo_proto_t payload_proto,
fib_forward_chain_type_t child_fct,
- fib_forward_chain_type_t imp_null_fct,
load_balance_path_t *nhs)
{
fib_forward_chain_type_t parent_fct;
@@ -189,7 +189,7 @@ fib_path_ext_stack (fib_path_ext_t *path_ext,
*/
if (fib_path_ext_is_imp_null(path_ext))
{
- parent_fct = imp_null_fct;
+ parent_fct = fib_forw_chain_type_from_dpo_proto(payload_proto);
}
else
{
@@ -240,6 +240,7 @@ fib_path_ext_stack (fib_path_ext_t *path_ext,
*/
fib_path_contribute_forwarding(path_ext->fpe_path_index,
parent_fct,
+ payload_proto,
&via_dpo);
if (dpo_is_drop(&via_dpo) ||
diff --git a/src/vnet/fib/fib_path_ext.h b/src/vnet/fib/fib_path_ext.h
index b49fd977a20..2850a588608 100644
--- a/src/vnet/fib/fib_path_ext.h
+++ b/src/vnet/fib/fib_path_ext.h
@@ -141,8 +141,8 @@ extern void fib_path_ext_resolve(fib_path_ext_t *path_ext,
fib_node_index_t path_list_index);
extern load_balance_path_t *fib_path_ext_stack(fib_path_ext_t *path_ext,
+ dpo_proto_t payload_proto,
fib_forward_chain_type_t fct,
- fib_forward_chain_type_t imp_null_fct,
load_balance_path_t *nhs);
extern fib_path_ext_t * fib_path_ext_list_push_back (fib_path_ext_list_t *list,
diff --git a/src/vnet/fib/fib_path_list.c b/src/vnet/fib/fib_path_list.c
index 81751695f47..ebd2c0e9be1 100644
--- a/src/vnet/fib/fib_path_list.c
+++ b/src/vnet/fib/fib_path_list.c
@@ -378,8 +378,10 @@ fib_path_list_mk_lb (fib_path_list_t *path_list,
if ((flags & FIB_PATH_LIST_FWD_FLAG_STICKY) ||
fib_path_is_resolved(*path_index))
{
- nhs = fib_path_append_nh_for_multipath_hash(*path_index,
- fct, nhs);
+ nhs = fib_path_append_nh_for_multipath_hash(
+ *path_index, fct,
+ fib_forw_chain_type_to_dpo_proto(fct),
+ nhs);
}
}
@@ -962,8 +964,7 @@ fib_path_list_copy_and_path_add (fib_node_index_t orig_path_list_index,
}
if (duplicate)
{
- _vec_len(path_list->fpl_paths) =
- vec_len(path_list->fpl_paths) - 1;
+ vec_set_len(path_list->fpl_paths, vec_len(path_list->fpl_paths) - 1);
fib_path_destroy(new_path_index);
}
else
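Direct _vec_len assignments are replaced throughout with vec_set_len, vppinfra's explicit length setter. A minimal sketch of the new idiom for trimming one element, assuming a vector v:

/* adjusts only the stored length; the allocation is untouched */
vec_set_len (v, vec_len (v) - 1);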
diff --git a/src/vnet/fib/fib_sas.c b/src/vnet/fib/fib_sas.c
index 8abac8672fd..c9d469379f1 100644
--- a/src/vnet/fib/fib_sas.c
+++ b/src/vnet/fib/fib_sas.c
@@ -112,7 +112,7 @@ fib_sas6_get (u32 sw_if_index,
/*
* if the dst is v6 and link local, use the source link local
*/
- if (ip6_address_is_link_local_unicast (dst))
+ if (dst && ip6_address_is_link_local_unicast (dst))
{
const ip6_address_t *ll = ip6_get_link_local_address (sw_if_index);
if (NULL == ll)
diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c
index 3a46d226ebd..b2a32d0da56 100644
--- a/src/vnet/fib/fib_table.c
+++ b/src/vnet/fib/fib_table.c
@@ -25,6 +25,13 @@
const static char * fib_table_flags_strings[] = FIB_TABLE_ATTRIBUTES;
+/*
+ * Default names for the IP4, IP6, and MPLS FIB tables at index 0.
+ * The nominal name is of the form "ipv6-VRF:0"; a default-table-name
+ * set in the corresponding config section of startup.conf overrides it.
+ */
+char *fib_table_default_names[FIB_PROTOCOL_MAX];
+
fib_table_t *
fib_table_get (fib_node_index_t index,
fib_protocol_t proto)
@@ -534,7 +541,11 @@ fib_table_route_path_fixup (const fib_prefix_t *prefix,
else if (fib_route_path_is_attached(path))
{
path->frp_flags |= FIB_ROUTE_PATH_GLEAN;
- fib_prefix_normalize(prefix, &path->frp_connected);
+ /*
+ * attached prefixes are not suitable as the source of ARP requests
+ * so don't save the prefix in the glean adj
+ */
+ clib_memset(&path->frp_connected, 0, sizeof(path->frp_connected));
}
if (*eflags & FIB_ENTRY_FLAG_DROP)
{
@@ -1149,21 +1160,29 @@ fib_table_find_or_create_and_lock_i (fib_protocol_t proto,
fib_table = fib_table_get(fi, proto);
- if (NULL == fib_table->ft_desc)
+ if (fib_table->ft_desc)
+ return fi;
+
+ if (name && name[0])
{
- if (name && name[0])
- {
- fib_table->ft_desc = format(NULL, "%s", name);
- }
- else
- {
- fib_table->ft_desc = format(NULL, "%U-VRF:%d",
- format_fib_protocol, proto,
- table_id);
- }
+ fib_table->ft_desc = format(NULL, "%s", name);
+ return fi;
}
- return (fi);
+ if (table_id == 0)
+ {
+ char *default_name = fib_table_default_names[proto];
+ if (default_name && default_name[0])
+ {
+ fib_table->ft_desc = format(NULL, "%s", default_name);
+ return fi;
+ }
+ }
+
+ fib_table->ft_desc = format(NULL, "%U-VRF:%d",
+ format_fib_protocol, proto,
+ table_id);
+ return fi;
}
u32
diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h
index 11137e173cf..0eaaa67eea2 100644
--- a/src/vnet/fib/fib_table.h
+++ b/src/vnet/fib/fib_table.h
@@ -122,6 +122,15 @@ typedef struct fib_table_t_
u8* ft_desc;
} fib_table_t;
+
+/**
+ * @brief
+ * Default names for the IP4, IP6, and MPLS FIB tables at index 0.
+ * The nominal name is of the form "ipv4-VRF:0"; a default-table-name
+ * set in the corresponding config section of startup.conf overrides it.
+ */
+extern char *fib_table_default_names[FIB_PROTOCOL_MAX];
+
/**
* @brief
* Format the description/name of the table
diff --git a/src/vnet/fib/fib_types.c b/src/vnet/fib/fib_types.c
index 15e795a72c9..9abb89bc6a0 100644
--- a/src/vnet/fib/fib_types.c
+++ b/src/vnet/fib/fib_types.c
@@ -78,16 +78,15 @@ format_fib_mpls_label (u8 *s, va_list *ap)
}
void
-fib_prefix_from_ip46_addr (const ip46_address_t *addr,
+fib_prefix_from_ip46_addr (fib_protocol_t fproto,
+ const ip46_address_t *addr,
fib_prefix_t *pfx)
{
- ASSERT(!ip46_address_is_zero(addr));
+ ASSERT(FIB_PROTOCOL_MPLS != fproto);
- pfx->fp_proto = ((ip46_address_is_ip4(addr) ?
- FIB_PROTOCOL_IP4 :
- FIB_PROTOCOL_IP6));
- pfx->fp_len = ((ip46_address_is_ip4(addr) ?
- 32 : 128));
+ pfx->fp_proto = fproto;
+ pfx->fp_len = ((FIB_PROTOCOL_IP4 == fproto) ?
+ 32 : 128);
pfx->fp_addr = *addr;
pfx->___fp___pad = 0;
}
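Because the helper no longer inspects the address to choose a protocol, callers that relied on auto-detection now derive it first; this also permits an all-zeros address, which the old assert rejected. A caller-side sketch for a non-MPLS address addr:

fib_protocol_t fproto = (ip46_address_is_ip4 (&addr) ?
                         FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6);
fib_prefix_t pfx;

fib_prefix_from_ip46_addr (fproto, &addr, &pfx);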
@@ -541,6 +540,7 @@ unformat_fib_route_path (unformat_input_t * input, va_list * args)
{
fib_route_path_t *rpath = va_arg (*args, fib_route_path_t *);
dpo_proto_t *payload_proto = va_arg (*args, void*);
+ dpo_proto_t explicit_proto = DPO_PROTO_NONE;
u32 weight, preference, udp_encap_id, fi;
mpls_label_t out_label;
vnet_main_t *vnm;
@@ -708,6 +708,17 @@ unformat_fib_route_path (unformat_input_t * input, va_list * args)
rpath->frp_proto = DPO_PROTO_IP4;
rpath->frp_flags = FIB_ROUTE_PATH_INTF_RX;
}
+ else if (unformat (input, "rx-ip6 %U",
+ unformat_vnet_sw_interface, vnm,
+ &rpath->frp_sw_if_index))
+ {
+ rpath->frp_proto = DPO_PROTO_IP6;
+ rpath->frp_flags = FIB_ROUTE_PATH_INTF_RX;
+ }
+ else if (unformat (input, "drop"))
+ {
+ rpath->frp_flags = FIB_ROUTE_PATH_DROP;
+ }
else if (unformat (input, "local"))
{
clib_memset (&rpath->frp_addr, 0, sizeof (rpath->frp_addr));
@@ -726,6 +737,14 @@ unformat_fib_route_path (unformat_input_t * input, va_list * args)
vec_add1(rpath->frp_label_stack, fml);
}
}
+ else if (unformat (input, "ip4"))
+ {
+ explicit_proto = DPO_PROTO_IP4;
+ }
+ else if (unformat (input, "ip6"))
+ {
+ explicit_proto = DPO_PROTO_IP6;
+ }
else if (unformat (input, "%U",
unformat_vnet_sw_interface, vnm,
&rpath->frp_sw_if_index))
@@ -750,6 +769,9 @@ unformat_fib_route_path (unformat_input_t * input, va_list * args)
}
}
+ if (DPO_PROTO_NONE != explicit_proto)
+ *payload_proto = rpath->frp_proto = explicit_proto;
+
return (1);
}
@@ -764,6 +786,7 @@ fib_route_path_is_attached (const fib_route_path_t *rpath)
* L3 game with these
*/
if (rpath->frp_flags & (FIB_ROUTE_PATH_DVR |
+ FIB_ROUTE_PATH_INTF_RX |
FIB_ROUTE_PATH_UDP_ENCAP))
{
return (0);
diff --git a/src/vnet/fib/fib_types.h b/src/vnet/fib/fib_types.h
index dbd4e97e867..b9346c75108 100644
--- a/src/vnet/fib/fib_types.h
+++ b/src/vnet/fib/fib_types.h
@@ -276,8 +276,9 @@ extern void fib_prefix_normalize(const fib_prefix_t *p,
/**
* \brief Host prefix from ip
*/
-extern void fib_prefix_from_ip46_addr (const ip46_address_t *addr,
- fib_prefix_t *pfx);
+extern void fib_prefix_from_ip46_addr (fib_protocol_t fproto,
+ const ip46_address_t *addr,
+ fib_prefix_t *pfx);
extern u8 * format_fib_prefix(u8 * s, va_list * args);
extern u8 * format_fib_forw_chain_type(u8 * s, va_list * args);
@@ -632,7 +633,7 @@ extern int fib_route_path_is_attached (const fib_route_path_t *rpath);
/**
* A help string to list the FIB path options
*/
-#define FIB_ROUTE_PATH_HELP "[next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4 <interface>] [out-labels <value value value>]"
+#define FIB_ROUTE_PATH_HELP "[next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 <interface>] [out-labels <value value value>]"
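With the parser additions above, a route path may now be an IPv6 interface-rx path, an explicit drop, or carry a forced ip4/ip6 payload protocol. Illustrative CLI forms (interface names are hypothetical, and the exact enclosing command may vary):

vpp# ip route add 10.1.1.1/32 via rx-ip4 tunnel0
vpp# ip route add 2001:db8::1/128 via rx-ip6 tunnel0
vpp# ip route add 192.0.2.0/24 via drop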
/**
* return code to control path-list walk
diff --git a/src/vnet/fib/fib_urpf_list.c b/src/vnet/fib/fib_urpf_list.c
index f9790b59031..67be6699a0e 100644
--- a/src/vnet/fib/fib_urpf_list.c
+++ b/src/vnet/fib/fib_urpf_list.c
@@ -55,11 +55,10 @@ index_t
fib_urpf_list_alloc_and_lock (void)
{
fib_urpf_list_t *urpf;
- u8 need_barrier_sync = 0;
+ u8 need_barrier_sync = pool_get_will_expand (fib_urpf_list_pool);
vlib_main_t *vm = vlib_get_main();
ASSERT (vm->thread_index == 0);
- pool_get_will_expand (fib_urpf_list_pool, need_barrier_sync );
if (need_barrier_sync)
vlib_worker_thread_barrier_sync (vm);
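The expansion check is now taken from the return value of pool_get_will_expand rather than via an out-parameter. The full main-thread pattern, sketched for a generic pool my_pool (names are illustrative):

u8 need_barrier_sync = pool_get_will_expand (my_pool);

if (need_barrier_sync)
    vlib_worker_thread_barrier_sync (vm); /* park workers before the realloc */

pool_get (my_pool, elt);

if (need_barrier_sync)
    vlib_worker_thread_barrier_release (vm);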
@@ -174,7 +173,7 @@ fib_urpf_list_bake (index_t ui)
if (urpf->furpf_itfs[i] != urpf->furpf_itfs[j])
urpf->furpf_itfs[++i] = urpf->furpf_itfs[j];
/* set the length of the vector to the number of unique itfs */
- _vec_len(urpf->furpf_itfs) = i+1;
+ vec_set_len (urpf->furpf_itfs, i+1);
}
urpf->furpf_flags |= FIB_URPF_LIST_BAKED;
@@ -229,7 +228,6 @@ show_fib_urpf_list_command (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
/*?
* The '<em>sh fib uRPF [index] </em>' command displays the uRPF lists
*
@@ -247,4 +245,3 @@ VLIB_CLI_COMMAND (show_fib_urpf_list, static) = {
.function = show_fib_urpf_list_command,
.short_help = "show fib uRPF",
};
-/* *INDENT-OFF* */
diff --git a/src/vnet/fib/fib_walk.c b/src/vnet/fib/fib_walk.c
index b3b2b1e7944..236607cb891 100644
--- a/src/vnet/fib/fib_walk.c
+++ b/src/vnet/fib/fib_walk.c
@@ -611,13 +611,11 @@ fib_walk_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (fib_walk_process_node,static) = {
.function = fib_walk_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "fib-walk",
};
-/* *INDENT-ON* */
/**
* @brief Allocate a new walk object
diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c
index 2fa5d7e00ca..0eff8d0d485 100644
--- a/src/vnet/fib/ip4_fib.c
+++ b/src/vnet/fib/ip4_fib.c
@@ -216,6 +216,7 @@ ip4_fib_table_destroy (u32 fib_index)
hash_unset (ip4_main.fib_index_by_table_id, fib_table->ft_table_id);
}
+ vec_free (fib_table->ft_locks);
vec_free(fib_table->ft_src_route_counts);
ip4_fib_table_free(v4_fib);
@@ -620,10 +621,29 @@ ip4_show_fib (vlib_main_t * vm,
* 32 4
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip4_show_fib_command, static) = {
.path = "show ip fib",
.short_help = "show ip fib [summary] [table <table-id>] [index <fib-id>] [<ip4-addr>[/<mask>]] [mtrie] [detail]",
.function = ip4_show_fib,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+ip_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ char *default_name = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "default-table-name %s", &default_name))
+ ;
+ else
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+
+ fib_table_default_names[FIB_PROTOCOL_IP4] = default_name;
+
+ return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (ip_config, "ip");
diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c
index 708fddce66a..d37b77e08a4 100644
--- a/src/vnet/fib/ip6_fib.c
+++ b/src/vnet/fib/ip6_fib.c
@@ -174,6 +174,7 @@ ip6_fib_table_destroy (u32 fib_index)
{
hash_unset (ip6_main.fib_index_by_table_id, fib_table->ft_table_id);
}
+ vec_free (fib_table->ft_locks);
vec_free(fib_table->ft_src_route_counts);
pool_put_index(ip6_main.v6_fibs, fib_table->ft_index);
pool_put(ip6_main.fibs, fib_table);
@@ -861,19 +862,18 @@ ip6_show_fib (vlib_main_t * vm,
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_show_fib_command, static) = {
.path = "show ip6 fib",
.short_help = "show ip6 fib [summary] [table <table-id>] [index <fib-id>] [<ip6-addr>[/<width>]] [detail]",
.function = ip6_show_fib,
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_config (vlib_main_t * vm, unformat_input_t * input)
{
uword heapsize = 0;
u32 nbuckets = 0;
+ char *default_name = 0;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
@@ -882,6 +882,8 @@ ip6_config (vlib_main_t * vm, unformat_input_t * input)
else if (unformat (input, "heap-size %U",
unformat_memory_size, &heapsize))
;
+ else if (unformat (input, "default-table-name %s", &default_name))
+ ;
else
return clib_error_return (0, "unknown input '%U'",
format_unformat_error, input);
@@ -889,6 +891,7 @@ ip6_config (vlib_main_t * vm, unformat_input_t * input)
ip6_fib_table_nbuckets = nbuckets;
ip6_fib_table_size = heapsize;
+ fib_table_default_names[FIB_PROTOCOL_IP6] = default_name;
return 0;
}
diff --git a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c
index 0ed2413242f..767fc84c8a8 100644
--- a/src/vnet/fib/mpls_fib.c
+++ b/src/vnet/fib/mpls_fib.c
@@ -275,6 +275,7 @@ mpls_fib_table_destroy (u32 fib_index)
}
hash_free(mf->mf_entries);
+ vec_free (fib_table->ft_locks);
vec_free(fib_table->ft_src_route_counts);
pool_put(mpls_main.mpls_fibs, mf);
pool_put(mpls_main.fibs, fib_table);
@@ -450,7 +451,7 @@ mpls_fib_show (vlib_main_t * vm,
continue;
s = format (s, "%v, fib_index:%d locks:[",
- fib_table->ft_desc, mpls_main.fibs - fib_table);
+ fib_table->ft_desc, fib_table - mpls_main.fibs);
vec_foreach_index(source, fib_table->ft_locks)
{
if (0 != fib_table->ft_locks[source])
@@ -480,3 +481,24 @@ VLIB_CLI_COMMAND (mpls_fib_show_command, static) = {
.short_help = "show mpls fib [summary] [table <n>]",
.function = mpls_fib_show,
};
+
+static clib_error_t *
+mpls_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ char *default_name = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "default-table-name %s", &default_name))
+ ;
+ else
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+
+ fib_table_default_names[FIB_PROTOCOL_MPLS] = default_name;
+
+ return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (mpls_config, "mpls");
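With the three early-config handlers registered, the index-0 table names can be set from startup.conf. An illustrative configuration (the names are examples):

ip   { default-table-name v4-default }
ip6  { default-table-name v6-default }
mpls { default-table-name mpls-default }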
diff --git a/src/vnet/fib/mpls_fib.h b/src/vnet/fib/mpls_fib.h
index e7ea5d583d1..e9fee9990ac 100644
--- a/src/vnet/fib/mpls_fib.h
+++ b/src/vnet/fib/mpls_fib.h
@@ -31,7 +31,7 @@
* Type exposure is to allow the DP fast/inlined access
*/
#define MPLS_FIB_KEY_SIZE 21
-#define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1))
+#define MPLS_FIB_DB_SIZE (1 << MPLS_FIB_KEY_SIZE)
/**
* There are no options for controlling the MPLS flow hash,
diff --git a/src/vnet/flow/FEATURE.yaml b/src/vnet/flow/FEATURE.yaml
index a26571c35e8..8633f4febdd 100644
--- a/src/vnet/flow/FEATURE.yaml
+++ b/src/vnet/flow/FEATURE.yaml
@@ -16,13 +16,15 @@ features:
- FLOW_TYPE_IP4_VXLAN,
- FLOW_TYPE_IP6_VXLAN,
- FLOW_TYPE_IP4_GTPC,
- - FLOW_TYPE_IP4_GTPU
+ - FLOW_TYPE_IP4_GTPU,
+ - FLOW_TYPE_GENERIC
- The below flow actions can be specified for the flows:
- FLOW_ACTION_COUNT,
- FLOW_ACTION_MARK,
- FLOW_ACTION_BUFFER_ADVANCE,
- FLOW_ACTION_REDIRECT_TO_NODE,
- FLOW_ACTION_REDIRECT_TO_QUEUE,
+ - FLOW_ACTION_RSS,
- FLOW_ACTION_DROP
description: "Flow infrastructure to provide hardware offload capabilities"
state: development
diff --git a/src/vnet/flow/flow.api b/src/vnet/flow/flow.api
index 7bb21cdcd72..1e807b539d5 100644
--- a/src/vnet/flow/flow.api
+++ b/src/vnet/flow/flow.api
@@ -13,7 +13,7 @@
* limitations under the License.
*/
-option version = "0.0.2";
+option version = "1.0.3";
import "vnet/interface_types.api";
import "vnet/ip/ip_types.api";
@@ -26,12 +26,27 @@ import "vnet/flow/flow_types.api";
*/
define flow_add
{
+ option deprecated;
+
u32 client_index;
u32 context;
vl_api_flow_rule_t flow;
option vat_help = "test flow add [src-ip <ip-addr/mask>] [dst-ip <ip-addr/mask>] [src-port <port/mask>] [dst-port <port/mask>] [proto <ip-proto>]";
};
+/** \brief flow add request v2
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param flow - flow rule v2
+*/
+define flow_add_v2
+{
+ u32 client_index;
+ u32 context;
+ vl_api_flow_rule_v2_t flow;
+ option vat_help = "test flow add [src-ip <ip-addr/mask>] [dst-ip <ip-addr/mask>] [src-port <port/mask>] [dst-port <port/mask>] [proto <ip-proto>] [spec <spec-string>] [mask <mask-string>]";
+};
+
/** \brief reply for adding flow
@param context - sender context, to match reply w/ request
@param retval - return code
@@ -39,6 +54,20 @@ define flow_add
*/
define flow_add_reply
{
+ option deprecated;
+
+ u32 context;
+ i32 retval;
+ u32 flow_index;
+};
+
+/** \brief reply for adding flow v2
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param flow_index - flow index, can be used for flow del/enable/disable
+*/
+define flow_add_v2_reply
+{
u32 context;
i32 retval;
u32 flow_index;
diff --git a/src/vnet/flow/flow.c b/src/vnet/flow/flow.c
index 9b6a376af3e..eda15356958 100644
--- a/src/vnet/flow/flow.c
+++ b/src/vnet/flow/flow.c
@@ -74,12 +74,10 @@ vnet_flow_del (vnet_main_t * vnm, u32 flow_index)
if (f == 0)
return VNET_FLOW_ERROR_NO_SUCH_ENTRY;
- /* *INDENT-OFF* */
hash_foreach (hw_if_index, private_data, f->private_data,
({
vnet_flow_disable (vnm, flow_index, hw_if_index);
}));
- /* *INDENT-ON* */
hash_free (f->private_data);
clib_memset (f, 0, sizeof (*f));
diff --git a/src/vnet/flow/flow.h b/src/vnet/flow/flow.h
index 76c1df8a22f..ada822257e3 100644
--- a/src/vnet/flow/flow.h
+++ b/src/vnet/flow/flow.h
@@ -18,31 +18,43 @@
#include <vppinfra/clib.h>
#include <vppinfra/pcap.h>
+#include <vnet/vnet.h>
#include <vnet/l3_types.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
#include <vnet/ethernet/packet.h>
-#define foreach_flow_type \
- /* l2 flow*/ \
- _(ETHERNET, ethernet, "ethernet") \
- /* l3 IP flow */ \
- _(IP4, ip4, "ipv4") \
- _(IP6, ip6, "ipv6") \
- /* IP tunnel flow */ \
- _(IP4_L2TPV3OIP, ip4_l2tpv3oip, "ipv4-l2tpv3oip") \
- _(IP4_IPSEC_ESP, ip4_ipsec_esp, "ipv4-ipsec-esp") \
- _(IP4_IPSEC_AH, ip4_ipsec_ah, "ipv4-ipsec-ah") \
- /* l4 flow*/ \
- _(IP4_N_TUPLE, ip4_n_tuple, "ipv4-n-tuple") \
- _(IP6_N_TUPLE, ip6_n_tuple, "ipv6-n-tuple") \
- _(IP4_N_TUPLE_TAGGED, ip4_n_tuple_tagged, "ipv4-n-tuple-tagged") \
- _(IP6_N_TUPLE_TAGGED, ip6_n_tuple_tagged, "ipv6-n-tuple-tagged") \
- /* L4 tunnel flow*/ \
- _(IP4_VXLAN, ip4_vxlan, "ipv4-vxlan") \
- _(IP6_VXLAN, ip6_vxlan, "ipv6-vxlan") \
- _(IP4_GTPC, ip4_gtpc, "ipv4-gtpc") \
- _(IP4_GTPU, ip4_gtpu, "ipv4-gtpu")
+#define foreach_flow_type \
+ /* l2 flow*/ \
+ _ (ETHERNET, ethernet, "ethernet") \
+ /* l3 IP flow */ \
+ _ (IP4, ip4, "ipv4") \
+ _ (IP6, ip6, "ipv6") \
+ /* IP tunnel flow */ \
+ _ (IP4_L2TPV3OIP, ip4_l2tpv3oip, "ipv4-l2tpv3oip") \
+ _ (IP4_IPSEC_ESP, ip4_ipsec_esp, "ipv4-ipsec-esp") \
+ _ (IP4_IPSEC_AH, ip4_ipsec_ah, "ipv4-ipsec-ah") \
+ /* l4 flow*/ \
+ _ (IP4_N_TUPLE, ip4_n_tuple, "ipv4-n-tuple") \
+ _ (IP6_N_TUPLE, ip6_n_tuple, "ipv6-n-tuple") \
+ _ (IP4_N_TUPLE_TAGGED, ip4_n_tuple_tagged, "ipv4-n-tuple-tagged") \
+ _ (IP6_N_TUPLE_TAGGED, ip6_n_tuple_tagged, "ipv6-n-tuple-tagged") \
+ /* L4 tunnel flow*/ \
+ _ (IP4_VXLAN, ip4_vxlan, "ipv4-vxlan") \
+ _ (IP6_VXLAN, ip6_vxlan, "ipv6-vxlan") \
+ _ (IP4_GTPC, ip4_gtpc, "ipv4-gtpc") \
+ _ (IP4_GTPU, ip4_gtpu, "ipv4-gtpu") \
+ /* generic flow */ \
+ _ (GENERIC, generic, "generic") \
+ /* IP in IP */ \
+ _ (IP6_IP6, ip6_ip6, "ipv6-ipv6") \
+ _ (IP6_IP4, ip6_ip4, "ipv6-ipv4") \
+ _ (IP4_IP6, ip4_ip6, "ipv4-ipv6") \
+ _ (IP4_IP4, ip4_ip4, "ipv4-ipv4") \
+ _ (IP6_IP6_N_TUPLE, ip6_ip6_n_tuple, "ipv6-ipv6-n-tuple") \
+ _ (IP6_IP4_N_TUPLE, ip6_ip4_n_tuple, "ipv6-ipv4-n-tuple") \
+ _ (IP4_IP6_N_TUPLE, ip4_ip6_n_tuple, "ipv4-ipv6-n-tuple") \
+ _ (IP4_IP4_N_TUPLE, ip4_ip4_n_tuple, "ipv4-ipv4-n-tuple")
#define foreach_flow_entry_ethernet \
_fe(ethernet_header_t, eth_hdr)
@@ -103,6 +115,44 @@
foreach_flow_entry_ip4_n_tuple \
_fe(u32, teid)
+#define foreach_flow_entry_ip6_ip6 \
+ foreach_flow_entry_ip6 _fe (ip6_address_and_mask_t, in_src_addr) \
+ _fe (ip6_address_and_mask_t, in_dst_addr) \
+ _fe (ip_prot_and_mask_t, in_protocol)
+
+#define foreach_flow_entry_ip6_ip6_n_tuple \
+ foreach_flow_entry_ip6_ip6 _fe (ip_port_and_mask_t, in_src_port) \
+ _fe (ip_port_and_mask_t, in_dst_port)
+
+#define foreach_flow_entry_ip6_ip4 \
+ foreach_flow_entry_ip6 _fe (ip4_address_and_mask_t, in_src_addr) \
+ _fe (ip4_address_and_mask_t, in_dst_addr) \
+ _fe (ip_prot_and_mask_t, in_protocol)
+
+#define foreach_flow_entry_ip6_ip4_n_tuple \
+ foreach_flow_entry_ip6_ip4 _fe (ip_port_and_mask_t, in_src_port) \
+ _fe (ip_port_and_mask_t, in_dst_port)
+
+#define foreach_flow_entry_ip4_ip6 \
+ foreach_flow_entry_ip4 _fe (ip6_address_and_mask_t, in_src_addr) \
+ _fe (ip6_address_and_mask_t, in_dst_addr) \
+ _fe (ip_prot_and_mask_t, in_protocol)
+
+#define foreach_flow_entry_ip4_ip6_n_tuple \
+ foreach_flow_entry_ip4_ip6 _fe (ip_port_and_mask_t, in_src_port) \
+ _fe (ip_port_and_mask_t, in_dst_port)
+
+#define foreach_flow_entry_ip4_ip4 \
+ foreach_flow_entry_ip4 _fe (ip4_address_and_mask_t, in_src_addr) \
+ _fe (ip4_address_and_mask_t, in_dst_addr) \
+ _fe (ip_prot_and_mask_t, in_protocol)
+
+#define foreach_flow_entry_ip4_ip4_n_tuple \
+ foreach_flow_entry_ip4_ip4 _fe (ip_port_and_mask_t, in_src_port) \
+ _fe (ip_port_and_mask_t, in_dst_port)
+
+#define foreach_flow_entry_generic _fe (generic_pattern_t, pattern)
+
#define foreach_flow_action \
_(0, COUNT, "count") \
_(1, MARK, "mark") \
@@ -150,6 +200,7 @@ typedef enum
_ (19, NVGRE, "nvgre") \
_ (20, GTPU, "gtpu") \
_ (21, ESP, "esp") \
+ _ (22, L2TPV3, "l2tpv3") \
_ (60, L4_DST_ONLY, "l4-dst-only") \
_ (61, L4_SRC_ONLY, "l4-src-only") \
_ (62, L3_DST_ONLY, "l3-dst-only") \
@@ -189,6 +240,12 @@ typedef struct
u8 mask;
} ip_prot_and_mask_t;
+typedef struct
+{
+ u8 spec[1024];
+ u8 mask[1024];
+} generic_pattern_t;
+
typedef enum
{
VNET_FLOW_TYPE_UNKNOWN,
@@ -241,6 +298,10 @@ typedef struct
/* queue for VNET_FLOW_ACTION_REDIRECT_TO_QUEUE */
u32 redirect_queue;
+ /* first queue index and number of queues in the RSS queue group */
+ u32 queue_index;
+ u32 queue_num;
+
/* buffer offset for VNET_FLOW_ACTION_BUFFER_ADVANCE */
i32 buffer_advance;
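The queue_index/queue_num pair describes an RSS queue group directly on the flow. A minimal sketch of programming one through the internal API, assuming an IPv4 flow spread over queues 0..3:

vnet_flow_t flow = { 0 };
u32 flow_index;
int rv;

flow.index = ~0;
flow.type = VNET_FLOW_TYPE_IP4;
flow.actions = VNET_FLOW_ACTION_RSS;
flow.queue_index = 0; /* first queue in the group */
flow.queue_num = 4;   /* number of queues in the group */

rv = vnet_flow_add (vnet_get_main (), &flow, &flow_index);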
diff --git a/src/vnet/flow/flow_api.c b/src/vnet/flow/flow_api.c
index 6f08f0314a4..bfe97ec2978 100644
--- a/src/vnet/flow/flow_api.c
+++ b/src/vnet/flow/flow_api.c
@@ -215,6 +215,16 @@ ipv4_gtpc_flow_convert (vl_api_flow_ip4_gtpc_t * vl_api_flow,
f->teid = ntohl (vl_api_flow->teid);
}
+static inline void
+generic_flow_convert (vl_api_flow_generic_t *vl_api_flow,
+ vnet_flow_generic_t *f)
+{
+ clib_memcpy (f->pattern.spec, vl_api_flow->pattern.spec,
+ sizeof (vl_api_flow->pattern.spec));
+ clib_memcpy (f->pattern.mask, vl_api_flow->pattern.mask,
+ sizeof (vl_api_flow->pattern.mask));
+}
+
static void
vl_api_flow_add_t_handler (vl_api_flow_add_t * mp)
{
@@ -289,12 +299,95 @@ vl_api_flow_add_t_handler (vl_api_flow_add_t * mp)
rv = vnet_flow_add (vnm, &flow, &flow_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_FLOW_ADD_REPLY,
({
rmp->flow_index = ntohl (flow_index);
}));
- /* *INDENT-ON* */
+}
+
+static void
+vl_api_flow_add_v2_t_handler (vl_api_flow_add_v2_t *mp)
+{
+ vl_api_flow_add_v2_reply_t *rmp;
+ int rv = 0;
+ vnet_flow_t flow;
+ u32 flow_index = ~0;
+ vl_api_flow_rule_v2_t *f = &mp->flow;
+
+ vnet_main_t *vnm = vnet_get_main ();
+
+ flow.type = ntohl (f->type);
+ flow.actions = ntohl (f->actions);
+ flow.mark_flow_id = ntohl (f->mark_flow_id);
+ flow.redirect_node_index = ntohl (f->redirect_node_index);
+ flow.redirect_device_input_next_index =
+ ntohl (f->redirect_device_input_next_index);
+ flow.redirect_queue = ntohl (f->redirect_queue);
+ flow.buffer_advance = ntohl (f->buffer_advance);
+ flow.queue_index = ntohl (f->queue_index);
+ flow.queue_num = ntohl (f->queue_num);
+ flow.rss_types = clib_net_to_host_u64 (f->rss_types);
+ flow.rss_fun = ntohl (f->rss_fun);
+
+ switch (flow.type)
+ {
+ case VNET_FLOW_TYPE_IP4:
+ ipv4_flow_convert (&f->flow.ip4, &flow.ip4);
+ break;
+ case VNET_FLOW_TYPE_IP6:
+ ipv6_flow_convert (&f->flow.ip6, &flow.ip6);
+ break;
+ case VNET_FLOW_TYPE_IP4_N_TUPLE:
+ ipv4_n_tuple_flow_convert (&f->flow.ip4_n_tuple, &flow.ip4_n_tuple);
+ break;
+ case VNET_FLOW_TYPE_IP6_N_TUPLE:
+ ipv6_n_tuple_flow_convert (&f->flow.ip6_n_tuple, &flow.ip6_n_tuple);
+ break;
+ case VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED:
+ ipv4_n_tuple_tagged_flow_convert (&f->flow.ip4_n_tuple_tagged,
+ &flow.ip4_n_tuple_tagged);
+ break;
+ case VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED:
+ ipv6_n_tuple_tagged_flow_convert (&f->flow.ip6_n_tuple_tagged,
+ &flow.ip6_n_tuple_tagged);
+ break;
+ case VNET_FLOW_TYPE_IP4_L2TPV3OIP:
+ ipv4_l2tpv3oip_flow_convert (&f->flow.ip4_l2tpv3oip,
+ &flow.ip4_l2tpv3oip);
+ break;
+ case VNET_FLOW_TYPE_IP4_IPSEC_ESP:
+ ipv4_ipsec_esp_flow_convert (&f->flow.ip4_ipsec_esp,
+ &flow.ip4_ipsec_esp);
+ break;
+ case VNET_FLOW_TYPE_IP4_IPSEC_AH:
+ ipv4_ipsec_ah_flow_convert (&f->flow.ip4_ipsec_ah, &flow.ip4_ipsec_ah);
+ break;
+ case VNET_FLOW_TYPE_IP4_VXLAN:
+ ipv4_vxlan_flow_convert (&f->flow.ip4_vxlan, &flow.ip4_vxlan);
+ break;
+ case VNET_FLOW_TYPE_IP6_VXLAN:
+ ipv6_vxlan_flow_convert (&f->flow.ip6_vxlan, &flow.ip6_vxlan);
+ break;
+ case VNET_FLOW_TYPE_IP4_GTPU:
+ ipv4_gtpu_flow_convert (&f->flow.ip4_gtpu, &flow.ip4_gtpu);
+ break;
+ case VNET_FLOW_TYPE_IP4_GTPC:
+ ipv4_gtpc_flow_convert (&f->flow.ip4_gtpc, &flow.ip4_gtpc);
+ break;
+ case VNET_FLOW_TYPE_GENERIC:
+ generic_flow_convert (&f->flow.generic, &flow.generic);
+ break;
+ default:
+ rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+ goto out;
+ break;
+ }
+
+ rv = vnet_flow_add (vnm, &flow, &flow_index);
+
+out:
+ REPLY_MACRO2 (VL_API_FLOW_ADD_V2_REPLY,
+ ({ rmp->flow_index = ntohl (flow_index); }));
}
static void
diff --git a/src/vnet/flow/flow_cli.c b/src/vnet/flow/flow_cli.c
index e2a3141c551..e4b73717241 100644
--- a/src/vnet/flow/flow_cli.c
+++ b/src/vnet/flow/flow_cli.c
@@ -138,13 +138,11 @@ format_flow_enabled_hw (u8 * s, va_list * args)
u32 hw_if_index;
uword private_data;
vnet_main_t *vnm = vnet_get_main ();
- /* *INDENT-OFF* */
hash_foreach (hw_if_index, private_data, f->private_data,
({
t = format (t, "%s%U", t ? ", " : "",
format_vnet_hw_if_index_name, vnm, hw_if_index);
}));
- /* *INDENT-ON* */
s = format (s, "%v", t);
vec_free (t);
return s;
@@ -223,7 +221,11 @@ show_flow_entry (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "%-10s: %u", "index", f->index);
vlib_cli_output (vm, "%-10s: %s", "type", flow_type_strings[f->type]);
vlib_cli_output (vm, "%-10s: %U", "match", format_flow, f);
- /* *INDENT-OFF* */
+ if (f->type == VNET_FLOW_TYPE_GENERIC)
+ {
+ vlib_cli_output (vm, "%s: %s", "spec", f->generic.pattern.spec);
+ vlib_cli_output (vm, "%s: %s", "mask", f->generic.pattern.mask);
+ }
hash_foreach (hw_if_index, private_data, f->private_data,
({
hi = vnet_get_hw_interface (vnm, hw_if_index);
@@ -234,28 +236,28 @@ show_flow_entry (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, " %U\n", dev_class->format_flow,
hi->dev_instance, f->index, private_data);
}));
- /* *INDENT-ON* */
return 0;
}
no_args:
- /* *INDENT-OFF* */
pool_foreach (f, fm->global_flow_pool)
{
vlib_cli_output (vm, "%U\n", format_flow, f);
+ if (f->type == VNET_FLOW_TYPE_GENERIC)
+ {
+ vlib_cli_output (vm, "%s: %s", "spec", f->generic.pattern.spec);
+ vlib_cli_output (vm, "%s: %s", "mask", f->generic.pattern.mask);
+ }
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_flow_entry_command, static) = {
.path = "show flow entry",
.short_help = "show flow entry [index <index>]",
.function = show_flow_entry,
};
-/* *INDENT-ON* */
static clib_error_t *
show_flow_ranges (vlib_main_t * vm, unformat_input_t * input,
@@ -266,22 +268,18 @@ show_flow_ranges (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "%8s %8s %s", "Start", "Count", "Owner");
- /* *INDENT-OFF* */
vec_foreach (r, fm->ranges)
{
vlib_cli_output (vm, "%8u %8u %s", r->start, r->count, r->owner);
};
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_flow_ranges_command, static) = {
.path = "show flow ranges",
.short_help = "show flow ranges",
.function = show_flow_ranges,
};
-/* *INDENT-ON* */
static clib_error_t *
show_flow_interface (vlib_main_t * vm, unformat_input_t * input,
@@ -319,13 +317,11 @@ show_flow_interface (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_flow_interface_command, static) = {
.path = "show flow interface",
.short_help = "show flow interface <interface name>",
.function = show_flow_interface,
};
-/* *INDENT-ON* */
static clib_error_t *
test_flow (vlib_main_t * vm, unformat_input_t * input,
@@ -354,16 +350,18 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
int rv;
u32 teid = 0, session_id = 0, spi = 0;
u32 vni = 0;
+ u32 queue_start = 0, queue_end = 0;
vnet_flow_type_t type = VNET_FLOW_TYPE_UNKNOWN;
- ip4_address_and_mask_t ip4s = { };
- ip4_address_and_mask_t ip4d = { };
- ip6_address_and_mask_t ip6s = { };
- ip6_address_and_mask_t ip6d = { };
- ip_port_and_mask_t sport = { };
- ip_port_and_mask_t dport = { };
- ip_prot_and_mask_t protocol = { };
+ ip4_address_and_mask_t ip4s = {}, in_ip4s = {};
+ ip4_address_and_mask_t ip4d = {}, in_ip4d = {};
+ ip6_address_and_mask_t ip6s = {}, in_ip6s = {};
+ ip6_address_and_mask_t ip6d = {}, in_ip6d = {};
+ ip_port_and_mask_t sport = {}, in_sport = {};
+ ip_port_and_mask_t dport = {}, in_dport = {};
+ ip_prot_and_mask_t protocol = {}, in_proto = {};
u16 eth_type;
- bool tcp_udp_port_set = false;
+ bool inner_ip4_set = false, inner_ip6_set = false;
+ bool tcp_udp_port_set = false, inner_port_set = false;
bool gtpc_set = false;
bool gtpu_set = false;
bool vni_set = false;
@@ -371,6 +369,8 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
bool ipsec_esp_set = false, ipsec_ah_set = false;
u8 *rss_type[3] = { };
u8 *type_str = NULL;
+ u8 *spec = NULL;
+ u8 *mask = NULL;
clib_memset (&flow, 0, sizeof (vnet_flow_t));
flow.index = ~0;
@@ -389,6 +389,10 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
action = FLOW_ENABLE;
else if (unformat (line_input, "disable"))
action = FLOW_DISABLE;
+ else if (unformat (line_input, "spec %s", &spec))
+ ;
+ else if (unformat (line_input, "mask %s", &mask))
+ ;
else if (unformat (line_input, "eth-type %U",
unformat_ethernet_type_host_byte_order, &eth_type))
flow_class = FLOW_ETHERNET_CLASS;
@@ -398,12 +402,24 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
else if (unformat (line_input, "dst-ip %U",
unformat_ip4_address_and_mask, &ip4d))
flow_class = FLOW_IPV4_CLASS;
+ else if (unformat (line_input, "in-src-ip %U",
+ unformat_ip4_address_and_mask, &in_ip4s))
+ inner_ip4_set = true;
+ else if (unformat (line_input, "in-dst-ip %U",
+ unformat_ip4_address_and_mask, &in_ip4d))
+ inner_ip4_set = true;
else if (unformat (line_input, "ip6-src-ip %U",
unformat_ip6_address_and_mask, &ip6s))
flow_class = FLOW_IPV6_CLASS;
else if (unformat (line_input, "ip6-dst-ip %U",
unformat_ip6_address_and_mask, &ip6d))
flow_class = FLOW_IPV6_CLASS;
+ else if (unformat (line_input, "in-ip6-src-ip %U",
+ unformat_ip6_address_and_mask, &in_ip6s))
+ inner_ip6_set = true;
+ else if (unformat (line_input, "in-ip6-dst-ip %U",
+ unformat_ip6_address_and_mask, &in_ip6d))
+ inner_ip6_set = true;
else if (unformat (line_input, "src-port %U", unformat_ip_port_and_mask,
&sport))
tcp_udp_port_set = true;
@@ -415,6 +431,15 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
(line_input, "proto %U", unformat_ip_protocol_and_mask,
&protocol))
;
+ else if (unformat (line_input, "in-src-port %U",
+ unformat_ip_port_and_mask, &in_sport))
+ inner_port_set = true;
+ else if (unformat (line_input, "in-dst-port %U",
+ unformat_ip_port_and_mask, &in_dport))
+ inner_port_set = true;
+ else if (unformat (line_input, "in-proto %U",
+ unformat_ip_protocol_and_mask, &in_proto))
+ ;
else if (unformat (line_input, "gtpc teid %u", &teid))
gtpc_set = true;
else if (unformat (line_input, "gtpu teid %u", &teid))
@@ -506,6 +531,21 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
#undef _
flow.actions |= VNET_FLOW_ACTION_RSS;
}
+ else if (unformat (line_input, "rss queues"))
+ {
+ if (unformat (line_input, "%d to %d", &queue_start, &queue_end))
+ ;
+ else
+ {
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+
+ flow.queue_index = queue_start;
+ flow.queue_num = queue_end - queue_start + 1;
+
+ flow.actions |= VNET_FLOW_ACTION_RSS;
+ }
else if (unformat (line_input, "%U", unformat_vnet_hw_interface, vnm,
&hw_if_index))
;
@@ -560,6 +600,22 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
type = VNET_FLOW_TYPE_IP4_IPSEC_AH;
else if (tcp_udp_port_set)
type = VNET_FLOW_TYPE_IP4_N_TUPLE;
+ else if (inner_ip4_set)
+ {
+ if (inner_port_set)
+ type = VNET_FLOW_TYPE_IP4_IP4_N_TUPLE;
+ else
+ type = VNET_FLOW_TYPE_IP4_IP4;
+ protocol.prot = IP_PROTOCOL_IP_IN_IP;
+ }
+ else if (inner_ip6_set)
+ {
+ if (inner_port_set)
+ type = VNET_FLOW_TYPE_IP4_IP6_N_TUPLE;
+ else
+ type = VNET_FLOW_TYPE_IP4_IP6;
+ protocol.prot = IP_PROTOCOL_IPV6;
+ }
else
type = VNET_FLOW_TYPE_IP4;
break;
@@ -568,11 +624,32 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
type = VNET_FLOW_TYPE_IP6_N_TUPLE;
else if (vni_set)
type = VNET_FLOW_TYPE_IP6_VXLAN;
+ else if (inner_ip4_set)
+ {
+ if (inner_port_set)
+ type = VNET_FLOW_TYPE_IP6_IP4_N_TUPLE;
+ else
+ type = VNET_FLOW_TYPE_IP6_IP4;
+ protocol.prot = IP_PROTOCOL_IP_IN_IP;
+ }
+ else if (inner_ip6_set)
+ {
+ if (inner_port_set)
+ type = VNET_FLOW_TYPE_IP6_IP6_N_TUPLE;
+ else
+ type = VNET_FLOW_TYPE_IP6_IP6;
+ protocol.prot = IP_PROTOCOL_IPV6;
+ }
else
type = VNET_FLOW_TYPE_IP6;
break;
default:
+ if (spec && mask)
+ {
+ type = VNET_FLOW_TYPE_GENERIC;
+ break;
+ }
return clib_error_return (0,
"Please specify a supported flow type");
}
@@ -623,6 +700,30 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
case IP_PROTOCOL_IPSEC_AH:
flow.ip4_ipsec_esp.spi = spi;
break;
+ case IP_PROTOCOL_IP_IN_IP:
+ clib_memcpy (&flow.ip4_ip4.in_src_addr, &in_ip4s,
+ sizeof (ip4_address_and_mask_t));
+ clib_memcpy (&flow.ip4_ip4.in_dst_addr, &in_ip4d,
+ sizeof (ip4_address_and_mask_t));
+ if (type == VNET_FLOW_TYPE_IP4_IP4_N_TUPLE)
+ {
+ flow.ip4_ip4.in_protocol.prot = in_proto.prot;
+ flow.ip4_ip4_n_tuple.in_src_port = in_sport;
+ flow.ip4_ip4_n_tuple.in_dst_port = in_dport;
+ }
+ break;
+ case IP_PROTOCOL_IPV6:
+ clib_memcpy (&flow.ip4_ip6.in_src_addr, &in_ip6s,
+ sizeof (ip6_address_and_mask_t));
+ clib_memcpy (&flow.ip4_ip6.in_dst_addr, &in_ip6d,
+ sizeof (ip6_address_and_mask_t));
+ if (type == VNET_FLOW_TYPE_IP4_IP6_N_TUPLE)
+ {
+ flow.ip4_ip6.in_protocol.prot = in_proto.prot;
+ flow.ip4_ip6_n_tuple.in_src_port = in_sport;
+ flow.ip4_ip6_n_tuple.in_dst_port = in_dport;
+ }
+ break;
default:
break;
}
@@ -656,10 +757,41 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
if (type == VNET_FLOW_TYPE_IP6_VXLAN)
flow.ip6_vxlan.vni = vni;
break;
+ case IP_PROTOCOL_IP_IN_IP:
+ clib_memcpy (&flow.ip6_ip4.in_src_addr, &in_ip4s,
+ sizeof (ip4_address_and_mask_t));
+ clib_memcpy (&flow.ip6_ip4.in_dst_addr, &in_ip4d,
+ sizeof (ip4_address_and_mask_t));
+ if (type == VNET_FLOW_TYPE_IP6_IP4_N_TUPLE)
+ {
+ flow.ip6_ip4.in_protocol.prot = in_proto.prot;
+ flow.ip6_ip4_n_tuple.in_src_port = in_sport;
+ flow.ip6_ip4_n_tuple.in_dst_port = in_dport;
+ }
+ break;
+ case IP_PROTOCOL_IPV6:
+ clib_memcpy (&flow.ip6_ip6.in_src_addr, &in_ip6s,
+ sizeof (ip6_address_and_mask_t));
+ clib_memcpy (&flow.ip6_ip6.in_dst_addr, &in_ip6d,
+ sizeof (ip6_address_and_mask_t));
+ if (type == VNET_FLOW_TYPE_IP6_IP6_N_TUPLE)
+ {
+ flow.ip6_ip6.in_protocol.prot = in_proto.prot;
+ flow.ip6_ip6_n_tuple.in_src_port = in_sport;
+ flow.ip6_ip6_n_tuple.in_dst_port = in_dport;
+ }
+ break;
default:
break;
}
}
+ if (type == VNET_FLOW_TYPE_GENERIC)
+ {
+ clib_memcpy (flow.generic.pattern.spec, spec,
+ sizeof (flow.generic.pattern.spec));
+ clib_memcpy (flow.generic.pattern.mask, mask,
+ sizeof (flow.generic.pattern.mask));
+ }
flow.type = type;
rv = vnet_flow_add (vnm, &flow, &flow_index);
@@ -687,22 +819,22 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_flow_command, static) = {
- .path = "test flow",
- .short_help = "test flow [add|del|enable|disable] [index <id>] "
- "[src-ip <ip-addr/mask>] [dst-ip <ip-addr/mask>] "
- "[ip6-src-ip <ip-addr/mask>] [ip6-dst-ip <ip-addr/mask>] "
- "[src-port <port/mask>] [dst-port <port/mask>] "
- "[proto <ip-proto>] "
- "[gtpc teid <teid>] [gtpu teid <teid>] [vxlan <vni>] "
- "[session id <session>] [spi <spi>]"
- "[next-node <node>] [mark <id>] [buffer-advance <len>] "
- "[redirect-to-queue <queue>] [drop] "
- "[rss function <name>] [rss types <flow type>]",
- .function = test_flow,
+ .path = "test flow",
+ .short_help = "test flow [add|del|enable|disable] [index <id>] "
+ "[src-ip <ip-addr/mask>] [dst-ip <ip-addr/mask>] "
+ "[ip6-src-ip <ip-addr/mask>] [ip6-dst-ip <ip-addr/mask>] "
+ "[src-port <port/mask>] [dst-port <port/mask>] "
+ "[proto <ip-proto>] "
+ "[gtpc teid <teid>] [gtpu teid <teid>] [vxlan <vni>] "
+	  "[session id <session>] [spi <spi>] "
+	  "[spec <spec string>] [mask <mask string>] "
+	  "[next-node <node>] [mark <id>] [buffer-advance <len>] "
+	  "[redirect-to-queue <queue>] [drop] "
+	  "[rss function <name>] [rss types <flow type>] "
+	  "[rss queues <queue_start> to <queue_end>]",
+ .function = test_flow,
};
-/* *INDENT-ON* */
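The extended test flow command now accepts generic spec/mask patterns, inner (IP-in-IP) matches, and RSS queue groups. Illustrative invocations (values and interface names are examples; the raw spec/mask string format is driver-defined):

vpp# test flow add src-ip 192.168.1.0/24 proto udp rss queues 0 to 3
vpp# test flow add src-ip 10.0.0.1/32 in-src-ip 172.16.0.1/32 in-dst-ip 172.16.0.2/32 in-src-port 100 in-dst-port 200
vpp# test flow add spec <spec-string> mask <mask-string>
vpp# test flow enable index 0 eth0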
static u8 *
format_flow_match_element (u8 * s, va_list * args)
diff --git a/src/vnet/flow/flow_types.api b/src/vnet/flow/flow_types.api
index 86f7ce128cb..1696001d975 100644
--- a/src/vnet/flow/flow_types.api
+++ b/src/vnet/flow/flow_types.api
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-option version = "0.0.3";
+option version = "0.0.4";
import "vnet/ethernet/ethernet_types.api";
import "vnet/ip/ip_types.api";
@@ -36,6 +36,25 @@ enum flow_type
FLOW_TYPE_IP4_GTPU,
};
+enum flow_type_v2
+{
+ FLOW_TYPE_ETHERNET_V2 = 1,
+ FLOW_TYPE_IP4_V2,
+ FLOW_TYPE_IP6_V2,
+ FLOW_TYPE_IP4_L2TPV3OIP_V2,
+ FLOW_TYPE_IP4_IPSEC_ESP_V2,
+ FLOW_TYPE_IP4_IPSEC_AH_V2,
+ FLOW_TYPE_IP4_N_TUPLE_V2,
+ FLOW_TYPE_IP6_N_TUPLE_V2,
+ FLOW_TYPE_IP4_N_TUPLE_TAGGED_V2,
+ FLOW_TYPE_IP6_N_TUPLE_TAGGED_V2,
+ FLOW_TYPE_IP4_VXLAN_V2,
+ FLOW_TYPE_IP6_VXLAN_V2,
+ FLOW_TYPE_IP4_GTPC_V2,
+ FLOW_TYPE_IP4_GTPU_V2,
+ FLOW_TYPE_GENERIC_V2,
+};
+
enum flow_action
{
FLOW_ACTION_COUNT = 1,
@@ -46,6 +65,31 @@ enum flow_action
FLOW_ACTION_DROP = 64,
};
+enum flow_action_v2
+{
+ FLOW_ACTION_COUNT_V2 = 1,
+ FLOW_ACTION_MARK_V2 = 2,
+ FLOW_ACTION_BUFFER_ADVANCE_V2 = 4,
+ FLOW_ACTION_REDIRECT_TO_NODE_V2 = 8,
+ FLOW_ACTION_REDIRECT_TO_QUEUE_V2 = 16,
+ FLOW_ACTION_RSS_V2 = 32,
+ FLOW_ACTION_DROP_V2 = 64,
+};
+
+enum rss_function
+{
+ RSS_FUNC_DEFAULT,
+ RSS_FUNC_TOEPLITZ,
+ RSS_FUNC_SIMPLE_XOR,
+ RSS_FUNC_SYMMETRIC_TOEPLITZ,
+};
+
+typedef generic_pattern
+{
+ u8 spec[1024];
+ u8 mask[1024];
+};
+
typedef ip_port_and_mask
{
u16 port;
@@ -193,6 +237,12 @@ typedef flow_ip4_gtpu
u32 teid;
};
+typedef flow_generic
+{
+ i32 foo;
+ vl_api_generic_pattern_t pattern;
+};
+
union flow
{
vl_api_flow_ethernet_t ethernet;
@@ -211,6 +261,25 @@ union flow
vl_api_flow_ip4_gtpu_t ip4_gtpu;
};
+union flow_v2
+{
+ vl_api_flow_ethernet_t ethernet;
+ vl_api_flow_ip4_t ip4;
+ vl_api_flow_ip6_t ip6;
+ vl_api_flow_ip4_l2tpv3oip_t ip4_l2tpv3oip;
+ vl_api_flow_ip4_ipsec_esp_t ip4_ipsec_esp;
+ vl_api_flow_ip4_ipsec_ah_t ip4_ipsec_ah;
+ vl_api_flow_ip4_n_tuple_t ip4_n_tuple;
+ vl_api_flow_ip6_n_tuple_t ip6_n_tuple;
+ vl_api_flow_ip4_n_tuple_tagged_t ip4_n_tuple_tagged;
+ vl_api_flow_ip6_n_tuple_tagged_t ip6_n_tuple_tagged;
+ vl_api_flow_ip4_vxlan_t ip4_vxlan;
+ vl_api_flow_ip6_vxlan_t ip6_vxlan;
+ vl_api_flow_ip4_gtpc_t ip4_gtpc;
+ vl_api_flow_ip4_gtpu_t ip4_gtpu;
+ vl_api_flow_generic_t generic;
+};
+
/* main flow struct */
typedef flow_rule
{
@@ -240,3 +309,41 @@ typedef flow_rule
vl_api_flow_t flow;
};
+/* main flow struct */
+typedef flow_rule_v2
+{
+ /* flow type */
+ vl_api_flow_type_v2_t type;
+
+ /* flow index */
+ u32 index;
+
+ /* bitmap of flow actions (FLOW_ACTION_*) */
+ vl_api_flow_action_v2_t actions;
+
+ /* flow id for VNET_FLOW_ACTION_MARK */
+ u32 mark_flow_id;
+
+ /* node index and next index for FLOW_ACTION_REDIRECT_TO_NODE */
+ u32 redirect_node_index;
+ u32 redirect_device_input_next_index;
+
+ /* queue for FLOW_ACTION_REDIRECT_TO_QUEUE */
+ u32 redirect_queue;
+
+ /* first queue index and number of queues in the RSS queue group for FLOW_ACTION_RSS */
+ u32 queue_index;
+ u32 queue_num;
+
+ /* buffer offset for FLOW_ACTION_BUFFER_ADVANCE */
+ i32 buffer_advance;
+
+ /* RSS types, including IPv4/IPv6/TCP/UDP... */
+ u64 rss_types;
+
+ /* RSS hash function, e.g. toeplitz or symmetric-toeplitz */
+ vl_api_rss_function_t rss_fun;
+
+ /* flow match, one member of the flow_v2 union */
+ vl_api_flow_v2_t flow;
+};
diff --git a/src/vnet/gre/FEATURE.yaml b/src/vnet/gre/FEATURE.yaml
deleted file mode 100644
index 4b35b870dc3..00000000000
--- a/src/vnet/gre/FEATURE.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
----
-name: Generic Routing Encapsulation
-maintainer: Neale Ranns <nranns@cisco.com>
-features:
- - L3 tunnels, all combinations of IPv4 and IPv6
- - Encap/Decap flags to control the copying of DSCP, ECN, DF from overlay to
- underlay and vice-versa.
- - L2 tunnels
-missing:
- - GRE keys
-description: "An implementation of Generic Routing Encapsulation (GRE)"
-state: production
-properties: [API, CLI, MULTITHREAD]
diff --git a/src/vnet/gre/error.def b/src/vnet/gre/error.def
deleted file mode 100644
index 161ecc1d874..00000000000
--- a/src/vnet/gre/error.def
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * gre_error.def: gre errors
- *
- * Copyright (c) 2012 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-gre_error (NONE, "no error")
-gre_error (UNKNOWN_PROTOCOL, "unknown protocol")
-gre_error (UNSUPPORTED_VERSION, "unsupported version")
-gre_error (PKTS_DECAP, "GRE input packets decapsulated")
-gre_error (PKTS_ENCAP, "GRE output packets encapsulated")
-gre_error (NO_SUCH_TUNNEL, "GRE input packets dropped due to missing tunnel")
diff --git a/src/vnet/gre/gre.api b/src/vnet/gre/gre.api
deleted file mode 100644
index 9c69ba4007d..00000000000
--- a/src/vnet/gre/gre.api
+++ /dev/null
@@ -1,110 +0,0 @@
-/* Hey Emacs use -*- mode: C -*- */
-/*
- * Copyright (c) 2015-2020 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-option version = "2.1.1";
-
-import "vnet/interface_types.api";
-import "vnet/tunnel/tunnel_types.api";
-import "vnet/ip/ip_types.api";
-
-/** \brief A GRE tunnel type
-*/
-enum gre_tunnel_type : u8
-{
- GRE_API_TUNNEL_TYPE_L3 = 0,
- /* L2 Transparent Ethernet Bridge */
- GRE_API_TUNNEL_TYPE_TEB,
- /* Encapsulated Remote Switched Port ANalyzer */
- GRE_API_TUNNEL_TYPE_ERSPAN,
-};
-
-/** \brief A composite type uniquely defining a GRE tunnel.
- @param type - tunnel type (see enum definition), 0: L3, 1: TEB, 2: ERSPAN
- @param mode - P2P or P2MP
- @param flags - to control encap/decap behaviour
- @param session_id - session for ERSPAN tunnel, range 0-1023
- @param instance - optional unique custom device instance, else ~0.
- @param outer_table_id - Encap FIB table ID
- @param sw_if_index - ignored on create/delete, present in details.
- @param src - Source IP address
- @param dst - Destination IP address, can be multicast
-*/
-typedef gre_tunnel
-{
- vl_api_gre_tunnel_type_t type;
- vl_api_tunnel_mode_t mode;
- vl_api_tunnel_encap_decap_flags_t flags;
- u16 session_id;
- u32 instance;
- u32 outer_table_id;
- vl_api_interface_index_t sw_if_index;
- vl_api_address_t src;
- vl_api_address_t dst;
-};
-
-/** \brief Add or delete a single GRE tunnel.
- @param client_index - opaque cookie to identify the sender.
- @param context - sender context, to match reply w/ request.
- @param is_add - add if true, delete if false.
- @param tunnel - tunnel definition to add or delete.
-*/
-define gre_tunnel_add_del
-{
- u32 client_index;
- u32 context;
- bool is_add;
- vl_api_gre_tunnel_t tunnel;
-};
-
-/** \brief Add or delete a single GRE tunnel.
- @param context - sender context, to match reply w/ request.
- @param retval - return code for the request.
- @param sw_if_index - the interface corresponding to the affected tunnel.
-*/
-define gre_tunnel_add_del_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-
-/** \brief Dump details of all or just a single GRE tunnel.
- @param client_index - opaque cookie to identify the sender.
- @param context - sender context, to match reply w/ request.
- @param sw_if_index - filter for tunnel of this interface index, ~0 for all.
-*/
-define gre_tunnel_dump
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index;
-};
-
-/** \brief Details response for one of the requested GRE tunnels.
- @param context - sender context, to match reply w/ request.
- @param tunnel - definition of the dumped tunnel.
-*/
-define gre_tunnel_details
-{
- u32 context;
- vl_api_gre_tunnel_t tunnel;
-};
-
-/*
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/gre/gre.c b/src/vnet/gre/gre.c
deleted file mode 100644
index fcdf9c0d6bc..00000000000
--- a/src/vnet/gre/gre.c
+++ /dev/null
@@ -1,870 +0,0 @@
-/*
- * gre.c: gre
- *
- * Copyright (c) 2012 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vnet/vnet.h>
-#include <vnet/gre/gre.h>
-#include <vnet/adj/adj_midchain.h>
-#include <vnet/tunnel/tunnel_dp.h>
-
-extern gre_main_t gre_main;
-
-#ifndef CLIB_MARCH_VARIANT
-gre_main_t gre_main;
-
-typedef struct
-{
- union
- {
- ip4_and_gre_header_t ip4_and_gre;
- u64 as_u64[3];
- };
-} ip4_and_gre_union_t;
-
-typedef struct
-{
- union
- {
- ip6_and_gre_header_t ip6_and_gre;
- u64 as_u64[3];
- };
-} ip6_and_gre_union_t;
-#endif /* CLIB_MARCH_VARIANT */
-
-
-/* Packet trace structure */
-typedef struct
-{
- /* Tunnel-id / index in tunnel vector */
- u32 tunnel_id;
-
- /* pkt length */
- u32 length;
-
- /* tunnel ip addresses */
- ip46_address_t src;
- ip46_address_t dst;
-} gre_tx_trace_t;
-
-extern u8 *format_gre_tx_trace (u8 * s, va_list * args);
-
-#ifndef CLIB_MARCH_VARIANT
-u8 *
-format_gre_tx_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gre_tx_trace_t *t = va_arg (*args, gre_tx_trace_t *);
-
- s = format (s, "GRE: tunnel %d len %d src %U dst %U",
- t->tunnel_id, t->length,
- format_ip46_address, &t->src, IP46_TYPE_ANY,
- format_ip46_address, &t->dst, IP46_TYPE_ANY);
- return s;
-}
-
-u8 *
-format_gre_protocol (u8 * s, va_list * args)
-{
- gre_protocol_t p = va_arg (*args, u32);
- gre_main_t *gm = &gre_main;
- gre_protocol_info_t *pi = gre_get_protocol_info (gm, p);
-
- if (pi)
- s = format (s, "%s", pi->name);
- else
- s = format (s, "0x%04x", p);
-
- return s;
-}
-
-u8 *
-format_gre_header_with_length (u8 * s, va_list * args)
-{
- gre_main_t *gm = &gre_main;
- gre_header_t *h = va_arg (*args, gre_header_t *);
- u32 max_header_bytes = va_arg (*args, u32);
- gre_protocol_t p = clib_net_to_host_u16 (h->protocol);
- u32 indent, header_bytes;
-
- header_bytes = sizeof (h[0]);
- if (max_header_bytes != 0 && header_bytes > max_header_bytes)
- return format (s, "gre header truncated");
-
- indent = format_get_indent (s);
-
- s = format (s, "GRE %U", format_gre_protocol, p);
-
- if (max_header_bytes != 0 && header_bytes < max_header_bytes)
- {
- gre_protocol_info_t *pi = gre_get_protocol_info (gm, p);
- vlib_node_t *node = vlib_get_node (gm->vlib_main, pi->node_index);
- if (node->format_buffer)
- s = format (s, "\n%U%U",
- format_white_space, indent,
- node->format_buffer, (void *) (h + 1),
- max_header_bytes - header_bytes);
- }
-
- return s;
-}
-
-u8 *
-format_gre_header (u8 * s, va_list * args)
-{
- gre_header_t *h = va_arg (*args, gre_header_t *);
- return format (s, "%U", format_gre_header_with_length, h, 0);
-}
-
-/* Returns gre protocol as an int in host byte order. */
-uword
-unformat_gre_protocol_host_byte_order (unformat_input_t * input,
- va_list * args)
-{
- u16 *result = va_arg (*args, u16 *);
- gre_main_t *gm = &gre_main;
- int i;
-
- /* Named type. */
- if (unformat_user (input, unformat_vlib_number_by_name,
- gm->protocol_info_by_name, &i))
- {
- gre_protocol_info_t *pi = vec_elt_at_index (gm->protocol_infos, i);
- *result = pi->protocol;
- return 1;
- }
-
- return 0;
-}
-
-uword
-unformat_gre_protocol_net_byte_order (unformat_input_t * input,
- va_list * args)
-{
- u16 *result = va_arg (*args, u16 *);
- if (!unformat_user (input, unformat_gre_protocol_host_byte_order, result))
- return 0;
- *result = clib_host_to_net_u16 ((u16) * result);
- return 1;
-}
-
-uword
-unformat_gre_header (unformat_input_t * input, va_list * args)
-{
- u8 **result = va_arg (*args, u8 **);
- gre_header_t _h, *h = &_h;
- u16 p;
-
- if (!unformat (input, "%U", unformat_gre_protocol_host_byte_order, &p))
- return 0;
-
- h->protocol = clib_host_to_net_u16 (p);
-
- /* Add header to result. */
- {
- void *p;
- u32 n_bytes = sizeof (h[0]);
-
- vec_add2 (*result, p, n_bytes);
- clib_memcpy (p, h, n_bytes);
- }
-
- return 1;
-}
-
-static int
-gre_proto_from_vnet_link (vnet_link_t link)
-{
- switch (link)
- {
- case VNET_LINK_IP4:
- return (GRE_PROTOCOL_ip4);
- case VNET_LINK_IP6:
- return (GRE_PROTOCOL_ip6);
- case VNET_LINK_MPLS:
- return (GRE_PROTOCOL_mpls_unicast);
- case VNET_LINK_ETHERNET:
- return (GRE_PROTOCOL_teb);
- case VNET_LINK_ARP:
- return (GRE_PROTOCOL_arp);
- case VNET_LINK_NSH:
- ASSERT (0);
- break;
- }
- ASSERT (0);
- return (GRE_PROTOCOL_ip4);
-}
-
-static u8 *
-gre_build_rewrite (vnet_main_t * vnm,
- u32 sw_if_index,
- vnet_link_t link_type, const void *dst_address)
-{
- gre_main_t *gm = &gre_main;
- const ip46_address_t *dst;
- ip4_and_gre_header_t *h4;
- ip6_and_gre_header_t *h6;
- gre_header_t *gre;
- u8 *rewrite = NULL;
- gre_tunnel_t *t;
- u32 ti;
- u8 is_ipv6;
-
- dst = dst_address;
- ti = gm->tunnel_index_by_sw_if_index[sw_if_index];
-
- if (~0 == ti)
- /* not one of ours */
- return (0);
-
- t = pool_elt_at_index (gm->tunnels, ti);
-
- is_ipv6 = t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6 ? 1 : 0;
-
- if (!is_ipv6)
- {
- vec_validate (rewrite, sizeof (*h4) - 1);
- h4 = (ip4_and_gre_header_t *) rewrite;
- gre = &h4->gre;
- h4->ip4.ip_version_and_header_length = 0x45;
- h4->ip4.ttl = 254;
- h4->ip4.protocol = IP_PROTOCOL_GRE;
- /* fixup ip4 header length and checksum after-the-fact */
- h4->ip4.src_address.as_u32 = t->tunnel_src.ip4.as_u32;
- h4->ip4.dst_address.as_u32 = dst->ip4.as_u32;
- h4->ip4.checksum = ip4_header_checksum (&h4->ip4);
- }
- else
- {
- vec_validate (rewrite, sizeof (*h6) - 1);
- h6 = (ip6_and_gre_header_t *) rewrite;
- gre = &h6->gre;
- h6->ip6.ip_version_traffic_class_and_flow_label =
- clib_host_to_net_u32 (6 << 28);
- h6->ip6.hop_limit = 255;
- h6->ip6.protocol = IP_PROTOCOL_GRE;
- /* fixup ip6 header length and checksum after-the-fact */
- h6->ip6.src_address.as_u64[0] = t->tunnel_src.ip6.as_u64[0];
- h6->ip6.src_address.as_u64[1] = t->tunnel_src.ip6.as_u64[1];
- h6->ip6.dst_address.as_u64[0] = dst->ip6.as_u64[0];
- h6->ip6.dst_address.as_u64[1] = dst->ip6.as_u64[1];
- }
-
- if (PREDICT_FALSE (t->type == GRE_TUNNEL_TYPE_ERSPAN))
- {
- gre->protocol = clib_host_to_net_u16 (GRE_PROTOCOL_erspan);
- gre->flags_and_version = clib_host_to_net_u16 (GRE_FLAGS_SEQUENCE);
- }
- else
- gre->protocol =
- clib_host_to_net_u16 (gre_proto_from_vnet_link (link_type));
-
- return (rewrite);
-}
-
-static void
-gre44_fixup (vlib_main_t * vm,
- const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data)
-{
- tunnel_encap_decap_flags_t flags;
- ip4_and_gre_header_t *ip0;
-
- ip0 = vlib_buffer_get_current (b0);
- flags = pointer_to_uword (data);
-
- /* Fixup the checksum and len fields in the GRE tunnel encap
- * that was applied at the midchain node */
- ip0->ip4.length =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
- tunnel_encap_fixup_4o4 (flags, (ip4_header_t *) (ip0 + 1), &ip0->ip4);
- ip0->ip4.checksum = ip4_header_checksum (&ip0->ip4);
-}
-
-static void
-gre64_fixup (vlib_main_t * vm,
- const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data)
-{
- tunnel_encap_decap_flags_t flags;
- ip4_and_gre_header_t *ip0;
-
- ip0 = vlib_buffer_get_current (b0);
- flags = pointer_to_uword (data);
-
- /* Fixup the checksum and len fields in the GRE tunnel encap
- * that was applied at the midchain node */
- ip0->ip4.length =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
- tunnel_encap_fixup_6o4 (flags, (ip6_header_t *) (ip0 + 1), &ip0->ip4);
- ip0->ip4.checksum = ip4_header_checksum (&ip0->ip4);
-}
-
-static void
-grex4_fixup (vlib_main_t * vm,
- const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data)
-{
- ip4_header_t *ip0;
-
- ip0 = vlib_buffer_get_current (b0);
-
- /* Fixup the checksum and len fields in the GRE tunnel encap
- * that was applied at the midchain node */
- ip0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
- ip0->checksum = ip4_header_checksum (ip0);
-}
-
-static void
-gre46_fixup (vlib_main_t * vm,
- const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data)
-{
- tunnel_encap_decap_flags_t flags;
- ip6_and_gre_header_t *ip0;
-
- ip0 = vlib_buffer_get_current (b0);
- flags = pointer_to_uword (data);
-
- /* Fixup the payload length field in the GRE tunnel encap that was applied
- * at the midchain node */
- ip0->ip6.payload_length =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
- sizeof (ip0->ip6));
- tunnel_encap_fixup_4o6 (flags, b0, (ip4_header_t *) (ip0 + 1), &ip0->ip6);
-}
-
-static void
-gre66_fixup (vlib_main_t * vm,
- const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data)
-{
- tunnel_encap_decap_flags_t flags;
- ip6_and_gre_header_t *ip0;
-
- ip0 = vlib_buffer_get_current (b0);
- flags = pointer_to_uword (data);
-
- /* Fixup the payload length field in the GRE tunnel encap that was applied
- * at the midchain node */
- ip0->ip6.payload_length =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
- sizeof (ip0->ip6));
- tunnel_encap_fixup_6o6 (flags, (ip6_header_t *) (ip0 + 1), &ip0->ip6);
-}
-
-static void
-grex6_fixup (vlib_main_t * vm,
- const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data)
-{
- ip6_and_gre_header_t *ip0;
-
- ip0 = vlib_buffer_get_current (b0);
-
- /* Fixup the payload length field in the GRE tunnel encap that was applied
- * at the midchain node */
- ip0->ip6.payload_length =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
- sizeof (ip0->ip6));
-}
-
-/**
- * return the appropriate fixup function given the overlay (link-type) and
- * underlay (fproto) combination
- */
-static adj_midchain_fixup_t
-gre_get_fixup (fib_protocol_t fproto, vnet_link_t lt)
-{
- if (fproto == FIB_PROTOCOL_IP6 && lt == VNET_LINK_IP6)
- return (gre66_fixup);
- if (fproto == FIB_PROTOCOL_IP6 && lt == VNET_LINK_IP4)
- return (gre46_fixup);
- if (fproto == FIB_PROTOCOL_IP4 && lt == VNET_LINK_IP6)
- return (gre64_fixup);
- if (fproto == FIB_PROTOCOL_IP4 && lt == VNET_LINK_IP4)
- return (gre44_fixup);
- if (fproto == FIB_PROTOCOL_IP6)
- return (grex6_fixup);
- if (fproto == FIB_PROTOCOL_IP4)
- return (grex4_fixup);
-
- ASSERT (0);
- return (gre44_fixup);
-}
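
The dispatch above pairs an underlay protocol (fproto) with an overlay link type; the four exact matches come first, and the grex4/grex6 entries catch the remaining payload types such as TEB or MPLS. A minimal sketch of resolving a fixup, using only functions defined in this file:

  /* an IPv4 payload carried over an IPv6 underlay needs the 4o6 fixup */
  adj_midchain_fixup_t fixup = gre_get_fixup (FIB_PROTOCOL_IP6, VNET_LINK_IP4);
  ASSERT (fixup == gre46_fixup);

  /* an Ethernet (TEB) payload over IPv4 falls through to the catch-all */
  ASSERT (gre_get_fixup (FIB_PROTOCOL_IP4, VNET_LINK_ETHERNET) == grex4_fixup);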
-
-void
-gre_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
-{
- gre_main_t *gm = &gre_main;
- gre_tunnel_t *t;
- adj_flags_t af;
- u32 ti;
-
- ti = gm->tunnel_index_by_sw_if_index[sw_if_index];
- t = pool_elt_at_index (gm->tunnels, ti);
- af = ADJ_FLAG_NONE;
-
-  /*
-   * If the user has not requested that the load-balancing be based on
-   * a flow hash of the inner packet, use the stacking to choose a path.
-   */
- if (!(t->flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_INNER_HASH))
- af |= ADJ_FLAG_MIDCHAIN_IP_STACK;
-
- if (VNET_LINK_ETHERNET == adj_get_link_type (ai))
- af |= ADJ_FLAG_MIDCHAIN_NO_COUNT;
-
- adj_nbr_midchain_update_rewrite
- (ai, gre_get_fixup (t->tunnel_dst.fp_proto,
- adj_get_link_type (ai)),
- uword_to_pointer (t->flags, void *), af,
- gre_build_rewrite (vnm, sw_if_index, adj_get_link_type (ai),
- &t->tunnel_dst.fp_addr));
-
- gre_tunnel_stack (ai);
-}
-
-adj_walk_rc_t
-mgre_mk_complete_walk (adj_index_t ai, void *data)
-{
- mgre_walk_ctx_t *ctx = data;
- adj_flags_t af;
-
- af = ADJ_FLAG_NONE;
-
-  /*
-   * If the user has not requested that the load-balancing be based on
-   * a flow hash of the inner packet, use the stacking to choose a path.
-   */
- if (!(ctx->t->flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_INNER_HASH))
- af |= ADJ_FLAG_MIDCHAIN_IP_STACK;
-
- adj_nbr_midchain_update_rewrite
- (ai, gre_get_fixup (ctx->t->tunnel_dst.fp_proto,
- adj_get_link_type (ai)),
- uword_to_pointer (ctx->t->flags, void *),
- af,
- gre_build_rewrite (vnet_get_main (),
- ctx->t->sw_if_index,
- adj_get_link_type (ai),
- &teib_entry_get_nh (ctx->ne)->fp_addr));
-
- teib_entry_adj_stack (ctx->ne, ai);
-
- return (ADJ_WALK_RC_CONTINUE);
-}
-
-adj_walk_rc_t
-mgre_mk_incomplete_walk (adj_index_t ai, void *data)
-{
- gre_tunnel_t *t = data;
-
- adj_nbr_midchain_update_rewrite (ai, gre_get_fixup (t->tunnel_dst.fp_proto,
- adj_get_link_type (ai)),
- NULL, ADJ_FLAG_NONE, NULL);
-
- adj_midchain_delegate_unstack (ai);
-
- return (ADJ_WALK_RC_CONTINUE);
-}
-
-void
-mgre_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
-{
- gre_main_t *gm = &gre_main;
- ip_adjacency_t *adj;
- teib_entry_t *ne;
- gre_tunnel_t *t;
- u32 ti;
-
- adj = adj_get (ai);
- ti = gm->tunnel_index_by_sw_if_index[sw_if_index];
- t = pool_elt_at_index (gm->tunnels, ti);
-
- ne = teib_entry_find_46 (sw_if_index,
- adj->ia_nh_proto, &adj->sub_type.nbr.next_hop);
-
- if (NULL == ne)
- {
- // no TEIB entry to provide the next-hop
- adj_nbr_midchain_update_rewrite (
- ai, gre_get_fixup (t->tunnel_dst.fp_proto, adj_get_link_type (ai)),
- uword_to_pointer (t->flags, void *), ADJ_FLAG_NONE, NULL);
- return;
- }
-
- mgre_walk_ctx_t ctx = {
- .t = t,
- .ne = ne
- };
- adj_nbr_walk_nh (sw_if_index,
- adj->ia_nh_proto,
- &adj->sub_type.nbr.next_hop, mgre_mk_complete_walk, &ctx);
-}
-#endif /* CLIB_MARCH_VARIANT */
-
-typedef enum
-{
- GRE_ENCAP_NEXT_L2_MIDCHAIN,
- GRE_ENCAP_N_NEXT,
-} gre_encap_next_t;
-
-/**
- * @brief TX function. Only called for L2 payload including TEB or ERSPAN.
- * L3 traffic uses the adj-midchains.
- */
-static_always_inline u32
-gre_encap_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, gre_tunnel_type_t type)
-{
- gre_main_t *gm = &gre_main;
- u32 *from, n_left_from;
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
- u32 sw_if_index[2] = { ~0, ~0 };
- const gre_tunnel_t *gt[2] = { 0 };
- adj_index_t adj_index[2] = { ADJ_INDEX_INVALID, ADJ_INDEX_INVALID };
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- vlib_get_buffers (vm, from, bufs, n_left_from);
-
- while (n_left_from >= 2)
- {
-
- if (PREDICT_FALSE
- (sw_if_index[0] != vnet_buffer (b[0])->sw_if_index[VLIB_TX]))
- {
- const vnet_hw_interface_t *hi;
- sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
- hi = vnet_get_sup_hw_interface (gm->vnet_main, sw_if_index[0]);
- gt[0] = &gm->tunnels[hi->dev_instance];
- adj_index[0] = gt[0]->l2_adj_index;
- }
- if (PREDICT_FALSE
- (sw_if_index[1] != vnet_buffer (b[1])->sw_if_index[VLIB_TX]))
- {
- const vnet_hw_interface_t *hi;
- sw_if_index[1] = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
- hi = vnet_get_sup_hw_interface (gm->vnet_main, sw_if_index[1]);
- gt[1] = &gm->tunnels[hi->dev_instance];
- adj_index[1] = gt[1]->l2_adj_index;
- }
-
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = adj_index[0];
- vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = adj_index[1];
-
- if (type == GRE_TUNNEL_TYPE_ERSPAN)
- {
- /* Encap GRE seq# and ERSPAN type II header */
- erspan_t2_t *h0;
- u32 seq_num;
- u64 hdr;
- vlib_buffer_advance (b[0], -sizeof (erspan_t2_t));
- h0 = vlib_buffer_get_current (b[0]);
- seq_num = clib_atomic_fetch_add (&gt[0]->gre_sn->seq_num, 1);
- hdr = clib_host_to_net_u64 (ERSPAN_HDR2);
- h0->seq_num = clib_host_to_net_u32 (seq_num);
- h0->t2_u64 = hdr;
- h0->t2.cos_en_t_session |= clib_host_to_net_u16 (gt[0]->session_id);
- }
- if (type == GRE_TUNNEL_TYPE_ERSPAN)
- {
- /* Encap GRE seq# and ERSPAN type II header */
- erspan_t2_t *h0;
- u32 seq_num;
- u64 hdr;
- vlib_buffer_advance (b[1], -sizeof (erspan_t2_t));
- h0 = vlib_buffer_get_current (b[1]);
- seq_num = clib_atomic_fetch_add (&gt[1]->gre_sn->seq_num, 1);
- hdr = clib_host_to_net_u64 (ERSPAN_HDR2);
- h0->seq_num = clib_host_to_net_u32 (seq_num);
- h0->t2_u64 = hdr;
- h0->t2.cos_en_t_session |= clib_host_to_net_u16 (gt[1]->session_id);
- }
-
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- {
- gre_tx_trace_t *tr = vlib_add_trace (vm, node,
- b[0], sizeof (*tr));
- tr->tunnel_id = gt[0] - gm->tunnels;
- tr->src = gt[0]->tunnel_src;
- tr->dst = gt[0]->tunnel_dst.fp_addr;
- tr->length = vlib_buffer_length_in_chain (vm, b[0]);
- }
- if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED))
- {
- gre_tx_trace_t *tr = vlib_add_trace (vm, node,
- b[1], sizeof (*tr));
- tr->tunnel_id = gt[1] - gm->tunnels;
- tr->src = gt[1]->tunnel_src;
- tr->dst = gt[1]->tunnel_dst.fp_addr;
- tr->length = vlib_buffer_length_in_chain (vm, b[1]);
- }
-
- b += 2;
- n_left_from -= 2;
- }
-
- while (n_left_from >= 1)
- {
-
- if (PREDICT_FALSE
- (sw_if_index[0] != vnet_buffer (b[0])->sw_if_index[VLIB_TX]))
- {
- const vnet_hw_interface_t *hi;
- sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
- hi = vnet_get_sup_hw_interface (gm->vnet_main, sw_if_index[0]);
- gt[0] = &gm->tunnels[hi->dev_instance];
- adj_index[0] = gt[0]->l2_adj_index;
- }
-
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = adj_index[0];
-
- if (type == GRE_TUNNEL_TYPE_ERSPAN)
- {
- /* Encap GRE seq# and ERSPAN type II header */
- erspan_t2_t *h0;
- u32 seq_num;
- u64 hdr;
- ASSERT (gt[0]->type == GRE_TUNNEL_TYPE_ERSPAN);
- vlib_buffer_advance (b[0], -sizeof (erspan_t2_t));
- h0 = vlib_buffer_get_current (b[0]);
- seq_num = clib_atomic_fetch_add (&gt[0]->gre_sn->seq_num, 1);
- hdr = clib_host_to_net_u64 (ERSPAN_HDR2);
- h0->seq_num = clib_host_to_net_u32 (seq_num);
- h0->t2_u64 = hdr;
- h0->t2.cos_en_t_session |= clib_host_to_net_u16 (gt[0]->session_id);
- }
-
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- {
- gre_tx_trace_t *tr = vlib_add_trace (vm, node,
- b[0], sizeof (*tr));
- tr->tunnel_id = gt[0] - gm->tunnels;
- tr->src = gt[0]->tunnel_src;
- tr->dst = gt[0]->tunnel_dst.fp_addr;
- tr->length = vlib_buffer_length_in_chain (vm, b[0]);
- }
-
- b += 1;
- n_left_from -= 1;
- }
-
- vlib_buffer_enqueue_to_single_next (vm, node, from,
- GRE_ENCAP_NEXT_L2_MIDCHAIN,
- frame->n_vectors);
-
- vlib_node_increment_counter (vm, node->node_index,
- GRE_ERROR_PKTS_ENCAP, frame->n_vectors);
-
- return frame->n_vectors;
-}
-
-static char *gre_error_strings[] = {
-#define gre_error(n,s) s,
-#include "error.def"
-#undef gre_error
-};
-
-VLIB_NODE_FN (gre_teb_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gre_encap_inline (vm, node, frame, GRE_TUNNEL_TYPE_TEB));
-}
-
-VLIB_NODE_FN (gre_erspan_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gre_encap_inline (vm, node, frame, GRE_TUNNEL_TYPE_ERSPAN));
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gre_teb_encap_node) =
-{
- .name = "gre-teb-encap",
- .vector_size = sizeof (u32),
- .format_trace = format_gre_tx_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = GRE_N_ERROR,
- .error_strings = gre_error_strings,
- .n_next_nodes = GRE_ENCAP_N_NEXT,
- .next_nodes = {
- [GRE_ENCAP_NEXT_L2_MIDCHAIN] = "adj-l2-midchain",
- },
-};
-VLIB_REGISTER_NODE (gre_erspan_encap_node) =
-{
- .name = "gre-erspan-encap",
- .vector_size = sizeof (u32),
- .format_trace = format_gre_tx_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = GRE_N_ERROR,
- .error_strings = gre_error_strings,
- .n_next_nodes = GRE_ENCAP_N_NEXT,
- .next_nodes = {
- [GRE_ENCAP_NEXT_L2_MIDCHAIN] = "adj-l2-midchain",
- },
-};
-/* *INDENT-ON* */
-
-#ifndef CLIB_MARCH_VARIANT
-static u8 *
-format_gre_tunnel_name (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- gre_main_t *gm = &gre_main;
- gre_tunnel_t *t;
-
- if (dev_instance >= vec_len (gm->tunnels))
- return format (s, "<improperly-referenced>");
-
- t = pool_elt_at_index (gm->tunnels, dev_instance);
- return format (s, "gre%d", t->user_instance);
-}
-
-static u8 *
-format_gre_device (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- CLIB_UNUSED (int verbose) = va_arg (*args, int);
-
- s = format (s, "GRE tunnel: id %d\n", dev_instance);
- return s;
-}
-
-static int
-gre_tunnel_desc (u32 sw_if_index,
- ip46_address_t * src, ip46_address_t * dst, u8 * is_l2)
-{
- gre_main_t *gm = &gre_main;
- gre_tunnel_t *t;
- u32 ti;
-
- ti = gm->tunnel_index_by_sw_if_index[sw_if_index];
-
- if (~0 == ti)
- /* not one of ours */
- return -1;
-
- t = pool_elt_at_index (gm->tunnels, ti);
-
- *src = t->tunnel_src;
- *dst = t->tunnel_dst.fp_addr;
- *is_l2 = t->type == GRE_TUNNEL_TYPE_TEB;
-
- return (0);
-}
-
-/* *INDENT-OFF* */
-VNET_DEVICE_CLASS (gre_device_class) = {
- .name = "GRE tunnel device",
- .format_device_name = format_gre_tunnel_name,
- .format_device = format_gre_device,
- .format_tx_trace = format_gre_tx_trace,
- .admin_up_down_function = gre_interface_admin_up_down,
- .ip_tun_desc = gre_tunnel_desc,
-#ifdef SOON
- .clear counter = 0;
-#endif
-};
-
-VNET_HW_INTERFACE_CLASS (gre_hw_interface_class) = {
- .name = "GRE",
- .format_header = format_gre_header_with_length,
- .unformat_header = unformat_gre_header,
- .build_rewrite = gre_build_rewrite,
- .update_adjacency = gre_update_adj,
- .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
-};
-
-VNET_HW_INTERFACE_CLASS (mgre_hw_interface_class) = {
- .name = "mGRE",
- .format_header = format_gre_header_with_length,
- .unformat_header = unformat_gre_header,
- .build_rewrite = gre_build_rewrite,
- .update_adjacency = mgre_update_adj,
- .flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA,
-};
-/* *INDENT-ON* */
-#endif /* CLIB_MARCH_VARIANT */
-
-static void
-add_protocol (gre_main_t * gm, gre_protocol_t protocol, char *protocol_name)
-{
- gre_protocol_info_t *pi;
- u32 i;
-
- vec_add2 (gm->protocol_infos, pi, 1);
- i = pi - gm->protocol_infos;
-
- pi->name = protocol_name;
- pi->protocol = protocol;
- pi->next_index = pi->node_index = ~0;
-
- hash_set (gm->protocol_info_by_protocol, protocol, i);
- hash_set_mem (gm->protocol_info_by_name, pi->name, i);
-}
-
-static clib_error_t *
-gre_init (vlib_main_t * vm)
-{
- gre_main_t *gm = &gre_main;
- clib_error_t *error;
- ip_main_t *im = &ip_main;
- ip_protocol_info_t *pi;
-
- clib_memset (gm, 0, sizeof (gm[0]));
- gm->vlib_main = vm;
- gm->vnet_main = vnet_get_main ();
-
- if ((error = vlib_call_init_function (vm, ip_main_init)))
- return error;
-
- if ((error = vlib_call_init_function (vm, ip4_lookup_init)))
- return error;
-
- if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
- return error;
-
- /* Set up the ip packet generator */
- pi = ip_get_protocol_info (im, IP_PROTOCOL_GRE);
- pi->format_header = format_gre_header;
- pi->unformat_pg_edit = unformat_pg_gre_header;
-
- gm->protocol_info_by_name = hash_create_string (0, sizeof (uword));
- gm->protocol_info_by_protocol = hash_create (0, sizeof (uword));
- gm->tunnel_by_key4 =
- hash_create_mem (0, sizeof (gre_tunnel_key4_t), sizeof (uword));
- gm->tunnel_by_key6 =
- hash_create_mem (0, sizeof (gre_tunnel_key6_t), sizeof (uword));
- gm->seq_num_by_key =
- hash_create_mem (0, sizeof (gre_sn_key_t), sizeof (uword));
-
-#define _(n,s) add_protocol (gm, GRE_PROTOCOL_##s, #s);
- foreach_gre_protocol
-#undef _
- return vlib_call_init_function (vm, gre_input_init);
-}
-
-VLIB_INIT_FUNCTION (gre_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/gre/gre.h b/src/vnet/gre/gre.h
deleted file mode 100644
index ea085bf0fa1..00000000000
--- a/src/vnet/gre/gre.h
+++ /dev/null
@@ -1,443 +0,0 @@
-/*
- * gre.h: types/functions for gre.
- *
- * Copyright (c) 2012 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef included_gre_h
-#define included_gre_h
-
-#include <vnet/vnet.h>
-#include <vnet/gre/packet.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ip/format.h>
-#include <vnet/adj/adj_types.h>
-#include <vnet/tunnel/tunnel.h>
-#include <vnet/teib/teib.h>
-
-extern vnet_hw_interface_class_t gre_hw_interface_class;
-extern vnet_hw_interface_class_t mgre_hw_interface_class;
-
-typedef enum
-{
-#define gre_error(n,s) GRE_ERROR_##n,
-#include <vnet/gre/error.def>
-#undef gre_error
- GRE_N_ERROR,
-} gre_error_t;
-
-/**
- * L3: GRE (i.e. this tunnel is in L3 mode)
- * TEB: Transparent Ethernet Bridging - the tunnel is in L2 mode
- * ERSPAN: type 2 - the tunnel is for port mirror SPAN output. Each tunnel is
- *         associated with a session ID and is expected to be used only for
- *         encap and output of mirrored packets from an L2 network. There is
- *         no support for receiving ERSPAN packets on a GRE ERSPAN tunnel
- */
-#define foreach_gre_tunnel_type \
- _(L3, "L3") \
- _(TEB, "TEB") \
- _(ERSPAN, "ERSPAN") \
-
-/**
- * @brief The GRE tunnel type
- */
-typedef enum gre_tunnel_type_t_
-{
-#define _(n, s) GRE_TUNNEL_TYPE_##n,
- foreach_gre_tunnel_type
-#undef _
-} __clib_packed gre_tunnel_type_t;
-
-extern u8 *format_gre_tunnel_type (u8 * s, va_list * args);
-
-/**
- * A GRE payload protocol registration
- */
-typedef struct
-{
- /** Name (a c string). */
- char *name;
-
- /** GRE protocol type in host byte order. */
- gre_protocol_t protocol;
-
- /** GRE tunnel type */
- gre_tunnel_type_t tunnel_type;
-
- /** Node which handles this type. */
- u32 node_index;
-
- /** Next index for this type. */
- u32 next_index;
-} gre_protocol_info_t;
-
-/**
- * Elements of the GRE key that are common to v4 and v6 addresses
- */
-typedef struct gre_tunnel_key_common_t_
-{
- union
- {
- struct
- {
- u32 fib_index;
- u16 session_id;
- gre_tunnel_type_t type;
- tunnel_mode_t mode;
- };
- u64 as_u64;
- };
-} gre_tunnel_key_common_t;
-
-STATIC_ASSERT_SIZEOF (gre_tunnel_key_common_t, sizeof (u64));
-
-/**
- * @brief Key for an IPv4 GRE Tunnel
- */
-typedef struct gre_tunnel_key4_t_
-{
- /**
- * Source and destination IP addresses
- */
- union
- {
- struct
- {
- ip4_address_t gtk_src;
- ip4_address_t gtk_dst;
- };
- u64 gtk_as_u64;
- };
-
- /** address independent attributes */
- gre_tunnel_key_common_t gtk_common;
-} __attribute__ ((packed)) gre_tunnel_key4_t;
-
-STATIC_ASSERT_SIZEOF (gre_tunnel_key4_t, 2 * sizeof (u64));
-
-/**
- * @brief Key for an IPv6 GRE Tunnel
- * We use a different type so that the V4 key hash is as small as possible
- */
-typedef struct gre_tunnel_key6_t_
-{
- /**
- * Source and destination IP addresses
- */
- ip6_address_t gtk_src;
- ip6_address_t gtk_dst;
-
- /** address independent attributes */
- gre_tunnel_key_common_t gtk_common;
-} __attribute__ ((packed)) gre_tunnel_key6_t;
-
-STATIC_ASSERT_SIZEOF (gre_tunnel_key6_t, 5 * sizeof (u64));
-
-/**
- * Union of the two possible key types
- */
-typedef union gre_tunnel_key_t_
-{
- gre_tunnel_key4_t gtk_v4;
- gre_tunnel_key6_t gtk_v6;
-} gre_tunnel_key_t;
-
-/**
- * The session ID is only a 10 bit value
- */
-#define GTK_SESSION_ID_MAX (0x3ff)
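
Only the low 10 bits of a session ID are significant, so out-of-range values must be rejected before tunnel creation; the encap path ORs the ID straight into the ERSPAN type 2 header. A sketch of the validation, assuming a points at the vnet_gre_tunnel_add_del_args_t declared later in this header (it mirrors the check in vnet_gre_tunnel_add_del):

  if (a->session_id > GTK_SESSION_ID_MAX) /* only 10 bits are significant */
    return VNET_API_ERROR_INVALID_SESSION_ID;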
-
-/**
- * Used for GRE header seq number generation for ERSPAN encap
- */
-typedef struct
-{
- u32 seq_num;
- u32 ref_count;
-} gre_sn_t;
-
-/**
- * Hash key for GRE header seq number generation for ERSPAN encap
- */
-typedef struct
-{
- ip46_address_t src;
- ip46_address_t dst;
- u32 fib_index;
-} gre_sn_key_t;
-
-/**
- * @brief A representation of a GRE tunnel
- */
-typedef struct
-{
- /**
- * Required for pool_get_aligned
- */
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-
- /**
- * The tunnel's source/local address
- */
- ip46_address_t tunnel_src;
- /**
- * The tunnel's destination/remote address
- */
- fib_prefix_t tunnel_dst;
- /**
- * The FIB in which the src/dst addresses are present
- */
- u32 outer_fib_index;
- u32 hw_if_index;
- u32 sw_if_index;
- gre_tunnel_type_t type;
- tunnel_mode_t mode;
- tunnel_encap_decap_flags_t flags;
-
- /**
- * an L2 tunnel always requires an L2 midchain; cache it here for the DP.
- */
- adj_index_t l2_adj_index;
-
- /**
- * ERSPAN type 2 session ID, least significant 10 bits of u16
- */
- u16 session_id;
-
- /**
-   * GRE header sequence number (SN) used for the ERSPAN type 2 header; it
-   * must be bumped atomically to be thread safe. As multiple GRE tunnels are
-   * created for the same fib-idx/DIP/SIP with different ERSPAN session
-   * numbers, they all share the same SN, which is kept per FIB/DIP/SIP as
-   * specified by RFC 2890.
- */
- gre_sn_t *gre_sn;
-
- u32 dev_instance; /* Real device instance in tunnel vector */
- u32 user_instance; /* Instance name being shown to user */
-} gre_tunnel_t;
-
-typedef struct
-{
- u8 next_index;
- u8 tunnel_type;
-} next_info_t;
-
-/**
- * @brief GRE related global data
- */
-typedef struct
-{
- /**
- * pool of tunnel instances
- */
- gre_tunnel_t *tunnels;
-
- /**
- * GRE payload protocol registrations
- */
- gre_protocol_info_t *protocol_infos;
-
- /**
- * Hash tables mapping name/protocol to protocol info index.
- */
- uword *protocol_info_by_name, *protocol_info_by_protocol;
-
- /**
- * Hash mapping to tunnels with ipv4 src/dst addr
- */
- uword *tunnel_by_key4;
-
- /**
- * Hash mapping to tunnels with ipv6 src/dst addr
- */
- uword *tunnel_by_key6;
-
- /**
- * Hash mapping tunnel src/dst addr and fib-idx to sequence number
- */
- uword *seq_num_by_key;
-
- /**
- * Mapping from sw_if_index to tunnel index
- */
- u32 *tunnel_index_by_sw_if_index;
-
- /* Sparse vector mapping gre protocol in network byte order
- to next index. */
- next_info_t *next_by_protocol;
-
- /* convenience */
- vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
-
- /* Record used instances */
- uword *instance_used;
-
- u16 msg_id_base;
-} gre_main_t;
-
-/**
- * @brief IPv4 and GRE header.
- */
-/* *INDENT-OFF* */
-typedef CLIB_PACKED (struct {
- ip4_header_t ip4;
- gre_header_t gre;
-}) ip4_and_gre_header_t;
-/* *INDENT-ON* */
-
-/**
- * @brief IPv6 and GRE header.
- */
-/* *INDENT-OFF* */
-typedef CLIB_PACKED (struct {
- ip6_header_t ip6;
- gre_header_t gre;
-}) ip6_and_gre_header_t;
-/* *INDENT-ON* */
-
-always_inline gre_protocol_info_t *
-gre_get_protocol_info (gre_main_t * em, gre_protocol_t protocol)
-{
- uword *p = hash_get (em->protocol_info_by_protocol, protocol);
- return p ? vec_elt_at_index (em->protocol_infos, p[0]) : 0;
-}
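
A short usage sketch: on decap, the protocol field parsed from the GRE header selects the registered payload handler, and a NULL result means no handler was registered for that protocol:

  gre_protocol_info_t *pi = gre_get_protocol_info (&gre_main,
						   GRE_PROTOCOL_teb);
  if (pi != NULL && pi->node_index != ~0)
    {
      /* a payload node is registered; packets can be handed to it */
    }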
-
-extern gre_main_t gre_main;
-
-extern clib_error_t *gre_interface_admin_up_down (vnet_main_t * vnm,
- u32 hw_if_index, u32 flags);
-
-extern void gre_tunnel_stack (adj_index_t ai);
-extern void gre_update_adj (vnet_main_t * vnm,
- u32 sw_if_index, adj_index_t ai);
-
-typedef struct mgre_walk_ctx_t_
-{
- const gre_tunnel_t *t;
- const teib_entry_t *ne;
-} mgre_walk_ctx_t;
-
-adj_walk_rc_t mgre_mk_complete_walk (adj_index_t ai, void *data);
-adj_walk_rc_t mgre_mk_incomplete_walk (adj_index_t ai, void *data);
-
-format_function_t format_gre_protocol;
-format_function_t format_gre_header;
-format_function_t format_gre_header_with_length;
-
-extern vlib_node_registration_t gre4_input_node;
-extern vlib_node_registration_t gre6_input_node;
-extern vlib_node_registration_t gre_erspan_encap_node;
-extern vlib_node_registration_t gre_teb_encap_node;
-extern vnet_device_class_t gre_device_class;
-
-/* Parse a GRE protocol as 0xXXXX or a protocol name,
-   in either host or network byte order. */
-unformat_function_t unformat_gre_protocol_host_byte_order;
-unformat_function_t unformat_gre_protocol_net_byte_order;
-
-/* Parse gre header. */
-unformat_function_t unformat_gre_header;
-unformat_function_t unformat_pg_gre_header;
-
-void
-gre_register_input_protocol (vlib_main_t * vm, gre_protocol_t protocol,
- u32 node_index, gre_tunnel_type_t tunnel_type);
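
A sketch of how a payload protocol's input node is typically wired up at init time; my_input_node here is a hypothetical registration, not part of this file:

  /* hypothetical: steer MPLS-over-GRE payloads to my_input_node */
  gre_register_input_protocol (vm, GRE_PROTOCOL_mpls_unicast,
			       my_input_node.index, GRE_TUNNEL_TYPE_L3);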
-
-/* manually added to the interface output node in gre.c */
-#define GRE_OUTPUT_NEXT_LOOKUP 1
-
-typedef struct
-{
- u8 is_add;
- gre_tunnel_type_t type;
- tunnel_mode_t mode;
- u8 is_ipv6;
- u32 instance;
- ip46_address_t src, dst;
- u32 outer_table_id;
- u16 session_id;
- tunnel_encap_decap_flags_t flags;
-} vnet_gre_tunnel_add_del_args_t;
-
-extern int vnet_gre_tunnel_add_del (vnet_gre_tunnel_add_del_args_t * a,
- u32 * sw_if_indexp);
-
-static inline void
-gre_mk_key4 (ip4_address_t src,
- ip4_address_t dst,
- u32 fib_index,
- gre_tunnel_type_t ttype,
- tunnel_mode_t tmode, u16 session_id, gre_tunnel_key4_t * key)
-{
- key->gtk_src = src;
- key->gtk_dst = dst;
- key->gtk_common.type = ttype;
- key->gtk_common.mode = tmode;
- key->gtk_common.fib_index = fib_index;
- key->gtk_common.session_id = session_id;
-}
-
-static inline int
-gre_match_key4 (const gre_tunnel_key4_t * key1,
- const gre_tunnel_key4_t * key2)
-{
- return ((key1->gtk_as_u64 == key2->gtk_as_u64) &&
- (key1->gtk_common.as_u64 == key2->gtk_common.as_u64));
-}
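
Because gre_tunnel_key4_t packs the two addresses into one u64 and the common attributes into another (see the STATIC_ASSERTs above), a v4 key match costs exactly two 64-bit compares. A minimal sketch, assuming src, dst and fib_index are in scope:

  gre_tunnel_key4_t k1, k2;

  gre_mk_key4 (src, dst, fib_index, GRE_TUNNEL_TYPE_L3, TUNNEL_MODE_P2P,
	       0 /* session_id */, &k1);
  gre_mk_key4 (src, dst, fib_index, GRE_TUNNEL_TYPE_L3, TUNNEL_MODE_P2P,
	       0 /* session_id */, &k2);

  ASSERT (gre_match_key4 (&k1, &k2));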
-
-static inline void
-gre_mk_key6 (const ip6_address_t * src,
- const ip6_address_t * dst,
- u32 fib_index,
- gre_tunnel_type_t ttype,
- tunnel_mode_t tmode, u16 session_id, gre_tunnel_key6_t * key)
-{
- key->gtk_src = *src;
- key->gtk_dst = *dst;
- key->gtk_common.type = ttype;
- key->gtk_common.mode = tmode;
- key->gtk_common.fib_index = fib_index;
- key->gtk_common.session_id = session_id;
-}
-
-static inline int
-gre_match_key6 (const gre_tunnel_key6_t * key1,
- const gre_tunnel_key6_t * key2)
-{
- return (ip6_address_is_equal (&key1->gtk_src, &key2->gtk_src) &&
- ip6_address_is_equal (&key1->gtk_dst, &key2->gtk_dst) &&
- (key1->gtk_common.as_u64 == key2->gtk_common.as_u64));
-}
-
-static inline void
-gre_mk_sn_key (const gre_tunnel_t * gt, gre_sn_key_t * key)
-{
- key->src = gt->tunnel_src;
- key->dst = gt->tunnel_dst.fp_addr;
- key->fib_index = gt->outer_fib_index;
-}
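
ERSPAN tunnels that share a src/dst/fib triple also share a single gre_sn_t, keyed by the structure built here. A sketch of the lookup side, assuming a tunnel t is in scope; note that the hash stores the gre_sn_t pointer as its value, so the returned slot must be dereferenced:

  gre_sn_key_t skey;
  uword *p;

  gre_mk_sn_key (t, &skey);
  p = hash_get_mem (gre_main.seq_num_by_key, &skey);
  if (p != NULL)
    /* another session on the same triple: share its sequence state */
    ((gre_sn_t *) p[0])->ref_count++;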
-
-#endif /* included_gre_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/gre/gre_api.c b/src/vnet/gre/gre_api.c
deleted file mode 100644
index 3b42c76fb12..00000000000
--- a/src/vnet/gre/gre_api.c
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- *------------------------------------------------------------------
- * gre_api.c - gre api
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vnet/vnet.h>
-#include <vlibmemory/api.h>
-
-#include <vnet/interface.h>
-#include <vnet/api_errno.h>
-
-#include <vnet/gre/gre.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/tunnel/tunnel_types_api.h>
-#include <vnet/ip/ip_types_api.h>
-
-#include <vnet/gre/gre.api_enum.h>
-#include <vnet/gre/gre.api_types.h>
-
-#define REPLY_MSG_ID_BASE gre_main.msg_id_base
-#include <vlibapi/api_helper_macros.h>
-
-static int
-gre_tunnel_type_decode (vl_api_gre_tunnel_type_t in, gre_tunnel_type_t * out)
-{
- switch (in)
- {
-#define _(n, v) \
- case GRE_API_TUNNEL_TYPE_##n: \
- *out = GRE_TUNNEL_TYPE_##n; \
- return (0);
- foreach_gre_tunnel_type
-#undef _
- }
-
- return (VNET_API_ERROR_INVALID_VALUE);
-}
-
-static vl_api_gre_tunnel_type_t
-gre_tunnel_type_encode (gre_tunnel_type_t in)
-{
- vl_api_gre_tunnel_type_t out = GRE_API_TUNNEL_TYPE_L3;
-
- switch (in)
- {
-#define _(n, v) \
- case GRE_TUNNEL_TYPE_##n: \
- out = GRE_API_TUNNEL_TYPE_##n; \
- break;
- foreach_gre_tunnel_type
-#undef _
- }
-
- return (out);
-}
-
-static void vl_api_gre_tunnel_add_del_t_handler
- (vl_api_gre_tunnel_add_del_t * mp)
-{
- vnet_gre_tunnel_add_del_args_t _a = { }, *a = &_a;
- vl_api_gre_tunnel_add_del_reply_t *rmp;
- tunnel_encap_decap_flags_t flags;
- u32 sw_if_index = ~0;
- ip46_type_t itype[2];
- int rv = 0;
-
- itype[0] = ip_address_decode (&mp->tunnel.src, &a->src);
- itype[1] = ip_address_decode (&mp->tunnel.dst, &a->dst);
-
- if (itype[0] != itype[1])
- {
- rv = VNET_API_ERROR_INVALID_PROTOCOL;
- goto out;
- }
-
- if (ip46_address_is_equal (&a->src, &a->dst))
- {
- rv = VNET_API_ERROR_SAME_SRC_DST;
- goto out;
- }
-
- rv = gre_tunnel_type_decode (mp->tunnel.type, &a->type);
-
- if (rv)
- goto out;
-
- rv = tunnel_mode_decode (mp->tunnel.mode, &a->mode);
-
- if (rv)
- goto out;
-
- rv = tunnel_encap_decap_flags_decode (mp->tunnel.flags, &flags);
-
- if (rv)
- goto out;
-
- a->is_add = mp->is_add;
- a->is_ipv6 = (itype[0] == IP46_TYPE_IP6);
- a->instance = ntohl (mp->tunnel.instance);
- a->session_id = ntohs (mp->tunnel.session_id);
- a->outer_table_id = ntohl (mp->tunnel.outer_table_id);
- a->flags = flags;
-
- rv = vnet_gre_tunnel_add_del (a, &sw_if_index);
-
-out:
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_GRE_TUNNEL_ADD_DEL_REPLY,
- ({
- rmp->sw_if_index = ntohl (sw_if_index);
- }));
- /* *INDENT-ON* */
-}
-
-static void send_gre_tunnel_details
- (gre_tunnel_t * t, vl_api_gre_tunnel_dump_t * mp)
-{
- vl_api_gre_tunnel_details_t *rmp;
-
- /* *INDENT-OFF* */
- REPLY_MACRO_DETAILS2(VL_API_GRE_TUNNEL_DETAILS,
- ({
- ip_address_encode (&t->tunnel_src, IP46_TYPE_ANY, &rmp->tunnel.src);
- ip_address_encode (&t->tunnel_dst.fp_addr, IP46_TYPE_ANY, &rmp->tunnel.dst);
-
- rmp->tunnel.outer_table_id =
- htonl (fib_table_get_table_id
- (t->outer_fib_index, t->tunnel_dst.fp_proto));
-
- rmp->tunnel.type = gre_tunnel_type_encode (t->type);
- rmp->tunnel.mode = tunnel_mode_encode (t->mode);
- rmp->tunnel.instance = htonl (t->user_instance);
- rmp->tunnel.sw_if_index = htonl (t->sw_if_index);
- rmp->tunnel.session_id = htons (t->session_id);
- }));
- /* *INDENT-ON* */
-}
-
-static void
-vl_api_gre_tunnel_dump_t_handler (vl_api_gre_tunnel_dump_t * mp)
-{
- vl_api_registration_t *reg;
- gre_main_t *gm = &gre_main;
- gre_tunnel_t *t;
- u32 sw_if_index;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- sw_if_index = ntohl (mp->sw_if_index);
-
- if (~0 == sw_if_index)
- {
- /* *INDENT-OFF* */
- pool_foreach (t, gm->tunnels)
- {
- send_gre_tunnel_details(t, mp);
- }
- /* *INDENT-ON* */
- }
-
- else
- {
- if ((sw_if_index >= vec_len (gm->tunnel_index_by_sw_if_index)) ||
- (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index]))
- {
- return;
- }
- t = &gm->tunnels[gm->tunnel_index_by_sw_if_index[sw_if_index]];
- send_gre_tunnel_details (t, mp);
- }
-}
-
-/*
- * gre_api_hookup
- * Add vpe's API message handlers to the table.
- * vlib has already mapped shared memory and
- * added the client registration handlers.
- * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
- */
-/* API definitions */
-#include <vnet/format_fns.h>
-#include <vnet/gre/gre.api.c>
-
-static clib_error_t *
-gre_api_hookup (vlib_main_t * vm)
-{
- /*
- * Set up the (msg_name, crc, message-id) table
- */
- gre_main.msg_id_base = setup_message_id_table ();
-
- return 0;
-}
-
-VLIB_API_INIT_FUNCTION (gre_api_hookup);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/gre/interface.c b/src/vnet/gre/interface.c
deleted file mode 100644
index 0251ced598f..00000000000
--- a/src/vnet/gre/interface.c
+++ /dev/null
@@ -1,840 +0,0 @@
-/*
- * gre_interface.c: gre interfaces
- *
- * Copyright (c) 2012 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vnet/vnet.h>
-#include <vnet/gre/gre.h>
-#include <vnet/ip/format.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/adj/adj_midchain.h>
-#include <vnet/adj/adj_nbr.h>
-#include <vnet/mpls/mpls.h>
-#include <vnet/l2/l2_input.h>
-#include <vnet/teib/teib.h>
-
-u8 *
-format_gre_tunnel_type (u8 * s, va_list * args)
-{
- gre_tunnel_type_t type = va_arg (*args, int);
-
- switch (type)
- {
-#define _(n, v) case GRE_TUNNEL_TYPE_##n: \
- s = format (s, "%s", v); \
- break;
- foreach_gre_tunnel_type
-#undef _
- }
-
- return (s);
-}
-
-static u8 *
-format_gre_tunnel (u8 * s, va_list * args)
-{
- gre_tunnel_t *t = va_arg (*args, gre_tunnel_t *);
-
- s = format (s, "[%d] instance %d src %U dst %U fib-idx %d sw-if-idx %d ",
- t->dev_instance, t->user_instance,
- format_ip46_address, &t->tunnel_src, IP46_TYPE_ANY,
- format_ip46_address, &t->tunnel_dst.fp_addr, IP46_TYPE_ANY,
- t->outer_fib_index, t->sw_if_index);
-
- s = format (s, "payload %U ", format_gre_tunnel_type, t->type);
- s = format (s, "%U ", format_tunnel_mode, t->mode);
-
- if (t->type == GRE_TUNNEL_TYPE_ERSPAN)
- s = format (s, "session %d ", t->session_id);
-
- if (t->type != GRE_TUNNEL_TYPE_L3)
- s = format (s, "l2-adj-idx %d ", t->l2_adj_index);
-
- return s;
-}
-
-static gre_tunnel_t *
-gre_tunnel_db_find (const vnet_gre_tunnel_add_del_args_t * a,
- u32 outer_fib_index, gre_tunnel_key_t * key)
-{
- gre_main_t *gm = &gre_main;
- uword *p;
-
- if (!a->is_ipv6)
- {
- gre_mk_key4 (a->src.ip4, a->dst.ip4, outer_fib_index,
- a->type, a->mode, a->session_id, &key->gtk_v4);
- p = hash_get_mem (gm->tunnel_by_key4, &key->gtk_v4);
- }
- else
- {
- gre_mk_key6 (&a->src.ip6, &a->dst.ip6, outer_fib_index,
- a->type, a->mode, a->session_id, &key->gtk_v6);
- p = hash_get_mem (gm->tunnel_by_key6, &key->gtk_v6);
- }
-
- if (NULL == p)
- return (NULL);
-
- return (pool_elt_at_index (gm->tunnels, p[0]));
-}
-
-static void
-gre_tunnel_db_add (gre_tunnel_t * t, gre_tunnel_key_t * key)
-{
- gre_main_t *gm = &gre_main;
-
- if (t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6)
- {
- hash_set_mem_alloc (&gm->tunnel_by_key6, &key->gtk_v6, t->dev_instance);
- }
- else
- {
- hash_set_mem_alloc (&gm->tunnel_by_key4, &key->gtk_v4, t->dev_instance);
- }
-}
-
-static void
-gre_tunnel_db_remove (gre_tunnel_t * t, gre_tunnel_key_t * key)
-{
- gre_main_t *gm = &gre_main;
-
- if (t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6)
- {
- hash_unset_mem_free (&gm->tunnel_by_key6, &key->gtk_v6);
- }
- else
- {
- hash_unset_mem_free (&gm->tunnel_by_key4, &key->gtk_v4);
- }
-}
-
-/**
- * gre_tunnel_stack
- *
- * 'stack' (resolve the recursion for) the tunnel's midchain adjacency
- */
-void
-gre_tunnel_stack (adj_index_t ai)
-{
- gre_main_t *gm = &gre_main;
- ip_adjacency_t *adj;
- gre_tunnel_t *gt;
- u32 sw_if_index;
-
- adj = adj_get (ai);
- sw_if_index = adj->rewrite_header.sw_if_index;
-
- if ((vec_len (gm->tunnel_index_by_sw_if_index) <= sw_if_index) ||
- (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index]))
- return;
-
- gt = pool_elt_at_index (gm->tunnels,
- gm->tunnel_index_by_sw_if_index[sw_if_index]);
-
- if ((vnet_hw_interface_get_flags (vnet_get_main (), gt->hw_if_index) &
- VNET_HW_INTERFACE_FLAG_LINK_UP) == 0)
- {
- adj_midchain_delegate_unstack (ai);
- }
- else
- {
- adj_midchain_delegate_stack (ai, gt->outer_fib_index, &gt->tunnel_dst);
- }
-}
-
-/**
- * mgre_tunnel_stack
- *
- * 'stack' (resolve the recursion for) the tunnel's midchain adjacency
- */
-static void
-mgre_tunnel_stack (adj_index_t ai)
-{
- gre_main_t *gm = &gre_main;
- const ip_adjacency_t *adj;
- const gre_tunnel_t *gt;
- u32 sw_if_index;
-
- adj = adj_get (ai);
- sw_if_index = adj->rewrite_header.sw_if_index;
-
- if ((vec_len (gm->tunnel_index_by_sw_if_index) <= sw_if_index) ||
- (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index]))
- return;
-
- gt = pool_elt_at_index (gm->tunnels,
- gm->tunnel_index_by_sw_if_index[sw_if_index]);
-
- if ((vnet_hw_interface_get_flags (vnet_get_main (), gt->hw_if_index) &
- VNET_HW_INTERFACE_FLAG_LINK_UP) == 0)
- {
- adj_midchain_delegate_unstack (ai);
- }
- else
- {
- const teib_entry_t *ne;
-
- ne = teib_entry_find_46 (sw_if_index, adj->ia_nh_proto,
- &adj->sub_type.nbr.next_hop);
- if (NULL != ne)
- teib_entry_adj_stack (ne, ai);
- }
-}
-
-/**
- * @brief Call back when restacking all adjacencies on a GRE interface
- */
-static adj_walk_rc_t
-gre_adj_walk_cb (adj_index_t ai, void *ctx)
-{
- gre_tunnel_stack (ai);
-
- return (ADJ_WALK_RC_CONTINUE);
-}
-
-static adj_walk_rc_t
-mgre_adj_walk_cb (adj_index_t ai, void *ctx)
-{
- mgre_tunnel_stack (ai);
-
- return (ADJ_WALK_RC_CONTINUE);
-}
-
-static void
-gre_tunnel_restack (gre_tunnel_t * gt)
-{
- fib_protocol_t proto;
-
- /*
-   * walk all the adjacencies on the GRE interface and restack them
- */
- FOR_EACH_FIB_IP_PROTOCOL (proto)
- {
- switch (gt->mode)
- {
- case TUNNEL_MODE_P2P:
- adj_nbr_walk (gt->sw_if_index, proto, gre_adj_walk_cb, NULL);
- break;
- case TUNNEL_MODE_MP:
- adj_nbr_walk (gt->sw_if_index, proto, mgre_adj_walk_cb, NULL);
- break;
- }
- }
-}
-
-static void
-gre_teib_mk_key (const gre_tunnel_t * t,
- const teib_entry_t * ne, gre_tunnel_key_t * key)
-{
- const fib_prefix_t *nh;
-
- nh = teib_entry_get_nh (ne);
-
- /* construct the key using mode P2P so it can be found in the DP */
- if (FIB_PROTOCOL_IP4 == nh->fp_proto)
- gre_mk_key4 (t->tunnel_src.ip4,
- nh->fp_addr.ip4,
- teib_entry_get_fib_index (ne),
- t->type, TUNNEL_MODE_P2P, 0, &key->gtk_v4);
- else
- gre_mk_key6 (&t->tunnel_src.ip6,
- &nh->fp_addr.ip6,
- teib_entry_get_fib_index (ne),
- t->type, TUNNEL_MODE_P2P, 0, &key->gtk_v6);
-}
-
-/**
- * A TEIB entry has been added
- */
-static void
-gre_teib_entry_added (const teib_entry_t * ne)
-{
- gre_main_t *gm = &gre_main;
- const ip_address_t *nh;
- gre_tunnel_key_t key;
- gre_tunnel_t *t;
- u32 sw_if_index;
- u32 t_idx;
-
- sw_if_index = teib_entry_get_sw_if_index (ne);
-  if (vec_len (gm->tunnel_index_by_sw_if_index) <= sw_if_index)
- return;
-
- t_idx = gm->tunnel_index_by_sw_if_index[sw_if_index];
-
- if (INDEX_INVALID == t_idx)
- return;
-
- /* entry has been added on an interface for which there is a GRE tunnel */
- t = pool_elt_at_index (gm->tunnels, t_idx);
-
- if (t->mode != TUNNEL_MODE_MP)
- return;
-
-  /* the next-hop (underlay) of the TEIB entry will form part of the key for
-   * ingress lookup to match packets to this interface */
- gre_teib_mk_key (t, ne, &key);
- gre_tunnel_db_add (t, &key);
-
- /* update the rewrites for each of the adjacencies for this peer (overlay)
- * using the next-hop (underlay) */
- mgre_walk_ctx_t ctx = {
- .t = t,
- .ne = ne
- };
- nh = teib_entry_get_peer (ne);
- adj_nbr_walk_nh (teib_entry_get_sw_if_index (ne),
- (AF_IP4 == ip_addr_version (nh) ?
- FIB_PROTOCOL_IP4 :
- FIB_PROTOCOL_IP6),
- &ip_addr_46 (nh), mgre_mk_complete_walk, &ctx);
-}
-
-static void
-gre_teib_entry_deleted (const teib_entry_t * ne)
-{
- gre_main_t *gm = &gre_main;
- const ip_address_t *nh;
- gre_tunnel_key_t key;
- gre_tunnel_t *t;
- u32 sw_if_index;
- u32 t_idx;
-
- sw_if_index = teib_entry_get_sw_if_index (ne);
-  if (vec_len (gm->tunnel_index_by_sw_if_index) <= sw_if_index)
- return;
-
- t_idx = gm->tunnel_index_by_sw_if_index[sw_if_index];
-
- if (INDEX_INVALID == t_idx)
- return;
-
- t = pool_elt_at_index (gm->tunnels, t_idx);
-
- /* remove the next-hop as an ingress lookup key */
- gre_teib_mk_key (t, ne, &key);
- gre_tunnel_db_remove (t, &key);
-
- nh = teib_entry_get_peer (ne);
-
- /* make all the adjacencies incomplete */
- adj_nbr_walk_nh (teib_entry_get_sw_if_index (ne),
- (AF_IP4 == ip_addr_version (nh) ?
- FIB_PROTOCOL_IP4 :
- FIB_PROTOCOL_IP6),
- &ip_addr_46 (nh), mgre_mk_incomplete_walk, t);
-}
-
-static walk_rc_t
-gre_tunnel_delete_teib_walk (index_t nei, void *ctx)
-{
- gre_tunnel_t *t = ctx;
- gre_tunnel_key_t key;
-
- gre_teib_mk_key (t, teib_entry_get (nei), &key);
- gre_tunnel_db_remove (t, &key);
-
- return (WALK_CONTINUE);
-}
-
-static walk_rc_t
-gre_tunnel_add_teib_walk (index_t nei, void *ctx)
-{
- gre_tunnel_t *t = ctx;
- gre_tunnel_key_t key;
-
- gre_teib_mk_key (t, teib_entry_get (nei), &key);
- gre_tunnel_db_add (t, &key);
-
- return (WALK_CONTINUE);
-}
-
-static int
-vnet_gre_tunnel_add (vnet_gre_tunnel_add_del_args_t * a,
- u32 outer_fib_index, u32 * sw_if_indexp)
-{
- gre_main_t *gm = &gre_main;
- vnet_main_t *vnm = gm->vnet_main;
- gre_tunnel_t *t;
- vnet_hw_interface_t *hi;
- u32 hw_if_index, sw_if_index;
- clib_error_t *error;
- u8 is_ipv6 = a->is_ipv6;
- gre_tunnel_key_t key;
-
- t = gre_tunnel_db_find (a, outer_fib_index, &key);
- if (NULL != t)
- return VNET_API_ERROR_IF_ALREADY_EXISTS;
-
- pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES);
- clib_memset (t, 0, sizeof (*t));
-
- /* Reconcile the real dev_instance and a possible requested instance */
- u32 t_idx = t - gm->tunnels; /* tunnel index (or instance) */
- u32 u_idx = a->instance; /* user specified instance */
- if (u_idx == ~0)
- u_idx = t_idx;
- if (hash_get (gm->instance_used, u_idx))
- {
- pool_put (gm->tunnels, t);
- return VNET_API_ERROR_INSTANCE_IN_USE;
- }
- hash_set (gm->instance_used, u_idx, 1);
-
- t->dev_instance = t_idx; /* actual */
- t->user_instance = u_idx; /* name */
-
- t->type = a->type;
- t->mode = a->mode;
- t->flags = a->flags;
- if (t->type == GRE_TUNNEL_TYPE_ERSPAN)
- t->session_id = a->session_id;
-
- if (t->type == GRE_TUNNEL_TYPE_L3)
- {
- if (t->mode == TUNNEL_MODE_P2P)
- hw_if_index =
- vnet_register_interface (vnm, gre_device_class.index, t_idx,
- gre_hw_interface_class.index, t_idx);
- else
- hw_if_index =
- vnet_register_interface (vnm, gre_device_class.index, t_idx,
- mgre_hw_interface_class.index, t_idx);
- }
- else
- {
- /* Default MAC address (d00b:eed0:0000 + sw_if_index) */
- u8 address[6] =
- { 0xd0, 0x0b, 0xee, 0xd0, (u8) (t_idx >> 8), (u8) t_idx };
- error =
- ethernet_register_interface (vnm, gre_device_class.index, t_idx,
- address, &hw_if_index, 0);
- if (error)
- {
- clib_error_report (error);
- return VNET_API_ERROR_INVALID_REGISTRATION;
- }
- }
-
- /* Set GRE tunnel interface output node (not used for L3 payload) */
- if (GRE_TUNNEL_TYPE_ERSPAN == t->type)
- vnet_set_interface_output_node (vnm, hw_if_index,
- gre_erspan_encap_node.index);
- else
- vnet_set_interface_output_node (vnm, hw_if_index,
- gre_teb_encap_node.index);
-
- hi = vnet_get_hw_interface (vnm, hw_if_index);
- sw_if_index = hi->sw_if_index;
-
- t->hw_if_index = hw_if_index;
- t->outer_fib_index = outer_fib_index;
- t->sw_if_index = sw_if_index;
- t->l2_adj_index = ADJ_INDEX_INVALID;
-
- vec_validate_init_empty (gm->tunnel_index_by_sw_if_index, sw_if_index, ~0);
- gm->tunnel_index_by_sw_if_index[sw_if_index] = t_idx;
-
- if (!is_ipv6)
- {
- hi->min_packet_bytes =
- 64 + sizeof (gre_header_t) + sizeof (ip4_header_t);
- }
- else
- {
- hi->min_packet_bytes =
- 64 + sizeof (gre_header_t) + sizeof (ip6_header_t);
- }
-
-  /* Default GRE MTU. */
- vnet_sw_interface_set_mtu (vnm, sw_if_index, 9000);
-
- /*
- * source the FIB entry for the tunnel's destination
- * and become a child thereof. The tunnel will then get poked
- * when the forwarding for the entry updates, and the tunnel can
- * re-stack accordingly
- */
-
- clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src));
- t->tunnel_dst.fp_len = !is_ipv6 ? 32 : 128;
- t->tunnel_dst.fp_proto = !is_ipv6 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
- t->tunnel_dst.fp_addr = a->dst;
-
- gre_tunnel_db_add (t, &key);
-
- if (t->mode == TUNNEL_MODE_MP)
- teib_walk_itf (t->sw_if_index, gre_tunnel_add_teib_walk, t);
-
- if (t->type == GRE_TUNNEL_TYPE_ERSPAN)
- {
- gre_sn_key_t skey;
- gre_sn_t *gre_sn;
-
- gre_mk_sn_key (t, &skey);
-      uword *p = hash_get_mem (gm->seq_num_by_key, &skey);
-      gre_sn = p ? (gre_sn_t *) p[0] : NULL;
- if (gre_sn != NULL)
- {
- gre_sn->ref_count++;
- t->gre_sn = gre_sn;
- }
- else
- {
- gre_sn = clib_mem_alloc (sizeof (gre_sn_t));
- gre_sn->seq_num = 0;
- gre_sn->ref_count = 1;
- t->gre_sn = gre_sn;
- hash_set_mem_alloc (&gm->seq_num_by_key, &skey, (uword) gre_sn);
- }
- }
-
- if (t->type != GRE_TUNNEL_TYPE_L3)
- {
- t->l2_adj_index = adj_nbr_add_or_lock
- (t->tunnel_dst.fp_proto, VNET_LINK_ETHERNET, &zero_addr, sw_if_index);
- gre_update_adj (vnm, t->sw_if_index, t->l2_adj_index);
- }
-
- if (sw_if_indexp)
- *sw_if_indexp = sw_if_index;
-
-  /* register the gre4-input and gre6-input nodes */
- ip4_register_protocol (IP_PROTOCOL_GRE, gre4_input_node.index);
- ip6_register_protocol (IP_PROTOCOL_GRE, gre6_input_node.index);
-
- return 0;
-}
-
-static int
-vnet_gre_tunnel_delete (vnet_gre_tunnel_add_del_args_t * a,
- u32 outer_fib_index, u32 * sw_if_indexp)
-{
- gre_main_t *gm = &gre_main;
- vnet_main_t *vnm = gm->vnet_main;
- gre_tunnel_t *t;
- gre_tunnel_key_t key;
- u32 sw_if_index;
-
- t = gre_tunnel_db_find (a, outer_fib_index, &key);
- if (NULL == t)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-
- if (t->mode == TUNNEL_MODE_MP)
- teib_walk_itf (t->sw_if_index, gre_tunnel_delete_teib_walk, t);
-
- sw_if_index = t->sw_if_index;
- vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ );
-
- /* make sure tunnel is removed from l2 bd or xconnect */
- set_int_l2_mode (gm->vlib_main, vnm, MODE_L3, sw_if_index, 0,
- L2_BD_PORT_TYPE_NORMAL, 0, 0);
- gm->tunnel_index_by_sw_if_index[sw_if_index] = ~0;
-
- if (t->type == GRE_TUNNEL_TYPE_L3)
- vnet_delete_hw_interface (vnm, t->hw_if_index);
- else
- ethernet_delete_interface (vnm, t->hw_if_index);
-
- if (t->l2_adj_index != ADJ_INDEX_INVALID)
- {
- adj_midchain_delegate_unstack (t->l2_adj_index);
- adj_unlock (t->l2_adj_index);
- }
-
- ASSERT ((t->type != GRE_TUNNEL_TYPE_ERSPAN) || (t->gre_sn != NULL));
- if ((t->type == GRE_TUNNEL_TYPE_ERSPAN) && (t->gre_sn->ref_count-- == 1))
- {
- gre_sn_key_t skey;
- gre_mk_sn_key (t, &skey);
- hash_unset_mem_free (&gm->seq_num_by_key, &skey);
- clib_mem_free (t->gre_sn);
- }
-
- hash_unset (gm->instance_used, t->user_instance);
- gre_tunnel_db_remove (t, &key);
- pool_put (gm->tunnels, t);
-
- if (sw_if_indexp)
- *sw_if_indexp = sw_if_index;
-
- return 0;
-}
-
-int
-vnet_gre_tunnel_add_del (vnet_gre_tunnel_add_del_args_t * a,
- u32 * sw_if_indexp)
-{
- u32 outer_fib_index;
-
- outer_fib_index = fib_table_find ((a->is_ipv6 ?
- FIB_PROTOCOL_IP6 :
- FIB_PROTOCOL_IP4), a->outer_table_id);
-
- if (~0 == outer_fib_index)
- return VNET_API_ERROR_NO_SUCH_FIB;
-
- if (a->session_id > GTK_SESSION_ID_MAX)
- return VNET_API_ERROR_INVALID_SESSION_ID;
-
- if (a->mode == TUNNEL_MODE_MP && !ip46_address_is_zero (&a->dst))
- return (VNET_API_ERROR_INVALID_DST_ADDRESS);
-
- if (a->is_add)
- return (vnet_gre_tunnel_add (a, outer_fib_index, sw_if_indexp));
- else
- return (vnet_gre_tunnel_delete (a, outer_fib_index, sw_if_indexp));
-}
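
A hedged sketch of driving tunnel creation programmatically; it assembles the same argument block the CLI handler below builds, with placeholder endpoint addresses:

  vnet_gre_tunnel_add_del_args_t a = {
    .is_add = 1,
    .type = GRE_TUNNEL_TYPE_L3,
    .mode = TUNNEL_MODE_P2P,
    .instance = ~0,		/* let the pool pick an instance */
    .outer_table_id = 0,
    .flags = TUNNEL_ENCAP_DECAP_FLAG_NONE,
  };
  u32 sw_if_index;
  int rv;

  /* placeholder endpoints: 10.0.0.1 -> 10.0.0.2, IPv4 underlay */
  a.src.ip4.as_u32 = clib_host_to_net_u32 (0x0a000001);
  a.dst.ip4.as_u32 = clib_host_to_net_u32 (0x0a000002);

  rv = vnet_gre_tunnel_add_del (&a, &sw_if_index);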
-
-clib_error_t *
-gre_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
-{
- gre_main_t *gm = &gre_main;
- vnet_hw_interface_t *hi;
- gre_tunnel_t *t;
- u32 ti;
-
- hi = vnet_get_hw_interface (vnm, hw_if_index);
-
- if (NULL == gm->tunnel_index_by_sw_if_index ||
- hi->sw_if_index >= vec_len (gm->tunnel_index_by_sw_if_index))
- return (NULL);
-
- ti = gm->tunnel_index_by_sw_if_index[hi->sw_if_index];
-
- if (~0 == ti)
- /* not one of ours */
- return (NULL);
-
- t = pool_elt_at_index (gm->tunnels, ti);
-
- if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
- vnet_hw_interface_set_flags (vnm, hw_if_index,
- VNET_HW_INTERFACE_FLAG_LINK_UP);
- else
- vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */ );
-
- gre_tunnel_restack (t);
-
- return /* no error */ 0;
-}
-
-static clib_error_t *
-create_gre_tunnel_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- vnet_gre_tunnel_add_del_args_t _a, *a = &_a;
- ip46_address_t src = ip46_address_initializer, dst =
- ip46_address_initializer;
- u32 instance = ~0;
- u32 outer_table_id = 0;
- gre_tunnel_type_t t_type = GRE_TUNNEL_TYPE_L3;
- tunnel_mode_t t_mode = TUNNEL_MODE_P2P;
- tunnel_encap_decap_flags_t flags = TUNNEL_ENCAP_DECAP_FLAG_NONE;
- u32 session_id = 0;
- int rv;
- u8 is_add = 1;
- u32 sw_if_index;
- clib_error_t *error = NULL;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "del"))
- is_add = 0;
- else if (unformat (line_input, "instance %d", &instance))
- ;
- else if (unformat (line_input, "src %U", unformat_ip46_address, &src))
- ;
- else if (unformat (line_input, "dst %U", unformat_ip46_address, &dst))
- ;
- else if (unformat (line_input, "outer-table-id %d", &outer_table_id))
- ;
- else if (unformat (line_input, "multipoint"))
- t_mode = TUNNEL_MODE_MP;
- else if (unformat (line_input, "teb"))
- t_type = GRE_TUNNEL_TYPE_TEB;
- else if (unformat (line_input, "erspan %d", &session_id))
- t_type = GRE_TUNNEL_TYPE_ERSPAN;
- else
- if (unformat
- (line_input, "flags %U", unformat_tunnel_encap_decap_flags,
- &flags))
- ;
- else
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- goto done;
- }
- }
-
- if (ip46_address_is_equal (&src, &dst))
- {
- error = clib_error_return (0, "src and dst are identical");
- goto done;
- }
-
- if (t_mode != TUNNEL_MODE_MP && ip46_address_is_zero (&dst))
- {
- error = clib_error_return (0, "destination address not specified");
- goto done;
- }
-
- if (ip46_address_is_zero (&src))
- {
- error = clib_error_return (0, "source address not specified");
- goto done;
- }
-
- if (ip46_address_is_ip4 (&src) != ip46_address_is_ip4 (&dst))
- {
- error =
- clib_error_return (0, "src and dst address must be the same AF");
- goto done;
- }
-
- clib_memset (a, 0, sizeof (*a));
- a->is_add = is_add;
- a->outer_table_id = outer_table_id;
- a->type = t_type;
- a->mode = t_mode;
- a->session_id = session_id;
- a->is_ipv6 = !ip46_address_is_ip4 (&src);
- a->instance = instance;
- a->flags = flags;
- clib_memcpy (&a->src, &src, sizeof (a->src));
- clib_memcpy (&a->dst, &dst, sizeof (a->dst));
-
- rv = vnet_gre_tunnel_add_del (a, &sw_if_index);
-
- switch (rv)
- {
- case 0:
- vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
- vnet_get_main (), sw_if_index);
- break;
- case VNET_API_ERROR_IF_ALREADY_EXISTS:
- error = clib_error_return (0, "GRE tunnel already exists...");
- goto done;
- case VNET_API_ERROR_NO_SUCH_FIB:
- error = clib_error_return (0, "outer table ID %d doesn't exist\n",
- outer_table_id);
- goto done;
- case VNET_API_ERROR_NO_SUCH_ENTRY:
- error = clib_error_return (0, "GRE tunnel doesn't exist");
- goto done;
- case VNET_API_ERROR_INVALID_SESSION_ID:
- error = clib_error_return (0, "session ID %d out of range\n",
- session_id);
- goto done;
- case VNET_API_ERROR_INSTANCE_IN_USE:
- error = clib_error_return (0, "Instance is in use");
- goto done;
- default:
- error =
- clib_error_return (0, "vnet_gre_tunnel_add_del returned %d", rv);
- goto done;
- }
-
-done:
- unformat_free (line_input);
-
- return error;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (create_gre_tunnel_command, static) = {
- .path = "create gre tunnel",
-  .short_help = "create gre tunnel src <addr> dst <addr> [instance <n>] "
-                "[outer-table-id <fib-id>] [teb | erspan <session-id>] "
-                "[multipoint] [flags <flags>] [del]",
- .function = create_gre_tunnel_command_fn,
-};
-/* *INDENT-ON* */
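
Example invocations accepted by the parser above; the ERSPAN session ID must not exceed GTK_SESSION_ID_MAX (0x3ff):

  create gre tunnel src 10.0.0.1 dst 10.0.0.2
  create gre tunnel src 2001:db8::1 dst 2001:db8::2 teb
  create gre tunnel src 10.0.0.1 dst 10.0.0.2 erspan 7 outer-table-id 1
  create gre tunnel src 10.0.0.1 multipoint
  create gre tunnel src 10.0.0.1 dst 10.0.0.2 del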
-
-static clib_error_t *
-show_gre_tunnel_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- gre_main_t *gm = &gre_main;
- gre_tunnel_t *t;
- u32 ti = ~0;
-
- if (pool_elts (gm->tunnels) == 0)
- vlib_cli_output (vm, "No GRE tunnels configured...");
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "%d", &ti))
- ;
- else
- break;
- }
-
- if (~0 == ti)
- {
- /* *INDENT-OFF* */
- pool_foreach (t, gm->tunnels)
- {
- vlib_cli_output (vm, "%U", format_gre_tunnel, t);
- }
- /* *INDENT-ON* */
- }
- else
- {
- t = pool_elt_at_index (gm->tunnels, ti);
-
- vlib_cli_output (vm, "%U", format_gre_tunnel, t);
- }
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_gre_tunnel_command, static) = {
- .path = "show gre tunnel",
- .function = show_gre_tunnel_command_fn,
-};
-/* *INDENT-ON* */
-
-const static teib_vft_t gre_teib_vft = {
- .nv_added = gre_teib_entry_added,
- .nv_deleted = gre_teib_entry_deleted,
-};
-
-/* force inclusion from application's main.c */
-clib_error_t *
-gre_interface_init (vlib_main_t * vm)
-{
- teib_register (&gre_teib_vft);
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gre_interface_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/gre/node.c b/src/vnet/gre/node.c
deleted file mode 100644
index fdd3118bf3c..00000000000
--- a/src/vnet/gre/node.c
+++ /dev/null
@@ -1,598 +0,0 @@
-/*
- * node.c: gre packet processing
- *
- * Copyright (c) 2012 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vlib/vlib.h>
-#include <vnet/pg/pg.h>
-#include <vnet/gre/gre.h>
-#include <vnet/mpls/mpls.h>
-#include <vppinfra/sparse_vec.h>
-
-#define foreach_gre_input_next \
-_(PUNT, "error-punt") \
-_(DROP, "error-drop") \
-_(ETHERNET_INPUT, "ethernet-input") \
-_(IP4_INPUT, "ip4-input") \
-_(IP6_INPUT, "ip6-input") \
-_(MPLS_INPUT, "mpls-input")
-
-typedef enum
-{
-#define _(s,n) GRE_INPUT_NEXT_##s,
- foreach_gre_input_next
-#undef _
- GRE_INPUT_N_NEXT,
-} gre_input_next_t;
-
-typedef struct
-{
- u32 tunnel_id;
- u32 length;
- ip46_address_t src;
- ip46_address_t dst;
-} gre_rx_trace_t;
-
-extern u8 *format_gre_rx_trace (u8 * s, va_list * args);
-
-#ifndef CLIB_MARCH_VARIANT
-u8 *
-format_gre_rx_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gre_rx_trace_t *t = va_arg (*args, gre_rx_trace_t *);
-
- s = format (s, "GRE: tunnel %d len %d src %U dst %U",
- t->tunnel_id, clib_net_to_host_u16 (t->length),
- format_ip46_address, &t->src, IP46_TYPE_ANY,
- format_ip46_address, &t->dst, IP46_TYPE_ANY);
- return s;
-}
-#endif /* CLIB_MARCH_VARIANT */
-
-typedef struct
-{
- /* Sparse vector mapping gre protocol in network byte order
- to next index. */
- u16 *next_by_protocol;
-} gre_input_runtime_t;
-
-always_inline void
-gre_trace (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b,
- u32 tun_sw_if_index, const ip6_header_t * ip6,
- const ip4_header_t * ip4, int is_ipv6)
-{
- gre_rx_trace_t *tr = vlib_add_trace (vm, node,
- b, sizeof (*tr));
- tr->tunnel_id = tun_sw_if_index;
- if (is_ipv6)
- {
- tr->length = ip6->payload_length;
- tr->src.ip6.as_u64[0] = ip6->src_address.as_u64[0];
- tr->src.ip6.as_u64[1] = ip6->src_address.as_u64[1];
- tr->dst.ip6.as_u64[0] = ip6->dst_address.as_u64[0];
- tr->dst.ip6.as_u64[1] = ip6->dst_address.as_u64[1];
- }
- else
- {
- tr->length = ip4->length;
- tr->src.as_u64[0] = tr->src.as_u64[1] = 0;
- tr->dst.as_u64[0] = tr->dst.as_u64[1] = 0;
- tr->src.ip4.as_u32 = ip4->src_address.as_u32;
- tr->dst.ip4.as_u32 = ip4->dst_address.as_u32;
- }
-}
-
-always_inline void
-gre_tunnel_get (const gre_main_t * gm, vlib_node_runtime_t * node,
- vlib_buffer_t * b, u16 * next, const gre_tunnel_key_t * key,
- gre_tunnel_key_t * cached_key, u32 * tun_sw_if_index,
- u32 * cached_tun_sw_if_index, int is_ipv6)
-{
- const uword *p;
- p = is_ipv6 ? hash_get_mem (gm->tunnel_by_key6, &key->gtk_v6)
- : hash_get_mem (gm->tunnel_by_key4, &key->gtk_v4);
- if (PREDICT_FALSE (!p))
- {
- *next = GRE_INPUT_NEXT_DROP;
- b->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL];
- *tun_sw_if_index = ~0;
- }
- else
- {
- const gre_tunnel_t *tun;
- tun = pool_elt_at_index (gm->tunnels, *p);
- *cached_tun_sw_if_index = *tun_sw_if_index = tun->sw_if_index;
- if (is_ipv6)
- cached_key->gtk_v6 = key->gtk_v6;
- else
- cached_key->gtk_v4 = key->gtk_v4;
- }
-}
-
-always_inline uword
-gre_input (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame,
- const int is_ipv6)
-{
- gre_main_t *gm = &gre_main;
- u32 *from, n_left_from;
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
- u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
- u16 cached_protocol = ~0;
- u32 cached_next_index = SPARSE_VEC_INVALID_INDEX;
- u32 cached_tun_sw_if_index = ~0;
- gre_tunnel_key_t cached_key;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- vlib_get_buffers (vm, from, bufs, n_left_from);
-
- if (is_ipv6)
- clib_memset (&cached_key.gtk_v6, 0xff, sizeof (cached_key.gtk_v6));
- else
- clib_memset (&cached_key.gtk_v4, 0xff, sizeof (cached_key.gtk_v4));
-
- while (n_left_from >= 2)
- {
- const ip6_header_t *ip6[2];
- const ip4_header_t *ip4[2];
- const gre_header_t *gre[2];
- u32 nidx[2];
- next_info_t ni[2];
- u8 type[2];
- u16 version[2];
- u32 len[2];
- gre_tunnel_key_t key[2];
- u8 matched[2];
- u32 tun_sw_if_index[2];
-
- if (PREDICT_TRUE (n_left_from >= 6))
- {
- vlib_prefetch_buffer_data (b[2], LOAD);
- vlib_prefetch_buffer_data (b[3], LOAD);
- vlib_prefetch_buffer_header (b[4], STORE);
- vlib_prefetch_buffer_header (b[5], STORE);
- }
-
- if (is_ipv6)
- {
- /* ip6_local hands us the ip header, not the gre header */
- ip6[0] = vlib_buffer_get_current (b[0]);
- ip6[1] = vlib_buffer_get_current (b[1]);
- gre[0] = (void *) (ip6[0] + 1);
- gre[1] = (void *) (ip6[1] + 1);
- vlib_buffer_advance (b[0], sizeof (*ip6[0]) + sizeof (*gre[0]));
- vlib_buffer_advance (b[1], sizeof (*ip6[0]) + sizeof (*gre[0]));
- }
- else
- {
- /* ip4_local hands us the ip header, not the gre header */
- ip4[0] = vlib_buffer_get_current (b[0]);
- ip4[1] = vlib_buffer_get_current (b[1]);
- gre[0] = (void *) (ip4[0] + 1);
- gre[1] = (void *) (ip4[1] + 1);
- vlib_buffer_advance (b[0], sizeof (*ip4[0]) + sizeof (*gre[0]));
- vlib_buffer_advance (b[1], sizeof (*ip4[0]) + sizeof (*gre[0]));
- }
-
- if (PREDICT_TRUE (cached_protocol == gre[0]->protocol))
- {
- nidx[0] = cached_next_index;
- }
- else
- {
- cached_next_index = nidx[0] =
- sparse_vec_index (gm->next_by_protocol, gre[0]->protocol);
- cached_protocol = gre[0]->protocol;
- }
- if (PREDICT_TRUE (cached_protocol == gre[1]->protocol))
- {
- nidx[1] = cached_next_index;
- }
- else
- {
- cached_next_index = nidx[1] =
- sparse_vec_index (gm->next_by_protocol, gre[1]->protocol);
- cached_protocol = gre[1]->protocol;
- }
-
- ni[0] = vec_elt (gm->next_by_protocol, nidx[0]);
- ni[1] = vec_elt (gm->next_by_protocol, nidx[1]);
- next[0] = ni[0].next_index;
- next[1] = ni[1].next_index;
- type[0] = ni[0].tunnel_type;
- type[1] = ni[1].tunnel_type;
-
- b[0]->error = nidx[0] == SPARSE_VEC_INVALID_INDEX
- ? node->errors[GRE_ERROR_UNKNOWN_PROTOCOL]
- : node->errors[GRE_ERROR_NONE];
- b[1]->error = nidx[1] == SPARSE_VEC_INVALID_INDEX
- ? node->errors[GRE_ERROR_UNKNOWN_PROTOCOL]
- : node->errors[GRE_ERROR_NONE];
-
- version[0] = clib_net_to_host_u16 (gre[0]->flags_and_version);
- version[1] = clib_net_to_host_u16 (gre[1]->flags_and_version);
- version[0] &= GRE_VERSION_MASK;
- version[1] &= GRE_VERSION_MASK;
-
- b[0]->error = version[0]
- ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[0]->error;
- next[0] = version[0] ? GRE_INPUT_NEXT_DROP : next[0];
- b[1]->error = version[1]
- ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[1]->error;
- next[1] = version[1] ? GRE_INPUT_NEXT_DROP : next[1];
-
- len[0] = vlib_buffer_length_in_chain (vm, b[0]);
- len[1] = vlib_buffer_length_in_chain (vm, b[1]);
-
- /* always search for P2P types in the DP */
- if (is_ipv6)
- {
- gre_mk_key6 (&ip6[0]->dst_address,
- &ip6[0]->src_address,
- vnet_buffer (b[0])->ip.fib_index,
- type[0], TUNNEL_MODE_P2P, 0, &key[0].gtk_v6);
- gre_mk_key6 (&ip6[1]->dst_address,
- &ip6[1]->src_address,
- vnet_buffer (b[1])->ip.fib_index,
- type[1], TUNNEL_MODE_P2P, 0, &key[1].gtk_v6);
- matched[0] = gre_match_key6 (&cached_key.gtk_v6, &key[0].gtk_v6);
- matched[1] = gre_match_key6 (&cached_key.gtk_v6, &key[1].gtk_v6);
- }
- else
- {
- gre_mk_key4 (ip4[0]->dst_address,
- ip4[0]->src_address,
- vnet_buffer (b[0])->ip.fib_index,
- type[0], TUNNEL_MODE_P2P, 0, &key[0].gtk_v4);
- gre_mk_key4 (ip4[1]->dst_address,
- ip4[1]->src_address,
- vnet_buffer (b[1])->ip.fib_index,
- type[1], TUNNEL_MODE_P2P, 0, &key[1].gtk_v4);
- matched[0] = gre_match_key4 (&cached_key.gtk_v4, &key[0].gtk_v4);
- matched[1] = gre_match_key4 (&cached_key.gtk_v4, &key[1].gtk_v4);
- }
-
- tun_sw_if_index[0] = cached_tun_sw_if_index;
- tun_sw_if_index[1] = cached_tun_sw_if_index;
- if (PREDICT_FALSE (!matched[0]))
- gre_tunnel_get (gm, node, b[0], &next[0], &key[0], &cached_key,
- &tun_sw_if_index[0], &cached_tun_sw_if_index,
- is_ipv6);
- if (PREDICT_FALSE (!matched[1]))
- gre_tunnel_get (gm, node, b[1], &next[1], &key[1], &cached_key,
- &tun_sw_if_index[1], &cached_tun_sw_if_index,
- is_ipv6);
-
- if (PREDICT_TRUE (next[0] > GRE_INPUT_NEXT_DROP))
- {
- vlib_increment_combined_counter (&gm->vnet_main->
- interface_main.combined_sw_if_counters
- [VNET_INTERFACE_COUNTER_RX],
- vm->thread_index,
- tun_sw_if_index[0],
- 1 /* packets */ ,
- len[0] /* bytes */ );
- vnet_buffer (b[0])->sw_if_index[VLIB_RX] = tun_sw_if_index[0];
- }
- if (PREDICT_TRUE (next[1] > GRE_INPUT_NEXT_DROP))
- {
- vlib_increment_combined_counter (&gm->vnet_main->
- interface_main.combined_sw_if_counters
- [VNET_INTERFACE_COUNTER_RX],
- vm->thread_index,
- tun_sw_if_index[1],
- 1 /* packets */ ,
- len[1] /* bytes */ );
- vnet_buffer (b[1])->sw_if_index[VLIB_RX] = tun_sw_if_index[1];
- }
-
- vnet_buffer (b[0])->sw_if_index[VLIB_TX] = (u32) ~0;
- vnet_buffer (b[1])->sw_if_index[VLIB_TX] = (u32) ~0;
-
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- gre_trace (vm, node, b[0], tun_sw_if_index[0], ip6[0], ip4[0],
- is_ipv6);
- if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED))
- gre_trace (vm, node, b[1], tun_sw_if_index[1], ip6[1], ip4[1],
- is_ipv6);
-
- b += 2;
- next += 2;
- n_left_from -= 2;
- }
-
- while (n_left_from >= 1)
- {
- const ip6_header_t *ip6[1];
- const ip4_header_t *ip4[1];
- const gre_header_t *gre[1];
- u32 nidx[1];
- next_info_t ni[1];
- u8 type[1];
- u16 version[1];
- u32 len[1];
- gre_tunnel_key_t key[1];
- u8 matched[1];
- u32 tun_sw_if_index[1];
-
- if (PREDICT_TRUE (n_left_from >= 3))
- {
- vlib_prefetch_buffer_data (b[1], LOAD);
- vlib_prefetch_buffer_header (b[2], STORE);
- }
-
- if (is_ipv6)
- {
- /* ip6_local hands us the ip header, not the gre header */
- ip6[0] = vlib_buffer_get_current (b[0]);
- gre[0] = (void *) (ip6[0] + 1);
- vlib_buffer_advance (b[0], sizeof (*ip6[0]) + sizeof (*gre[0]));
- }
- else
- {
- /* ip4_local hands us the ip header, not the gre header */
- ip4[0] = vlib_buffer_get_current (b[0]);
- gre[0] = (void *) (ip4[0] + 1);
- vlib_buffer_advance (b[0], sizeof (*ip4[0]) + sizeof (*gre[0]));
- }
-
- if (PREDICT_TRUE (cached_protocol == gre[0]->protocol))
- {
- nidx[0] = cached_next_index;
- }
- else
- {
- cached_next_index = nidx[0] =
- sparse_vec_index (gm->next_by_protocol, gre[0]->protocol);
- cached_protocol = gre[0]->protocol;
- }
-
- ni[0] = vec_elt (gm->next_by_protocol, nidx[0]);
- next[0] = ni[0].next_index;
- type[0] = ni[0].tunnel_type;
-
- b[0]->error = nidx[0] == SPARSE_VEC_INVALID_INDEX
- ? node->errors[GRE_ERROR_UNKNOWN_PROTOCOL]
- : node->errors[GRE_ERROR_NONE];
-
- version[0] = clib_net_to_host_u16 (gre[0]->flags_and_version);
- version[0] &= GRE_VERSION_MASK;
-
- b[0]->error = version[0]
- ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[0]->error;
- next[0] = version[0] ? GRE_INPUT_NEXT_DROP : next[0];
-
- len[0] = vlib_buffer_length_in_chain (vm, b[0]);
-
- if (is_ipv6)
- {
- gre_mk_key6 (&ip6[0]->dst_address,
- &ip6[0]->src_address,
- vnet_buffer (b[0])->ip.fib_index,
- type[0], TUNNEL_MODE_P2P, 0, &key[0].gtk_v6);
- matched[0] = gre_match_key6 (&cached_key.gtk_v6, &key[0].gtk_v6);
- }
- else
- {
- gre_mk_key4 (ip4[0]->dst_address,
- ip4[0]->src_address,
- vnet_buffer (b[0])->ip.fib_index,
- type[0], TUNNEL_MODE_P2P, 0, &key[0].gtk_v4);
- matched[0] = gre_match_key4 (&cached_key.gtk_v4, &key[0].gtk_v4);
- }
-
- tun_sw_if_index[0] = cached_tun_sw_if_index;
- if (PREDICT_FALSE (!matched[0]))
- gre_tunnel_get (gm, node, b[0], &next[0], &key[0], &cached_key,
- &tun_sw_if_index[0], &cached_tun_sw_if_index,
- is_ipv6);
-
- if (PREDICT_TRUE (next[0] > GRE_INPUT_NEXT_DROP))
- {
- vlib_increment_combined_counter (&gm->vnet_main->
- interface_main.combined_sw_if_counters
- [VNET_INTERFACE_COUNTER_RX],
- vm->thread_index,
- tun_sw_if_index[0],
- 1 /* packets */ ,
- len[0] /* bytes */ );
- vnet_buffer (b[0])->sw_if_index[VLIB_RX] = tun_sw_if_index[0];
- }
-
- vnet_buffer (b[0])->sw_if_index[VLIB_TX] = (u32) ~0;
-
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- gre_trace (vm, node, b[0], tun_sw_if_index[0], ip6[0], ip4[0],
- is_ipv6);
-
- b += 1;
- next += 1;
- n_left_from -= 1;
- }
-
- vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
-
- vlib_node_increment_counter (vm,
- is_ipv6 ? gre6_input_node.index :
- gre4_input_node.index, GRE_ERROR_PKTS_DECAP,
- n_left_from);
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (gre4_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return gre_input (vm, node, from_frame, /* is_ip6 */ 0);
-}
-
-VLIB_NODE_FN (gre6_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return gre_input (vm, node, from_frame, /* is_ip6 */ 1);
-}
-
-static char *gre_error_strings[] = {
-#define gre_error(n,s) s,
-#include "error.def"
-#undef gre_error
-};
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gre4_input_node) = {
- .name = "gre4-input",
- /* Takes a vector of packets. */
- .vector_size = sizeof (u32),
-
- .n_errors = GRE_N_ERROR,
- .error_strings = gre_error_strings,
-
- .n_next_nodes = GRE_INPUT_N_NEXT,
- .next_nodes = {
-#define _(s,n) [GRE_INPUT_NEXT_##s] = n,
- foreach_gre_input_next
-#undef _
- },
-
- .format_buffer = format_gre_header_with_length,
- .format_trace = format_gre_rx_trace,
- .unformat_buffer = unformat_gre_header,
-};
-
-VLIB_REGISTER_NODE (gre6_input_node) = {
- .name = "gre6-input",
- /* Takes a vector of packets. */
- .vector_size = sizeof (u32),
-
- .runtime_data_bytes = sizeof (gre_input_runtime_t),
-
- .n_errors = GRE_N_ERROR,
- .error_strings = gre_error_strings,
-
- .n_next_nodes = GRE_INPUT_N_NEXT,
- .next_nodes = {
-#define _(s,n) [GRE_INPUT_NEXT_##s] = n,
- foreach_gre_input_next
-#undef _
- },
-
- .format_buffer = format_gre_header_with_length,
- .format_trace = format_gre_rx_trace,
- .unformat_buffer = unformat_gre_header,
-};
-/* *INDENT-ON* */
-
-#ifndef CLIB_MARCH_VARIANT
-void
-gre_register_input_protocol (vlib_main_t * vm,
- gre_protocol_t protocol, u32 node_index,
- gre_tunnel_type_t tunnel_type)
-{
- gre_main_t *em = &gre_main;
- gre_protocol_info_t *pi;
- next_info_t *n;
- u32 i;
-
- {
- clib_error_t *error = vlib_call_init_function (vm, gre_input_init);
- if (error)
- clib_error_report (error);
- }
-
- pi = gre_get_protocol_info (em, protocol);
- pi->node_index = node_index;
- pi->tunnel_type = tunnel_type;
- pi->next_index = vlib_node_add_next (vm, gre4_input_node.index, node_index);
- i = vlib_node_add_next (vm, gre6_input_node.index, node_index);
- ASSERT (i == pi->next_index);
-
- /* Setup gre protocol -> next index sparse vector mapping. */
- n = sparse_vec_validate (em->next_by_protocol,
- clib_host_to_net_u16 (protocol));
- n->next_index = pi->next_index;
- n->tunnel_type = tunnel_type;
-}
-
-static void
-gre_setup_node (vlib_main_t * vm, u32 node_index)
-{
- vlib_node_t *n = vlib_get_node (vm, node_index);
- pg_node_t *pn = pg_get_node (node_index);
-
- n->format_buffer = format_gre_header_with_length;
- n->unformat_buffer = unformat_gre_header;
- pn->unformat_edit = unformat_pg_gre_header;
-}
-
-static clib_error_t *
-gre_input_init (vlib_main_t * vm)
-{
- gre_main_t *gm = &gre_main;
- vlib_node_t *ethernet_input, *ip4_input, *ip6_input, *mpls_unicast_input;
-
- {
- clib_error_t *error;
- error = vlib_call_init_function (vm, gre_init);
- if (error)
- clib_error_report (error);
- }
-
- gre_setup_node (vm, gre4_input_node.index);
- gre_setup_node (vm, gre6_input_node.index);
-
- gm->next_by_protocol = sparse_vec_new
- ( /* elt bytes */ sizeof (gm->next_by_protocol[0]),
- /* bits in index */ BITS (((gre_header_t *) 0)->protocol));
-
- /* These could be moved to the supported protocol input node defn's */
- ethernet_input = vlib_get_node_by_name (vm, (u8 *) "ethernet-input");
- ASSERT (ethernet_input);
- ip4_input = vlib_get_node_by_name (vm, (u8 *) "ip4-input");
- ASSERT (ip4_input);
- ip6_input = vlib_get_node_by_name (vm, (u8 *) "ip6-input");
- ASSERT (ip6_input);
- mpls_unicast_input = vlib_get_node_by_name (vm, (u8 *) "mpls-input");
- ASSERT (mpls_unicast_input);
-
- gre_register_input_protocol (vm, GRE_PROTOCOL_teb,
- ethernet_input->index, GRE_TUNNEL_TYPE_TEB);
-
- gre_register_input_protocol (vm, GRE_PROTOCOL_ip4,
- ip4_input->index, GRE_TUNNEL_TYPE_L3);
-
- gre_register_input_protocol (vm, GRE_PROTOCOL_ip6,
- ip6_input->index, GRE_TUNNEL_TYPE_L3);
-
- gre_register_input_protocol (vm, GRE_PROTOCOL_mpls_unicast,
- mpls_unicast_input->index, GRE_TUNNEL_TYPE_L3);
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (gre_input_init);
-
-#endif /* CLIB_MARCH_VARIANT */
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/gre/packet.h b/src/vnet/gre/packet.h
index bbd67d565c5..bbda2df3f68 100644
--- a/src/vnet/gre/packet.h
+++ b/src/vnet/gre/packet.h
@@ -138,7 +138,6 @@ typedef struct
This field is platform dependent.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u32 seq_num;
union
@@ -158,7 +157,6 @@ typedef CLIB_PACKED (struct {
erspan_t2_t erspan;
}) erspan_t2_header_t;
-/* *INDENT-ON* */
/* u64 template for ERSPAN type 2 header with both EN bits set */
#define ERSPAN_HDR2 0x1000180000000000ul
diff --git a/src/vnet/gre/pg.c b/src/vnet/gre/pg.c
deleted file mode 100644
index 38a3a07ebad..00000000000
--- a/src/vnet/gre/pg.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * hdlc_pg.c: packet generator gre interface
- *
- * Copyright (c) 2012 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vlib/vlib.h>
-#include <vnet/pg/pg.h>
-#include <vnet/gre/gre.h>
-
-typedef struct
-{
- pg_edit_t flags_and_version;
- pg_edit_t protocol;
-} pg_gre_header_t;
-
-static inline void
-pg_gre_header_init (pg_gre_header_t * e)
-{
- pg_edit_init (&e->flags_and_version, gre_header_t, flags_and_version);
- pg_edit_init (&e->protocol, gre_header_t, protocol);
-}
-
-uword
-unformat_pg_gre_header (unformat_input_t * input, va_list * args)
-{
- pg_stream_t *s = va_arg (*args, pg_stream_t *);
- pg_gre_header_t *h;
- u32 group_index, error;
-
- h = pg_create_edit_group (s, sizeof (h[0]), sizeof (gre_header_t),
- &group_index);
- pg_gre_header_init (h);
-
- pg_edit_set_fixed (&h->flags_and_version, 0);
-
- error = 1;
- if (!unformat (input, "%U",
- unformat_pg_edit,
- unformat_gre_protocol_net_byte_order, &h->protocol))
- goto done;
-
- {
- gre_main_t *pm = &gre_main;
- gre_protocol_info_t *pi = 0;
- pg_node_t *pg_node = 0;
-
- if (h->protocol.type == PG_EDIT_FIXED)
- {
- u16 t = *(u16 *) h->protocol.values[PG_EDIT_LO];
- pi = gre_get_protocol_info (pm, clib_net_to_host_u16 (t));
- if (pi && pi->node_index != ~0)
- pg_node = pg_get_node (pi->node_index);
- }
-
- if (pg_node && pg_node->unformat_edit
- && unformat_user (input, pg_node->unformat_edit, s))
- ;
- }
-
- error = 0;
-done:
- if (error)
- pg_free_edit_group (s);
- return error == 0;
-}
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/gso/FEATURE.yaml b/src/vnet/gso/FEATURE.yaml
index d3db0cc23e3..5f6275caca2 100644
--- a/src/vnet/gso/FEATURE.yaml
+++ b/src/vnet/gso/FEATURE.yaml
@@ -1,6 +1,6 @@
---
name: VNET GSO
-maintainer: ayourtch@gmail.com sykazmi@cisco.com
+maintainer: ayourtch@gmail.com mohsin.kazmi14@gmail.com
features:
- Basic GSO support
- GSO for VLAN tagged packets
diff --git a/src/vnet/gso/cli.c b/src/vnet/gso/cli.c
index 060ce812fad..11dbaad728f 100644
--- a/src/vnet/gso/cli.c
+++ b/src/vnet/gso/cli.c
@@ -76,13 +76,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_feature_gso_command, static) = {
.path = "set interface feature gso",
.short_help = "set interface feature gso <intfc> [enable | disable]",
.function = set_interface_feature_gso_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/gso/gro_func.h b/src/vnet/gso/gro_func.h
index c9464bdc063..e2e4e93850b 100644
--- a/src/vnet/gso/gro_func.h
+++ b/src/vnet/gso/gro_func.h
@@ -25,6 +25,7 @@
#include <vnet/udp/udp_packet.h>
#include <vnet/tcp/tcp_packet.h>
#include <vnet/vnet.h>
+#include <vnet/interface.h>
#define GRO_MIN_PACKET_SIZE 256
#define GRO_PADDED_PACKET_SIZE 64
@@ -383,6 +384,7 @@ gro_fixup_header (vlib_main_t *vm, vlib_buffer_t *b0, u32 ack_number, u8 is_l2)
1 /* is_ip6 */ );
vnet_buffer2 (b0)->gso_size = b0->current_length - gho0.hdr_sz;
+ vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
if (gho0.gho_flags & GHO_F_IP4)
{
@@ -391,6 +393,7 @@ gro_fixup_header (vlib_main_t *vm, vlib_buffer_t *b0, u32 ack_number, u8 is_l2)
ip4->length =
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
gho0.l3_hdr_offset);
+ vnet_buffer (b0)->l3_hdr_offset = (u8 *) ip4 - b0->data;
b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP4);
vnet_buffer_offload_flags_set (b0, (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM |
VNET_BUFFER_OFFLOAD_F_IP_CKSUM));
@@ -402,12 +405,15 @@ gro_fixup_header (vlib_main_t *vm, vlib_buffer_t *b0, u32 ack_number, u8 is_l2)
ip6->payload_length =
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
gho0.l4_hdr_offset);
+ vnet_buffer (b0)->l3_hdr_offset = (u8 *) ip6 - b0->data;
b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP6);
vnet_buffer_offload_flags_set (b0, VNET_BUFFER_OFFLOAD_F_TCP_CKSUM);
}
tcp_header_t *tcp0 =
(tcp_header_t *) (vlib_buffer_get_current (b0) + gho0.l4_hdr_offset);
+ vnet_buffer (b0)->l4_hdr_offset = (u8 *) tcp0 - b0->data;
+ vnet_buffer2 (b0)->gso_l4_hdr_sz = tcp_header_bytes (tcp0);
tcp0->ack_number = ack_number;
b0->flags &= ~VLIB_BUFFER_IS_TRACED;
}
@@ -444,9 +450,9 @@ vnet_gro_flow_table_flush (vlib_main_t * vm, gro_flow_table_t * flow_table,
}
static_always_inline void
-vnet_gro_flow_table_schedule_node_on_dispatcher (vlib_main_t * vm,
- gro_flow_table_t *
- flow_table)
+vnet_gro_flow_table_schedule_node_on_dispatcher (vlib_main_t *vm,
+ vnet_hw_if_tx_queue_t *txq,
+ gro_flow_table_t *flow_table)
{
if (gro_flow_table_is_timeout (vm, flow_table))
{
@@ -457,9 +463,13 @@ vnet_gro_flow_table_schedule_node_on_dispatcher (vlib_main_t * vm,
{
u32 node_index = flow_table->node_index;
vlib_frame_t *f = vlib_get_frame_to_node (vm, node_index);
+ vnet_hw_if_tx_frame_t *ft = vlib_frame_scalar_args (f);
u32 *f_to = vlib_frame_vector_args (f);
u32 i = 0;
+ ft->shared_queue = txq->shared_queue;
+ ft->queue_id = txq->queue_id;
+
while (i < n_to)
{
f_to[f->n_vectors] = to[i];
diff --git a/src/vnet/gso/gso.h b/src/vnet/gso/gso.h
index 926ce634fd0..dee5da5c70b 100644
--- a/src/vnet/gso/gso.h
+++ b/src/vnet/gso/gso.h
@@ -18,6 +18,7 @@
#include <vnet/vnet.h>
#include <vnet/gso/hdr_offset_parser.h>
+#include <vnet/ip/ip_psh_cksum.h>
typedef struct
{
@@ -33,6 +34,274 @@ u32 gso_segment_buffer (vlib_main_t *vm, vnet_interface_per_thread_data_t *ptd,
u32 bi, vlib_buffer_t *b, generic_header_offset_t *gho,
u32 n_bytes_b, u8 is_l2, u8 is_ip6);
+static_always_inline void
+gso_init_bufs_from_template_base (vlib_buffer_t **bufs, vlib_buffer_t *b0,
+ u32 flags, u16 n_bufs, u16 hdr_sz)
+{
+ u32 i = n_bufs;
+ while (i >= 6)
+ {
+ /* prefetches */
+ CLIB_PREFETCH (bufs[2], 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (bufs[3], 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ vlib_prefetch_buffer_data (bufs[4], LOAD);
+ vlib_prefetch_buffer_data (bufs[5], LOAD);
+
+ /* copying objects from cacheline 0 */
+ bufs[0]->current_data = 0;
+ bufs[1]->current_data = 0;
+
+ bufs[0]->current_length = hdr_sz;
+ bufs[1]->current_length = hdr_sz;
+
+ bufs[0]->flags = bufs[1]->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID | flags;
+ bufs[0]->flow_id = bufs[1]->flow_id = b0->flow_id;
+ bufs[0]->error = bufs[1]->error = b0->error;
+ bufs[0]->current_config_index = bufs[1]->current_config_index =
+ b0->current_config_index;
+
+ clib_memcpy_fast (&bufs[0]->opaque, &b0->opaque, sizeof (b0->opaque));
+ clib_memcpy_fast (&bufs[1]->opaque, &b0->opaque, sizeof (b0->opaque));
+
+ /* copying objects from cacheline 1 */
+ bufs[0]->trace_handle = b0->trace_handle;
+ bufs[1]->trace_handle = b0->trace_handle;
+
+ bufs[0]->total_length_not_including_first_buffer = 0;
+ bufs[1]->total_length_not_including_first_buffer = 0;
+
+ clib_memcpy_fast (&bufs[0]->opaque2, &b0->opaque2, sizeof (b0->opaque2));
+ clib_memcpy_fast (&bufs[1]->opaque2, &b0->opaque2, sizeof (b0->opaque2));
+
+ /* copying data */
+ clib_memcpy_fast (bufs[0]->data, vlib_buffer_get_current (b0), hdr_sz);
+ clib_memcpy_fast (bufs[1]->data, vlib_buffer_get_current (b0), hdr_sz);
+
+ /* header offset fixup */
+ vnet_buffer (bufs[0])->l2_hdr_offset -= b0->current_data;
+ vnet_buffer (bufs[0])->l3_hdr_offset -= b0->current_data;
+ vnet_buffer (bufs[0])->l4_hdr_offset -= b0->current_data;
+ vnet_buffer2 (bufs[0])->outer_l3_hdr_offset -= b0->current_data;
+ vnet_buffer2 (bufs[0])->outer_l4_hdr_offset -= b0->current_data;
+
+ vnet_buffer (bufs[1])->l2_hdr_offset -= b0->current_data;
+ vnet_buffer (bufs[1])->l3_hdr_offset -= b0->current_data;
+ vnet_buffer (bufs[1])->l4_hdr_offset -= b0->current_data;
+ vnet_buffer2 (bufs[1])->outer_l3_hdr_offset -= b0->current_data;
+ vnet_buffer2 (bufs[1])->outer_l4_hdr_offset -= b0->current_data;
+
+ bufs += 2;
+ i -= 2;
+ }
+
+ while (i > 0)
+ {
+ /* copying objects from cacheline 0 */
+ bufs[0]->current_data = 0;
+ bufs[0]->current_length = hdr_sz;
+ bufs[0]->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID | flags;
+ bufs[0]->flow_id = b0->flow_id;
+ bufs[0]->error = b0->error;
+ bufs[0]->current_config_index = b0->current_config_index;
+ clib_memcpy_fast (&bufs[0]->opaque, &b0->opaque, sizeof (b0->opaque));
+
+ /* copying objects from cacheline 1 */
+ bufs[0]->trace_handle = b0->trace_handle;
+ bufs[0]->total_length_not_including_first_buffer = 0;
+ clib_memcpy_fast (&bufs[0]->opaque2, &b0->opaque2, sizeof (b0->opaque2));
+
+ /* copying data */
+ clib_memcpy_fast (bufs[0]->data, vlib_buffer_get_current (b0), hdr_sz);
+
+ /* header offset fixup */
+ vnet_buffer (bufs[0])->l2_hdr_offset -= b0->current_data;
+ vnet_buffer (bufs[0])->l3_hdr_offset -= b0->current_data;
+ vnet_buffer (bufs[0])->l4_hdr_offset -= b0->current_data;
+ vnet_buffer2 (bufs[0])->outer_l3_hdr_offset -= b0->current_data;
+ vnet_buffer2 (bufs[0])->outer_l4_hdr_offset -= b0->current_data;
+
+ bufs++;
+ i--;
+ }
+}
+
+static_always_inline void
+gso_fixup_segmented_buf (vlib_main_t *vm, vlib_buffer_t *b0, u32 next_tcp_seq,
+ int is_l2, u8 oflags, u16 hdr_sz, u16 l4_hdr_sz,
+ clib_ip_csum_t *c, u8 tcp_flags, u8 is_prefetch,
+ vlib_buffer_t *b1)
+{
+
+ i16 l3_hdr_offset = vnet_buffer (b0)->l3_hdr_offset;
+ i16 l4_hdr_offset = vnet_buffer (b0)->l4_hdr_offset;
+
+ ip4_header_t *ip4 = (ip4_header_t *) (b0->data + l3_hdr_offset);
+ ip6_header_t *ip6 = (ip6_header_t *) (b0->data + l3_hdr_offset);
+ tcp_header_t *tcp = (tcp_header_t *) (b0->data + l4_hdr_offset);
+
+ tcp->flags = tcp_flags;
+ tcp->seq_number = clib_host_to_net_u32 (next_tcp_seq);
+ c->odd = 0;
+
+ if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
+ {
+ ip4->length =
+ clib_host_to_net_u16 (b0->current_length - hdr_sz +
+ (l4_hdr_offset - l3_hdr_offset) + l4_hdr_sz);
+ ip4->checksum = 0;
+ ip4->checksum = ip4_header_checksum (ip4);
+ vnet_buffer_offload_flags_clear (b0, (VNET_BUFFER_OFFLOAD_F_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TCP_CKSUM));
+ c->sum += clib_mem_unaligned (&ip4->src_address, u32);
+ c->sum += clib_mem_unaligned (&ip4->dst_address, u32);
+ c->sum += clib_host_to_net_u32 (
+ (clib_net_to_host_u16 (ip4->length) - ip4_header_bytes (ip4)) +
+ (ip4->protocol << 16));
+ }
+ else
+ {
+ ip6->payload_length =
+ clib_host_to_net_u16 (b0->current_length - hdr_sz + l4_hdr_sz);
+ vnet_buffer_offload_flags_clear (b0, VNET_BUFFER_OFFLOAD_F_TCP_CKSUM);
+ ip6_psh_t psh = { 0 };
+ u32 *p = (u32 *) &psh;
+ psh.src = ip6->src_address;
+ psh.dst = ip6->dst_address;
+ psh.l4len = ip6->payload_length;
+ psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol);
+ for (int i = 0; i < 10; i++)
+ c->sum += p[i];
+ }
+
+ if (is_prefetch)
+ CLIB_PREFETCH (vlib_buffer_get_current (b1) + hdr_sz,
+ CLIB_CACHE_LINE_BYTES, LOAD);
+
+ clib_ip_csum_chunk (c, (u8 *) tcp, l4_hdr_sz);
+ tcp->checksum = clib_ip_csum_fold (c);
+
+ if (!is_l2 && ((oflags & VNET_BUFFER_OFFLOAD_F_TNL_MASK) == 0))
+ {
+ u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+
+ ip_adjacency_t *adj0 = adj_get (adj_index0);
+
+ if (adj0->lookup_next_index == IP_LOOKUP_NEXT_MIDCHAIN &&
+ adj0->sub_type.midchain.fixup_func)
+ /* calls e.g. ipip44_fixup */
+ adj0->sub_type.midchain.fixup_func (
+ vm, adj0, b0, adj0->sub_type.midchain.fixup_data);
+ }
+}
+
+static_always_inline u32
+gso_segment_buffer_inline (vlib_main_t *vm,
+ vnet_interface_per_thread_data_t *ptd,
+ vlib_buffer_t *b, int is_l2)
+{
+ vlib_buffer_t **bufs = 0;
+ u32 n_tx_bytes = 0;
+
+ u8 oflags = vnet_buffer (b)->oflags;
+ i16 l4_hdr_offset = vnet_buffer (b)->l4_hdr_offset;
+ u16 gso_size = vnet_buffer2 (b)->gso_size;
+ u16 l4_hdr_sz = vnet_buffer2 (b)->gso_l4_hdr_sz;
+
+ u8 tcp_flags = 0, tcp_flags_no_fin_psh = 0;
+ u32 default_bflags =
+ b->flags & ~(VNET_BUFFER_F_GSO | VLIB_BUFFER_NEXT_PRESENT);
+ u16 hdr_sz = (l4_hdr_offset - b->current_data) + l4_hdr_sz;
+ u32 next_tcp_seq = 0, tcp_seq = 0;
+ u32 data_size = vlib_buffer_length_in_chain (vm, b) - hdr_sz;
+ u16 size =
+ clib_min (gso_size, vlib_buffer_get_default_data_size (vm) - hdr_sz);
+ u16 n_alloc = 0, n_bufs = ((data_size + size - 1) / size);
+ clib_ip_csum_t c = { .sum = 0, .odd = 0 };
+ u8 *src_ptr, *dst_ptr;
+ u16 src_left, dst_left, bytes_to_copy;
+ u32 i = 0;
+
+ vec_validate (ptd->split_buffers, n_bufs - 1);
+ n_alloc = vlib_buffer_alloc (vm, ptd->split_buffers, n_bufs);
+ if (n_alloc < n_bufs)
+ {
+ vlib_buffer_free (vm, ptd->split_buffers, n_alloc);
+ return 0;
+ }
+
+ vec_validate (bufs, n_bufs - 1);
+ vlib_get_buffers (vm, ptd->split_buffers, bufs, n_bufs);
+
+ tcp_header_t *tcp = (tcp_header_t *) (b->data + l4_hdr_offset);
+
+ tcp_seq = next_tcp_seq = clib_net_to_host_u32 (tcp->seq_number);
+ /* store original flags for last packet and reset FIN and PSH */
+ tcp_flags = tcp->flags;
+ tcp_flags_no_fin_psh = tcp->flags & ~(TCP_FLAG_FIN | TCP_FLAG_PSH);
+ tcp->checksum = 0;
+
+ gso_init_bufs_from_template_base (bufs, b, default_bflags, n_bufs, hdr_sz);
+
+ src_ptr = vlib_buffer_get_current (b) + hdr_sz;
+ src_left = b->current_length - hdr_sz;
+ dst_ptr = vlib_buffer_get_current (bufs[i]) + hdr_sz;
+ dst_left = size;
+
+ while (data_size)
+ {
+ bytes_to_copy = clib_min (src_left, dst_left);
+ clib_ip_csum_and_copy_chunk (&c, src_ptr, dst_ptr, bytes_to_copy);
+
+ src_left -= bytes_to_copy;
+ src_ptr += bytes_to_copy;
+ data_size -= bytes_to_copy;
+ dst_left -= bytes_to_copy;
+ dst_ptr += bytes_to_copy;
+ next_tcp_seq += bytes_to_copy;
+ bufs[i]->current_length += bytes_to_copy;
+
+ if (0 == src_left)
+ {
+ /* init src to the next buffer in chain */
+ if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b = vlib_get_buffer (vm, b->next_buffer);
+ src_left = b->current_length;
+ src_ptr = vlib_buffer_get_current (b);
+ }
+ else
+ {
+ ASSERT (data_size == 0);
+ break;
+ }
+ }
+ if (0 == dst_left && data_size)
+ {
+ vlib_prefetch_buffer_header (bufs[i + 1], LOAD);
+
+ n_tx_bytes += bufs[i]->current_length;
+ gso_fixup_segmented_buf (vm, bufs[i], tcp_seq, is_l2, oflags, hdr_sz,
+ l4_hdr_sz, &c, tcp_flags_no_fin_psh, 1,
+ bufs[i + 1]);
+ i++;
+ dst_left = size;
+ dst_ptr = vlib_buffer_get_current (bufs[i]) + hdr_sz;
+ tcp_seq = next_tcp_seq;
+ // reset clib_ip_csum_t
+ c.odd = 0;
+ c.sum = 0;
+ }
+ }
+
+ ASSERT ((i + 1) == n_alloc);
+ n_tx_bytes += bufs[i]->current_length;
+ gso_fixup_segmented_buf (vm, bufs[i], tcp_seq, is_l2, oflags, hdr_sz,
+ l4_hdr_sz, &c, tcp_flags, 0, NULL);
+
+ vec_free (bufs);
+ return n_tx_bytes;
+}
+
#endif /* included_gso_h */
/*
diff --git a/src/vnet/gso/gso.rst b/src/vnet/gso/gso.rst
new file mode 100644
index 00000000000..78788f82216
--- /dev/null
+++ b/src/vnet/gso/gso.rst
@@ -0,0 +1,154 @@
+.. _gso_doc:
+
+Generic Segmentation Offload
+============================
+
+Overview
+________
+
+Modern physical NICs provide offload capabilities that let software-based
+network stacks move some packet processing from the CPU to the NIC. TCP
+Segmentation Offload (TSO) is one of many such capabilities. A software-based
+network stack can hand big (up to 64KB) TCP packets to the NIC, and the NIC
+will segment them into Maximum Segment Size packets. The network stack thus
+saves CPU cycles by processing a few big packets instead of many small ones.
+
+GSO is the software-based analogue of TSO and is used by virtual interfaces,
+i.e. tap, virtio, af_packet, vhost-user etc. Typically, virtual interfaces
+advertise the capability to offload big (up to 64KB) packets, but in reality
+they just pass the packet as-is to the other end without segmenting it. Hence,
+GSO support must be validated across the whole setup; otherwise the packet
+will be dropped when it is processed by a virtual entity that does not
+support GSO.
+
+The GSO Infrastructure
+_______________________
+
+Software-based network stacks implement GSO packet segmentation in software
+when the egress interface (virtual or physical) does not support GSO or TSO
+offload. VPP implements a GSO stack to chunk GSO packets in software in that
+case.
+
+It is implemented as a feature node on the interface-output feature arc. It
+supports basic GSO, GSO with VXLAN tunnels and GSO with IPIP tunnels. GSO with
+Geneve and GSO with NVGRE are not supported today, but one can enable the GSO
+feature node on tunnel interfaces (i.e. IPsec etc.) to segment GSO packets
+before they are tunneled.
+
+Virtual interfaces do not support GSO with tunnels, so special care is needed
+when the user configures tunnel(s) along with GSO in the setup. In such a
+case, either enable the GSO feature node on the tunnel interface (meaning the
+GSO packets are chunked before they are encapsulated in the tunnel, as shown
+below) or, if it is enabled, disable the GSO offload on the egress interface
+(this only works for VXLAN and IPIP tunnels).
+
+Similarly, many physical interfaces do not support GSO with tunnels either.
+The user can apply the same configuration as described above for virtual
+interfaces.
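+
+For example, for a (hypothetical) IPIP tunnel interface named ``ipip0``, the
+first option corresponds to:
+
+::
+
+ set interface feature gso ipip0 enable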
+
+Data structures
+^^^^^^^^^^^^^^^
+
+VPP's ``vlib_buffer_t`` uses the ``VNET_BUFFER_F_GSO`` flag to mark a buffer carrying a
+GSO packet and also contains GSO-related metadata fields:
+
+.. code:: c
+
+ i16 l2_hdr_offset;
+ i16 l3_hdr_offset;
+ i16 l4_hdr_offset;
+
+ u16 gso_size;
+ u16 gso_l4_hdr_sz;
+ i16 outer_l3_hdr_offset;
+ i16 outer_l4_hdr_offset;
+
+Packet header offsets are computed relative to the ``vlib_buffer_t`` data
+pointer.
+
+``l2_hdr_offset``, ``l3_hdr_offset`` and ``l4_hdr_offset`` are set on input by
+checksum-offload or GSO enabled interfaces or features, i.e. the host stack.
+Appropriate offload flags are also set in ``vnet_buffer_oflags_t`` to reflect the
+actual packet offloads; they are used later at the egress interface tx node,
+the interface-output node or the GSO node to process the packet appropriately.
+These fields live in the first cache line and do not incur extra cycles, as
+most VPP features already fetch the first cache line of ``vlib_buffer_t`` to
+access the ``current_data`` or ``current_length`` fields of the packet.
+
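+As a minimal sketch (not part of this patch's code, but mirroring what the
+GRO fixup in ``gro_func.h`` does), an input path that has parsed an IPv4/TCP
+packet can populate these fields like this:
+
+.. code:: c
+
+ /* b0 is the buffer; ip4 and tcp0 point into b0->data; hdr_sz is the
+    assumed total L2-L4 header size for this packet */
+ vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
+ vnet_buffer (b0)->l3_hdr_offset = (u8 *) ip4 - b0->data;
+ vnet_buffer (b0)->l4_hdr_offset = (u8 *) tcp0 - b0->data;
+ vnet_buffer2 (b0)->gso_size = b0->current_length - hdr_sz;
+ vnet_buffer2 (b0)->gso_l4_hdr_sz = tcp_header_bytes (tcp0);
+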
+Please note that ``gso_size``, ``gso_l4_hdr_sz``, ``outer_l3_hdr_offset`` and
+``outer_l4_hdr_offset`` are in the second cache line of ``vlib_buffer_t``. Accessing
+them in the data plane incurs some extra cycles, but that cost is amortized
+over the (up to 64KB) packet.
+
+``gso_size`` and ``gso_l4_hdr_sz`` are set on input by GSO-enabled interfaces (tap,
+virtio, af_packet etc.) or features (VPP host stack) when a GSO packet is
+received (a chain of buffers with the first one having the ``VNET_BUFFER_F_GSO``
+bit set). They need to persist all the way to interface-output: in case the
+egress interface is not GSO-enabled, the segmentation is performed there and
+these values are used to chunk the payload appropriately.
+
+``outer_l3_hdr_offset`` and ``outer_l4_hdr_offset`` are used for tunneled packets
+(i.e. VXLAN or IPIP): ``outer_l3_hdr_offset`` points to the outer l3 header of
+the tunnel headers and ``outer_l4_hdr_offset`` points to the outer l4 header of
+the tunnel headers, if any.
+
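+A GSO-aware node can later locate the headers directly from this metadata. A
+minimal illustration (hypothetical helper, not part of this patch):
+
+.. code:: c
+
+ static_always_inline tcp_header_t *
+ gso_example_get_tcp_header (vlib_buffer_t *b)
+ {
+   ASSERT (b->flags & VNET_BUFFER_F_GSO);
+   return (tcp_header_t *) (b->data + vnet_buffer (b)->l4_hdr_offset);
+ }
+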
+The following helper functions are used to set and clear the offload flags in
+the ``vlib_buffer_t`` metadata:
+
+.. code:: c
+
+ static_always_inline void
+ vnet_buffer_offload_flags_set (vlib_buffer_t *b, vnet_buffer_oflags_t oflags)
+ {
+ if (b->flags & VNET_BUFFER_F_OFFLOAD)
+ {
+ /* add a flag to existing offload */
+ vnet_buffer (b)->oflags |= oflags;
+ }
+ else
+ {
+ /* no offload yet: reset offload flags to new value */
+ vnet_buffer (b)->oflags = oflags;
+ b->flags |= VNET_BUFFER_F_OFFLOAD;
+ }
+ }
+
+ static_always_inline void
+ vnet_buffer_offload_flags_clear (vlib_buffer_t *b, vnet_buffer_oflags_t oflags)
+ {
+ vnet_buffer (b)->oflags &= ~oflags;
+ if (0 == vnet_buffer (b)->oflags)
+ b->flags &= ~VNET_BUFFER_F_OFFLOAD;
+ }
+
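+As an illustrative usage (matching what this patch does in ``gro_func.h``), a
+node that has prepared an inner IPv4/TCP packet can request both checksum
+offloads in one call:
+
+.. code:: c
+
+ vnet_buffer_offload_flags_set (b0, (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM |
+                                     VNET_BUFFER_OFFLOAD_F_IP_CKSUM));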
+
+Enabling the GSO feature node
+-----------------------------
+
+The GSO feature node is not enabled by default when the egress interface does
+not support GSO. The user has to enable it explicitly using the API or the CLI.
+
+GSO API
+^^^^^^^
+
+This API message is used to enable or disable the GSO feature node on an
+interface.
+
+.. code:: c
+
+ autoreply define feature_gso_enable_disable
+ {
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ bool enable_disable;
+ option vat_help = "<intfc> | sw_if_index <nn> [enable | disable]";
+ };
+
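+For example, via ``vpp_api_test`` the message can be exercised as follows
+(assuming the target interface has ``sw_if_index`` 1, and that the vat command
+name matches the message name):
+
+::
+
+ feature_gso_enable_disable sw_if_index 1 enable
+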
+GSO CLI
+^^^^^^^
+
+::
+
+ set interface feature gso <intfc> [enable | disable]
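+
+For example, to enable the GSO feature node on a (hypothetical) interface
+``tap0``:
+
+::
+
+ set interface feature gso tap0 enable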
diff --git a/src/vnet/gso/hdr_offset_parser.h b/src/vnet/gso/hdr_offset_parser.h
index e846aaa6fd2..08037f57ea0 100644
--- a/src/vnet/gso/hdr_offset_parser.h
+++ b/src/vnet/gso/hdr_offset_parser.h
@@ -21,8 +21,10 @@
#include <vnet/ip/ip6_packet.h>
#include <vnet/udp/udp_local.h>
#include <vnet/udp/udp_packet.h>
+#include <vnet/tcp/tcp_packet.h>
#include <vnet/vnet.h>
-#include <vnet/vxlan/vxlan_packet.h>
+
+#define VXLAN_HEADER_SIZE 8
#define foreach_gho_flag \
_( 0, IP4) \
@@ -155,8 +157,6 @@ vnet_geneve_inner_header_parser_inline (vlib_buffer_t * b0,
/* not supported yet */
if ((gho->gho_flags & GHO_F_GENEVE_TUNNEL) == 0)
return;
-
- ASSERT (0);
}
static_always_inline void
@@ -166,8 +166,6 @@ vnet_gre_inner_header_parser_inline (vlib_buffer_t * b0,
/* not supported yet */
if ((gho->gho_flags & GHO_F_GRE_TUNNEL) == 0)
return;
-
- ASSERT (0);
}
static_always_inline void
@@ -440,7 +438,7 @@ vnet_generic_outer_header_parser_inline (vlib_buffer_t * b0,
if (UDP_DST_PORT_vxlan == clib_net_to_host_u16 (udp->dst_port))
{
gho->gho_flags |= GHO_F_VXLAN_TUNNEL;
- gho->hdr_sz += sizeof (vxlan_header_t);
+ gho->hdr_sz += VXLAN_HEADER_SIZE;
}
else if (UDP_DST_PORT_geneve == clib_net_to_host_u16 (udp->dst_port))
{
diff --git a/src/vnet/gso/node.c b/src/vnet/gso/node.c
index c48d8fefe16..c1d4459476e 100644
--- a/src/vnet/gso/node.c
+++ b/src/vnet/gso/node.c
@@ -80,119 +80,108 @@ format_gso_trace (u8 * s, va_list * args)
return s;
}
-static_always_inline u16
-tso_segment_ipip_tunnel_fixup (vlib_main_t * vm,
- vnet_interface_per_thread_data_t * ptd,
- vlib_buffer_t * sb0,
- generic_header_offset_t * gho)
+static_always_inline void
+tso_segment_ipip_tunnel_fixup (vlib_main_t *vm,
+ vnet_interface_per_thread_data_t *ptd,
+ vlib_buffer_t *sb0)
{
u16 n_tx_bufs = vec_len (ptd->split_buffers);
- u16 i = 0, n_tx_bytes = 0;
+ u16 i = 0;
while (i < n_tx_bufs)
{
vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[i]);
- vnet_get_outer_header (b0, gho);
- clib_memcpy_fast (vlib_buffer_get_current (b0),
- vlib_buffer_get_current (sb0), gho->outer_hdr_sz);
-
- ip4_header_t *ip4 =
- (ip4_header_t *) (vlib_buffer_get_current (b0) +
- gho->outer_l3_hdr_offset);
- ip6_header_t *ip6 =
- (ip6_header_t *) (vlib_buffer_get_current (b0) +
- gho->outer_l3_hdr_offset);
-
- if (gho->gho_flags & GHO_F_OUTER_IP4)
+ i16 outer_l3_hdr_offset = vnet_buffer2 (b0)->outer_l3_hdr_offset;
+ i16 l3_hdr_offset = vnet_buffer (b0)->l3_hdr_offset;
+
+ ip4_header_t *ip4 = (ip4_header_t *) (b0->data + outer_l3_hdr_offset);
+ ip6_header_t *ip6 = (ip6_header_t *) (b0->data + outer_l3_hdr_offset);
+
+ if (vnet_buffer (b0)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM)
{
- ip4->length =
- clib_host_to_net_u16 (b0->current_length -
- gho->outer_l3_hdr_offset);
+ ip4->length = clib_host_to_net_u16 (
+ b0->current_length - (outer_l3_hdr_offset - b0->current_data));
ip4->checksum = ip4_header_checksum (ip4);
+ vnet_buffer_offload_flags_clear (
+ b0, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
}
- else if (gho->gho_flags & GHO_F_OUTER_IP6)
+ else
{
- ip6->payload_length =
- clib_host_to_net_u16 (b0->current_length -
- gho->outer_l4_hdr_offset);
+ ip6->payload_length = clib_host_to_net_u16 (
+ b0->current_length - (l3_hdr_offset - b0->current_data));
+ vnet_buffer_offload_flags_clear (b0, VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
}
- n_tx_bytes += gho->outer_hdr_sz;
i++;
}
- return n_tx_bytes;
}
static_always_inline void
-tso_segment_vxlan_tunnel_headers_fixup (vlib_main_t * vm, vlib_buffer_t * b,
- generic_header_offset_t * gho)
+tso_segment_vxlan_tunnel_headers_fixup (vlib_main_t *vm, vlib_buffer_t *b)
{
- u8 proto = 0;
ip4_header_t *ip4 = 0;
ip6_header_t *ip6 = 0;
udp_header_t *udp = 0;
+ i16 outer_l3_hdr_offset = vnet_buffer2 (b)->outer_l3_hdr_offset;
+ i16 outer_l4_hdr_offset = vnet_buffer2 (b)->outer_l4_hdr_offset;
- ip4 =
- (ip4_header_t *) (vlib_buffer_get_current (b) + gho->outer_l3_hdr_offset);
- ip6 =
- (ip6_header_t *) (vlib_buffer_get_current (b) + gho->outer_l3_hdr_offset);
- udp =
- (udp_header_t *) (vlib_buffer_get_current (b) + gho->outer_l4_hdr_offset);
+ ip4 = (ip4_header_t *) (b->data + outer_l3_hdr_offset);
+ ip6 = (ip6_header_t *) (b->data + outer_l3_hdr_offset);
+ udp = (udp_header_t *) (b->data + outer_l4_hdr_offset);
- if (gho->gho_flags & GHO_F_OUTER_IP4)
+ if (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM)
{
- proto = ip4->protocol;
- ip4->length =
- clib_host_to_net_u16 (b->current_length - gho->outer_l3_hdr_offset);
+ ip4->length = clib_host_to_net_u16 (
+ b->current_length - (outer_l3_hdr_offset - b->current_data));
ip4->checksum = ip4_header_checksum (ip4);
+ if (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM)
+ {
+ udp->length = clib_host_to_net_u16 (
+ b->current_length - (outer_l4_hdr_offset - b->current_data));
+ // udp checksum is 0 in udp tunnels
+ udp->checksum = 0;
+ }
+ vnet_buffer_offload_flags_clear (
+ b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_VXLAN);
}
- else if (gho->gho_flags & GHO_F_OUTER_IP6)
- {
- proto = ip6->protocol;
- ip6->payload_length =
- clib_host_to_net_u16 (b->current_length - gho->outer_l4_hdr_offset);
- }
- if (proto == IP_PROTOCOL_UDP)
+ else
{
- int bogus;
- udp->length =
- clib_host_to_net_u16 (b->current_length - gho->outer_l4_hdr_offset);
- udp->checksum = 0;
- if (gho->gho_flags & GHO_F_OUTER_IP6)
+ ip6->payload_length = clib_host_to_net_u16 (
+ b->current_length - (outer_l4_hdr_offset - b->current_data));
+
+ if (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM)
{
+ int bogus;
+ udp->length = ip6->payload_length;
+ // udp checksum is 0 in udp tunnels
+ udp->checksum = 0;
udp->checksum =
ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus);
+ vnet_buffer_offload_flags_clear (
+ b, VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_VXLAN);
}
- else if (gho->gho_flags & GHO_F_OUTER_IP4)
- {
- udp->checksum = ip4_tcp_udp_compute_checksum (vm, b, ip4);
- }
- /* FIXME: it should be OUTER_UDP_CKSUM */
- vnet_buffer_offload_flags_clear (b, VNET_BUFFER_OFFLOAD_F_UDP_CKSUM);
}
}
-static_always_inline u16
-tso_segment_vxlan_tunnel_fixup (vlib_main_t * vm,
- vnet_interface_per_thread_data_t * ptd,
- vlib_buffer_t * sb0,
- generic_header_offset_t * gho)
+static_always_inline void
+tso_segment_vxlan_tunnel_fixup (vlib_main_t *vm,
+ vnet_interface_per_thread_data_t *ptd,
+ vlib_buffer_t *sb0)
{
u16 n_tx_bufs = vec_len (ptd->split_buffers);
- u16 i = 0, n_tx_bytes = 0;
+ u16 i = 0;
while (i < n_tx_bufs)
{
vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[i]);
- vnet_get_outer_header (b0, gho);
- clib_memcpy_fast (vlib_buffer_get_current (b0),
- vlib_buffer_get_current (sb0), gho->outer_hdr_sz);
- tso_segment_vxlan_tunnel_headers_fixup (vm, b0, gho);
- n_tx_bytes += gho->outer_hdr_sz;
+ tso_segment_vxlan_tunnel_headers_fixup (vm, b0);
i++;
}
- return n_tx_bytes;
}
static_always_inline u16
@@ -555,30 +544,28 @@ vnet_gso_node_inline (vlib_main_t * vm,
if (PREDICT_FALSE (hi->sw_if_index != swif0))
{
hi0 = vnet_get_sup_hw_interface (vnm, swif0);
- if ((hi0->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO) ==
- 0 &&
+ if ((hi0->caps & VNET_HW_IF_CAP_TCP_GSO) == 0 &&
(b[0]->flags & VNET_BUFFER_F_GSO))
break;
}
if (PREDICT_FALSE (hi->sw_if_index != swif1))
{
hi1 = vnet_get_sup_hw_interface (vnm, swif1);
- if (!(hi1->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO) &&
+ if (!(hi1->caps & VNET_HW_IF_CAP_TCP_GSO) &&
(b[1]->flags & VNET_BUFFER_F_GSO))
break;
}
if (PREDICT_FALSE (hi->sw_if_index != swif2))
{
hi2 = vnet_get_sup_hw_interface (vnm, swif2);
- if ((hi2->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO) ==
- 0 &&
+ if ((hi2->caps & VNET_HW_IF_CAP_TCP_GSO) == 0 &&
(b[2]->flags & VNET_BUFFER_F_GSO))
break;
}
if (PREDICT_FALSE (hi->sw_if_index != swif3))
{
hi3 = vnet_get_sup_hw_interface (vnm, swif3);
- if (!(hi3->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO) &&
+ if (!(hi3->caps & VNET_HW_IF_CAP_TCP_GSO) &&
(b[3]->flags & VNET_BUFFER_F_GSO))
break;
}
@@ -589,6 +576,7 @@ vnet_gso_node_inline (vlib_main_t * vm,
t0->flags = b[0]->flags & VNET_BUFFER_F_GSO;
t0->gso_size = vnet_buffer2 (b[0])->gso_size;
t0->gso_l4_hdr_sz = vnet_buffer2 (b[0])->gso_l4_hdr_sz;
+ clib_memset (&t0->gho, 0, sizeof (t0->gho));
vnet_generic_header_offset_parser (b[0], &t0->gho, is_l2,
is_ip4, is_ip6);
}
@@ -598,6 +586,7 @@ vnet_gso_node_inline (vlib_main_t * vm,
t1->flags = b[1]->flags & VNET_BUFFER_F_GSO;
t1->gso_size = vnet_buffer2 (b[1])->gso_size;
t1->gso_l4_hdr_sz = vnet_buffer2 (b[1])->gso_l4_hdr_sz;
+ clib_memset (&t1->gho, 0, sizeof (t1->gho));
vnet_generic_header_offset_parser (b[1], &t1->gho, is_l2,
is_ip4, is_ip6);
}
@@ -607,6 +596,7 @@ vnet_gso_node_inline (vlib_main_t * vm,
t2->flags = b[2]->flags & VNET_BUFFER_F_GSO;
t2->gso_size = vnet_buffer2 (b[2])->gso_size;
t2->gso_l4_hdr_sz = vnet_buffer2 (b[2])->gso_l4_hdr_sz;
+ clib_memset (&t2->gho, 0, sizeof (t2->gho));
vnet_generic_header_offset_parser (b[2], &t2->gho, is_l2,
is_ip4, is_ip6);
}
@@ -616,6 +606,7 @@ vnet_gso_node_inline (vlib_main_t * vm,
t3->flags = b[3]->flags & VNET_BUFFER_F_GSO;
t3->gso_size = vnet_buffer2 (b[3])->gso_size;
t3->gso_l4_hdr_sz = vnet_buffer2 (b[3])->gso_l4_hdr_sz;
+ clib_memset (&t3->gho, 0, sizeof (t3->gho));
vnet_generic_header_offset_parser (b[3], &t3->gho, is_l2,
is_ip4, is_ip6);
}
@@ -649,7 +640,7 @@ vnet_gso_node_inline (vlib_main_t * vm,
if (PREDICT_FALSE (hi->sw_if_index != swif0))
{
hi0 = vnet_get_sup_hw_interface (vnm, swif0);
- if ((hi0->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO) == 0 &&
+ if ((hi0->caps & VNET_HW_IF_CAP_TCP_GSO) == 0 &&
(b[0]->flags & VNET_BUFFER_F_GSO))
do_segmentation0 = 1;
}
@@ -669,6 +660,7 @@ vnet_gso_node_inline (vlib_main_t * vm,
t0->flags = b[0]->flags & VNET_BUFFER_F_GSO;
t0->gso_size = vnet_buffer2 (b[0])->gso_size;
t0->gso_l4_hdr_sz = vnet_buffer2 (b[0])->gso_l4_hdr_sz;
+ clib_memset (&t0->gho, 0, sizeof (t0->gho));
vnet_generic_header_offset_parser (b[0], &t0->gho, is_l2,
is_ip4, is_ip6);
}
@@ -685,37 +677,10 @@ vnet_gso_node_inline (vlib_main_t * vm,
to_next -= 1;
n_left_to_next += 1;
/* undo the counting. */
- generic_header_offset_t gho = { 0 };
- u32 n_bytes_b0 = vlib_buffer_length_in_chain (vm, b[0]);
u32 n_tx_bytes = 0;
- u32 inner_is_ip6 = is_ip6;
-
- vnet_generic_header_offset_parser (b[0], &gho, is_l2,
- is_ip4, is_ip6);
-
- if (PREDICT_FALSE (gho.gho_flags & GHO_F_TUNNEL))
- {
- if (PREDICT_FALSE
- (gho.gho_flags & (GHO_F_GRE_TUNNEL |
- GHO_F_GENEVE_TUNNEL)))
- {
- /* not supported yet */
- drop_one_buffer_and_count (vm, vnm, node, from - 1,
- hi->sw_if_index,
- GSO_ERROR_UNHANDLED_TYPE);
- b += 1;
- continue;
- }
-
- vnet_get_inner_header (b[0], &gho);
-
- n_bytes_b0 -= gho.outer_hdr_sz;
- inner_is_ip6 = (gho.gho_flags & GHO_F_IP6) != 0;
- }
n_tx_bytes =
- tso_segment_buffer (vm, ptd, bi0, b[0], &gho, n_bytes_b0,
- is_l2, inner_is_ip6);
+ gso_segment_buffer_inline (vm, ptd, b[0], is_l2);
if (PREDICT_FALSE (n_tx_bytes == 0))
{
@@ -726,21 +691,15 @@ vnet_gso_node_inline (vlib_main_t * vm,
continue;
}
-
- if (PREDICT_FALSE (gho.gho_flags & GHO_F_VXLAN_TUNNEL))
+ if (PREDICT_FALSE (vnet_buffer (b[0])->oflags &
+ VNET_BUFFER_OFFLOAD_F_TNL_VXLAN))
{
- vnet_get_outer_header (b[0], &gho);
- n_tx_bytes +=
- tso_segment_vxlan_tunnel_fixup (vm, ptd, b[0], &gho);
+ tso_segment_vxlan_tunnel_fixup (vm, ptd, b[0]);
}
- else
- if (PREDICT_FALSE
- (gho.gho_flags & (GHO_F_IPIP_TUNNEL |
- GHO_F_IPIP6_TUNNEL)))
+ else if (PREDICT_FALSE (vnet_buffer (b[0])->oflags &
+ VNET_BUFFER_OFFLOAD_F_TNL_IPIP))
{
- vnet_get_outer_header (b[0], &gho);
- n_tx_bytes +=
- tso_segment_ipip_tunnel_fixup (vm, ptd, b[0], &gho);
+ tso_segment_ipip_tunnel_fixup (vm, ptd, b[0]);
}
u16 n_tx_bufs = vec_len (ptd->split_buffers);
@@ -774,7 +733,7 @@ vnet_gso_node_inline (vlib_main_t * vm,
to_next, n_left_to_next);
}
/* The buffers were enqueued. Reset the length */
- _vec_len (ptd->split_buffers) = 0;
+ vec_set_len (ptd->split_buffers, 0);
/* Free the now segmented buffer */
vlib_buffer_free_one (vm, bi0);
b += 1;
@@ -808,8 +767,7 @@ vnet_gso_inline (vlib_main_t * vm,
hi = vnet_get_sup_hw_interface (vnm,
vnet_buffer (b)->sw_if_index[VLIB_TX]);
- if (hi->caps & (VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_VXLAN_TNL_GSO))
+ if (hi->caps & (VNET_HW_IF_CAP_TCP_GSO | VNET_HW_IF_CAP_VXLAN_TNL_GSO))
return vnet_gso_node_inline (vm, node, frame, vnm, hi,
is_l2, is_ip4, is_ip6,
/* do_segmentation */ 0);
@@ -849,7 +807,6 @@ VLIB_NODE_FN (gso_ip6_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1 /* ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (gso_l2_ip4_node) = {
.vector_size = sizeof (u32),
diff --git a/src/vnet/handoff.c b/src/vnet/handoff.c
index f64d5ad3a69..e9c3bb6de67 100644
--- a/src/vnet/handoff.c
+++ b/src/vnet/handoff.c
@@ -15,13 +15,13 @@
*/
#include <vnet/vnet.h>
-#include <vppinfra/xxhash.h>
+#include <vnet/hash/hash.h>
#include <vlib/threads.h>
-#include <vnet/handoff.h>
#include <vnet/feature/feature.h>
typedef struct
{
+ vnet_hash_fn_t hash_fn;
uword *workers_bitmap;
u32 *workers;
} per_inteface_handoff_data_t;
@@ -36,14 +36,14 @@ typedef struct
/* Worker handoff index */
u32 frame_queue_index;
-
- u64 (*hash_fn) (ethernet_header_t *);
} handoff_main_t;
extern handoff_main_t handoff_main;
#ifndef CLIB_MARCH_VARIANT
+
handoff_main_t handoff_main;
+
#endif /* CLIB_MARCH_VARIANT */
typedef struct
@@ -78,12 +78,35 @@ format_worker_handoff_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
worker_handoff_trace_t *t = va_arg (*args, worker_handoff_trace_t *);
- s =
- format (s, "worker-handoff: sw_if_index %d, next_worker %d, buffer 0x%x",
- t->sw_if_index, t->next_worker_index, t->buffer_index);
+ s = format (s, "worker-handoff: sw_if_index %d, next_worker %d, buffer 0x%x",
+ t->sw_if_index, t->next_worker_index, t->buffer_index);
return s;
}
+static void
+worker_handoff_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t **bufs, u16 *threads, u32 n_vectors)
+{
+ worker_handoff_trace_t *t;
+ vlib_buffer_t **b;
+ u16 *ti;
+
+ b = bufs;
+ ti = threads;
+
+ while (n_vectors)
+ {
+ t = vlib_add_trace (vm, node, b[0], sizeof (*t));
+ t->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
+ t->next_worker_index = ti[0];
+ t->buffer_index = vlib_get_buffer_index (vm, b[0]);
+
+ b += 1;
+ ti += 1;
+ n_vectors -= 1;
+ }
+}
+
VLIB_NODE_FN (worker_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
@@ -102,26 +125,16 @@ VLIB_NODE_FN (worker_handoff_node) (vlib_main_t * vm,
while (n_left_from > 0)
{
- u32 sw_if_index0;
- u32 hash;
- u64 hash_key;
per_inteface_handoff_data_t *ihd0;
- u32 index0;
-
+ u32 sw_if_index0, hash, index0;
+ void *data;
sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
- ASSERT (hm->if_data);
ihd0 = vec_elt_at_index (hm->if_data, sw_if_index0);
- /*
- * Force unknown traffic onto worker 0,
- * and into ethernet-input. $$$$ add more hashes.
- */
-
/* Compute ingress LB hash */
- hash_key = hm->hash_fn ((ethernet_header_t *)
- vlib_buffer_get_current (b[0]));
- hash = (u32) clib_xxhash (hash_key);
+ data = vlib_buffer_get_current (b[0]);
+ ihd0->hash_fn (&data, &hash, 1);
/* if input node did not specify next index, then packet
should go to ethernet-input */
@@ -133,22 +146,16 @@ VLIB_NODE_FN (worker_handoff_node) (vlib_main_t * vm,
ti[0] = hm->first_worker_index + ihd0->workers[index0];
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
- {
- worker_handoff_trace_t *t =
- vlib_add_trace (vm, node, b[0], sizeof (*t));
- t->sw_if_index = sw_if_index0;
- t->next_worker_index = ti[0];
- t->buffer_index = vlib_get_buffer_index (vm, b[0]);
- }
-
/* next */
n_left_from -= 1;
ti += 1;
b += 1;
}
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ worker_handoff_trace_frame (vm, node, bufs, thread_indices,
+ frame->n_vectors);
+
n_enq = vlib_buffer_enqueue_to_thread (vm, node, hm->frame_queue_index, from,
thread_indices, frame->n_vectors, 1);
@@ -159,7 +166,6 @@ VLIB_NODE_FN (worker_handoff_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (worker_handoff_node) = {
.name = "worker-handoff",
.vector_size = sizeof (u32),
@@ -174,12 +180,12 @@ VLIB_REGISTER_NODE (worker_handoff_node) = {
},
};
-/* *INDENT-ON* */
-
#ifndef CLIB_MARCH_VARIANT
+
int
-interface_handoff_enable_disable (vlib_main_t * vm, u32 sw_if_index,
- uword * bitmap, int enable_disable)
+interface_handoff_enable_disable (vlib_main_t *vm, u32 sw_if_index,
+ uword *bitmap, u8 is_sym, int is_l4,
+ int enable_disable)
{
handoff_main_t *hm = &handoff_main;
vnet_sw_interface_t *sw;
@@ -212,16 +218,34 @@ interface_handoff_enable_disable (vlib_main_t * vm, u32 sw_if_index,
if (enable_disable)
{
d->workers_bitmap = bitmap;
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, bitmap)
- {
+ {
vec_add1(d->workers, i);
}
- /* *INDENT-ON* */
+
+ if (is_sym)
+ {
+ if (is_l4)
+ return VNET_API_ERROR_UNIMPLEMENTED;
+
+ d->hash_fn = vnet_hash_function_from_name (
+ "handoff-eth-sym", VNET_HASH_FN_TYPE_ETHERNET);
+ }
+ else
+ {
+ if (is_l4)
+ d->hash_fn =
+ vnet_hash_default_function (VNET_HASH_FN_TYPE_ETHERNET);
+ else
+ d->hash_fn = vnet_hash_function_from_name (
+ "handoff-eth", VNET_HASH_FN_TYPE_ETHERNET);
+ }
}
vnet_feature_enable_disable ("device-input", "worker-handoff",
sw_if_index, enable_disable, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth", "worker-handoff", sw_if_index,
+ enable_disable, 0, 0);
return rv;
}
@@ -230,12 +254,9 @@ set_interface_handoff_command_fn (vlib_main_t * vm,
unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- handoff_main_t *hm = &handoff_main;
- u32 sw_if_index = ~0;
+ u32 sw_if_index = ~0, is_sym = 0, is_l4 = 0;
int enable_disable = 1;
uword *bitmap = 0;
- u32 sym = ~0;
-
int rv = 0;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
@@ -248,9 +269,11 @@ set_interface_handoff_command_fn (vlib_main_t * vm,
vnet_get_main (), &sw_if_index))
;
else if (unformat (input, "symmetrical"))
- sym = 1;
+ is_sym = 1;
else if (unformat (input, "asymmetrical"))
- sym = 0;
+ is_sym = 0;
+ else if (unformat (input, "l4"))
+ is_l4 = 1;
else
break;
}
@@ -261,9 +284,8 @@ set_interface_handoff_command_fn (vlib_main_t * vm,
if (bitmap == 0)
return clib_error_return (0, "Please specify list of workers...");
- rv =
- interface_handoff_enable_disable (vm, sw_if_index, bitmap,
- enable_disable);
+ rv = interface_handoff_enable_disable (vm, sw_if_index, bitmap, is_sym,
+ is_l4, enable_disable);
switch (rv)
{
@@ -287,22 +309,15 @@ set_interface_handoff_command_fn (vlib_main_t * vm,
return clib_error_return (0, "unknown return value %d", rv);
}
- if (sym == 1)
- hm->hash_fn = eth_get_sym_key;
- else if (sym == 0)
- hm->hash_fn = eth_get_key;
-
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_handoff_command, static) = {
.path = "set interface handoff",
- .short_help =
- "set interface handoff <interface-name> workers <workers-list> [symmetrical|asymmetrical]",
+ .short_help = "set interface handoff <interface-name> workers <workers-list>"
+ " [symmetrical|asymmetrical]",
.function = set_interface_handoff_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
handoff_init (vlib_main_t * vm)
@@ -328,7 +343,6 @@ handoff_init (vlib_main_t * vm)
}
}
- hm->hash_fn = eth_get_key;
hm->frame_queue_index = ~0;
return 0;
diff --git a/src/vnet/hash/FEATURE.yaml b/src/vnet/hash/FEATURE.yaml
index 1e3d23ea882..d5b9a069c27 100644
--- a/src/vnet/hash/FEATURE.yaml
+++ b/src/vnet/hash/FEATURE.yaml
@@ -1,6 +1,6 @@
---
name: Hash infrastructure
-maintainer: Mohsin Kazmi <sykazmi@cisco.com>, Damjan Marion <damarion@cisco.com>
+maintainer: Mohsin Kazmi <mohsin.kazmi14@gmail.com>, Damjan Marion <damarion@cisco.com>
features:
- Ethernet
- IP
diff --git a/src/vnet/hash/crc32_5tuple.c b/src/vnet/hash/crc32_5tuple.c
index 29d92a96938..2cdb19440c6 100644
--- a/src/vnet/hash/crc32_5tuple.c
+++ b/src/vnet/hash/crc32_5tuple.c
@@ -7,29 +7,10 @@
#include <vnet/ethernet/ethernet.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
-#include <vnet/ip/ip46_address.h>
-#include <vnet/udp/udp_packet.h>
#include <vnet/hash/hash.h>
#include <vppinfra/crc32.h>
-typedef union
-{
- struct
- {
- ip46_address_t src_address;
- ip46_address_t dst_address;
- union
- {
- struct
- {
- u16 src_port;
- u16 dst_port;
- };
- u32 l4_hdr;
- };
- };
- u8 as_u8[36];
-} crc32c_5tuple_key_t;
+#ifdef clib_crc32c_uses_intrinsics
static const u8 l4_mask_bits[256] = {
[IP_PROTOCOL_ICMP] = 16, [IP_PROTOCOL_IGMP] = 8,
@@ -38,39 +19,42 @@ static const u8 l4_mask_bits[256] = {
[IP_PROTOCOL_ICMP6] = 16,
};
-static_always_inline void
-compute_ip6_key (ip6_header_t *ip, crc32c_5tuple_key_t *k)
+static_always_inline u32
+compute_ip6_key (ip6_header_t *ip)
{
+ u32 hash = 0, l4hdr;
u8 pr;
-
- /* copy 32 bytes of ip6 src and dst addresses into hash_key_t */
- clib_memcpy_fast ((u8 *) k, (u8 *) ip + 8, sizeof (ip6_address_t) * 2);
+  /* src + dst ip (32 bytes) as four u64 loads */
+ hash = clib_crc32c_u64 (hash, *(u64u *) ((u8 *) ip + 8));
+ hash = clib_crc32c_u64 (hash, *(u64u *) ((u8 *) ip + 16));
+ hash = clib_crc32c_u64 (hash, *(u64u *) ((u8 *) ip + 24));
+ hash = clib_crc32c_u64 (hash, *(u64u *) ((u8 *) ip + 32));
pr = ip->protocol;
- /* write l4 header */
- k->l4_hdr = *(u32 *) ip6_next_header (ip) & pow2_mask (l4_mask_bits[pr]);
+ l4hdr = *(u32 *) ip6_next_header (ip) & pow2_mask (l4_mask_bits[pr]);
+ /* protocol + l4 hdr */
+ return clib_crc32c_u64 (hash, ((u64) pr << 32) | l4hdr);
}
-static_always_inline void
-compute_ip4_key (ip4_header_t *ip, crc32c_5tuple_key_t *k)
+static_always_inline u32
+compute_ip4_key (ip4_header_t *ip)
{
+ u32 hash = 0, l4hdr;
u8 pr;
- u64 *key = (u64 *) k;
- /* copy 8 bytes of ip src and dst addresses into hash_key_t */
- key[0] = 0;
- key[1] = 0;
- key[2] = 0;
- key[3] = *(u64 *) ((u8 *) ip + 12);
+  /* src + dst ip as one u64 */
+ hash = clib_crc32c_u64 (0, *(u64 *) ((u8 *) ip + 12));
pr = ip->protocol;
- /* write l4 header */
- k->l4_hdr = *(u32 *) ip4_next_header (ip) & pow2_mask (l4_mask_bits[pr]);
+ l4hdr = *(u32 *) ip4_next_header (ip) & pow2_mask (l4_mask_bits[pr]);
+ /* protocol + l4 hdr */
+ return clib_crc32c_u64 (hash, ((u64) pr << 32) | l4hdr);
}
-static_always_inline void
-compute_ip_key (void *p, crc32c_5tuple_key_t *key)
+static_always_inline u32
+compute_ip_key (void *p)
{
if ((((u8 *) p)[0] & 0xf0) == 0x40)
- compute_ip4_key (p, key);
+ return compute_ip4_key (p);
else if ((((u8 *) p)[0] & 0xf0) == 0x60)
- compute_ip6_key (p, key);
+ return compute_ip6_key (p);
+ return 0;
}
void
@@ -80,22 +64,15 @@ vnet_crc32c_5tuple_ip_func (void **p, u32 *hash, u32 n_packets)
while (n_left_from >= 8)
{
- crc32c_5tuple_key_t key[4] = {};
-
clib_prefetch_load (p[4]);
clib_prefetch_load (p[5]);
clib_prefetch_load (p[6]);
clib_prefetch_load (p[7]);
- compute_ip_key (p[0], &key[0]);
- compute_ip_key (p[1], &key[1]);
- compute_ip_key (p[2], &key[2]);
- compute_ip_key (p[3], &key[3]);
-
- hash[0] = clib_crc32c (key[0].as_u8, sizeof (key[0]));
- hash[1] = clib_crc32c (key[1].as_u8, sizeof (key[1]));
- hash[2] = clib_crc32c (key[2].as_u8, sizeof (key[2]));
- hash[3] = clib_crc32c (key[3].as_u8, sizeof (key[3]));
+ hash[0] = compute_ip_key (p[0]);
+ hash[1] = compute_ip_key (p[1]);
+ hash[2] = compute_ip_key (p[2]);
+ hash[3] = compute_ip_key (p[3]);
hash += 4;
n_left_from -= 4;
@@ -104,11 +81,7 @@ vnet_crc32c_5tuple_ip_func (void **p, u32 *hash, u32 n_packets)
while (n_left_from > 0)
{
- crc32c_5tuple_key_t key = {};
-
- compute_ip_key (p[0], &key);
-
- hash[0] = clib_crc32c (key.as_u8, sizeof (key));
+ hash[0] = compute_ip_key (p[0]);
hash += 1;
n_left_from -= 1;
@@ -116,8 +89,8 @@ vnet_crc32c_5tuple_ip_func (void **p, u32 *hash, u32 n_packets)
}
}
-static_always_inline void
-compute_ethernet_key (void *p, crc32c_5tuple_key_t *key)
+static_always_inline u32
+compute_ethernet_key (void *p)
{
u16 ethertype = 0, l2hdr_sz = 0;
@@ -142,13 +115,14 @@ compute_ethernet_key (void *p, crc32c_5tuple_key_t *key)
if (ethertype == ETHERNET_TYPE_IP4)
{
ip4_header_t *ip4 = (ip4_header_t *) (p + l2hdr_sz);
- compute_ip4_key (ip4, key);
+ return compute_ip4_key (ip4);
}
else if (ethertype == ETHERNET_TYPE_IP6)
{
ip6_header_t *ip6 = (ip6_header_t *) (p + l2hdr_sz);
- compute_ip6_key (ip6, key);
+ return compute_ip6_key (ip6);
}
+ return 0;
}
void
@@ -158,22 +132,15 @@ vnet_crc32c_5tuple_ethernet_func (void **p, u32 *hash, u32 n_packets)
while (n_left_from >= 8)
{
- crc32c_5tuple_key_t key[4] = {};
-
clib_prefetch_load (p[4]);
clib_prefetch_load (p[5]);
clib_prefetch_load (p[6]);
clib_prefetch_load (p[7]);
- compute_ethernet_key (p[0], &key[0]);
- compute_ethernet_key (p[1], &key[1]);
- compute_ethernet_key (p[2], &key[2]);
- compute_ethernet_key (p[3], &key[3]);
-
- hash[0] = clib_crc32c (key[0].as_u8, sizeof (key[0]));
- hash[1] = clib_crc32c (key[1].as_u8, sizeof (key[1]));
- hash[2] = clib_crc32c (key[2].as_u8, sizeof (key[2]));
- hash[3] = clib_crc32c (key[3].as_u8, sizeof (key[3]));
+ hash[0] = compute_ethernet_key (p[0]);
+ hash[1] = compute_ethernet_key (p[1]);
+ hash[2] = compute_ethernet_key (p[2]);
+ hash[3] = compute_ethernet_key (p[3]);
hash += 4;
n_left_from -= 4;
@@ -182,11 +149,7 @@ vnet_crc32c_5tuple_ethernet_func (void **p, u32 *hash, u32 n_packets)
while (n_left_from > 0)
{
- crc32c_5tuple_key_t key = {};
-
- compute_ethernet_key (p[0], &key);
-
- hash[0] = clib_crc32c (key.as_u8, sizeof (key));
+ hash[0] = compute_ethernet_key (p[0]);
hash += 1;
n_left_from -= 1;
@@ -201,3 +164,5 @@ VNET_REGISTER_HASH_FUNCTION (crc32c_5tuple, static) = {
.function[VNET_HASH_FN_TYPE_ETHERNET] = vnet_crc32c_5tuple_ethernet_func,
.function[VNET_HASH_FN_TYPE_IP] = vnet_crc32c_5tuple_ip_func,
};
+
+#endif
diff --git a/src/vnet/handoff.h b/src/vnet/hash/handoff_eth.c
index f50b86d5c6d..dc8db2ac413 100644
--- a/src/vnet/handoff.h
+++ b/src/vnet/hash/handoff_eth.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2021 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -13,14 +13,24 @@
* limitations under the License.
*/
-#ifndef included_vnet_handoff_h
-#define included_vnet_handoff_h
-
#include <vlib/vlib.h>
#include <vnet/ethernet/ethernet.h>
+#include <vnet/hash/hash.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
#include <vnet/mpls/packet.h>
+#include <vppinfra/crc32.h>
+#include <vppinfra/xxhash.h>
+
+always_inline u32
+ho_hash (u64 key)
+{
+#ifdef clib_crc32c_uses_intrinsics
+ return clib_crc32c ((u8 *) &key, sizeof (key));
+#else
+ return clib_xxhash (key);
+#endif
+}
static inline u64
ipv4_get_key (ip4_header_t * ip)
@@ -235,7 +245,103 @@ eth_get_key (ethernet_header_t * h0)
return hash_key;
}
-#endif /* included_vnet_handoff_h */
+void
+handoff_eth_func (void **p, u32 *hash, u32 n_packets)
+{
+ u32 n_left_from = n_packets;
+
+ while (n_left_from >= 8)
+ {
+ u64 key[4] = {};
+
+ clib_prefetch_load (p[4]);
+ clib_prefetch_load (p[5]);
+ clib_prefetch_load (p[6]);
+ clib_prefetch_load (p[7]);
+
+ key[0] = eth_get_key ((ethernet_header_t *) p[0]);
+ key[1] = eth_get_key ((ethernet_header_t *) p[1]);
+ key[2] = eth_get_key ((ethernet_header_t *) p[2]);
+ key[3] = eth_get_key ((ethernet_header_t *) p[3]);
+
+ hash[0] = ho_hash (key[0]);
+ hash[1] = ho_hash (key[1]);
+ hash[2] = ho_hash (key[2]);
+ hash[3] = ho_hash (key[3]);
+
+ hash += 4;
+ n_left_from -= 4;
+ p += 4;
+ }
+
+ while (n_left_from > 0)
+ {
+ u64 key;
+
+ key = eth_get_key ((ethernet_header_t *) p[0]);
+ hash[0] = ho_hash (key);
+
+ hash += 1;
+ n_left_from -= 1;
+ p += 1;
+ }
+}
+
+VNET_REGISTER_HASH_FUNCTION (handoff_eth, static) = {
+ .name = "handoff-eth",
+ .description = "Ethernet/IPv4/IPv6/MPLS headers",
+ .priority = 2,
+ .function[VNET_HASH_FN_TYPE_ETHERNET] = handoff_eth_func,
+};
+
+void
+handoff_eth_sym_func (void **p, u32 *hash, u32 n_packets)
+{
+ u32 n_left_from = n_packets;
+
+ while (n_left_from >= 8)
+ {
+ u64 key[4] = {};
+
+ clib_prefetch_load (p[4]);
+ clib_prefetch_load (p[5]);
+ clib_prefetch_load (p[6]);
+ clib_prefetch_load (p[7]);
+
+ key[0] = eth_get_sym_key ((ethernet_header_t *) p[0]);
+ key[1] = eth_get_sym_key ((ethernet_header_t *) p[1]);
+ key[2] = eth_get_sym_key ((ethernet_header_t *) p[2]);
+ key[3] = eth_get_sym_key ((ethernet_header_t *) p[3]);
+
+ hash[0] = ho_hash (key[0]);
+ hash[1] = ho_hash (key[1]);
+ hash[2] = ho_hash (key[2]);
+ hash[3] = ho_hash (key[3]);
+
+ hash += 4;
+ n_left_from -= 4;
+ p += 4;
+ }
+
+ while (n_left_from > 0)
+ {
+ u64 key;
+
+ key = eth_get_sym_key ((ethernet_header_t *) p[0]);
+ hash[0] = ho_hash (key);
+
+ hash += 1;
+ n_left_from -= 1;
+ p += 1;
+ }
+}
+
+VNET_REGISTER_HASH_FUNCTION (handoff_eth_sym, static) = {
+ .name = "handoff-eth-sym",
+ .description = "Ethernet/IPv4/IPv6/MPLS headers Symmetric",
+ .priority = 1,
+ .function[VNET_HASH_FN_TYPE_ETHERNET] = handoff_eth_sym_func,
+};
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/hash/hash.rst b/src/vnet/hash/hash.rst
new file mode 100644
index 00000000000..3db74e2f093
--- /dev/null
+++ b/src/vnet/hash/hash.rst
@@ -0,0 +1,90 @@
+.. _hash_doc:
+
+Hash Infra
+==========
+
+Overview
+________
+
+Modern physical NICs use a packet flow hash for different purposes, e.g.
+Receive Side Scaling, flow steering and interface bonding. NICs can also
+provide the packet flow hash prepended to the data packet as metadata, which
+applications can use without recomputing it.
+
+As more and more services are deployed in virtualized environments, virtual
+interfaces are used to interconnect those services, and a packet flow hash
+for such interfaces has to be computed in software.
+
+The Hash Infrastructure
+_______________________
+
+VPP implements software based hashing functionality which can be used for
+different purposes. It also provides users a centralized way to register
+custom hash functions, suited to a given traffic profile, to be used by
+different VPP features, e.g. Multi-TXQ, software RSS or the bonding driver.
+
+Data structures
+^^^^^^^^^^^^^^^
+
+The hashing infra provides two types of hashing functions:
+``VNET_HASH_FN_TYPE_ETHERNET`` and ``VNET_HASH_FN_TYPE_IP``, for ethernet
+traffic and IP traffic respectively.
+It also defines a uniform signature for the functions to be implemented:
+
+.. code:: c
+
+ void (*vnet_hash_fn_t) (void **p, u32 *h, u32 n_packets);
+
+Here ``p`` is an array of pointers, each pointing to the beginning of a packet
+header (either ethernet or ip).
+``h`` is an empty array of size ``n_packets``; on return, it contains the
+computed hashes.
+``n_packets`` is the number of packets passed to this function.
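+
+As a minimal sketch (``my_hash_func`` is a hypothetical name, not part of the
+tree), an implementation with this signature simply walks the packet array and
+writes one hash per packet:
+
+.. code:: c
+
+  static void
+  my_hash_func (void **p, u32 *hash, u32 n_packets)
+  {
+    for (u32 i = 0; i < n_packets; i++)
+      {
+        u8 *data = p[i];
+        /* toy example: mix the first two bytes of the header */
+        hash[i] = ((u32) data[0] << 8) | data[1];
+      }
+  }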
+
+Custom hashing functions can be registered through ``VNET_REGISTER_HASH_FUNCTION``.
+Users need to provide a name, a description, a priority and the hashing
+functions for registration.
+
+The default hashing function is selected as the one with the highest priority
+among the registered hashing functions.
+
+.. code:: c
+
+ typedef struct vnet_hash_function_registration
+ {
+ const char *name;
+ const char *description;
+ int priority;
+ vnet_hash_fn_t function[VNET_HASH_FN_TYPE_N];
+
+ struct vnet_hash_function_registration *next;
+ } vnet_hash_function_registration_t;
+
+For example, ``crc32c_5tuple`` provides two hashing functions: one for IP
+traffic and one for ethernet traffic. It computes a CRC32C hash over the
+5-tuple of the flow.
+
+.. code:: c
+
+ void vnet_crc32c_5tuple_ip_func (void **p, u32 *hash, u32 n_packets);
+ void vnet_crc32c_5tuple_ethernet_func (void **p, u32 *hash, u32 n_packets);
+
+ VNET_REGISTER_HASH_FUNCTION (crc32c_5tuple, static) = {
+ .name = "crc32c-5tuple",
+ .description = "IPv4/IPv6 header and TCP/UDP ports",
+ .priority = 50,
+ .function[VNET_HASH_FN_TYPE_ETHERNET] = vnet_crc32c_5tuple_ethernet_func,
+ .function[VNET_HASH_FN_TYPE_IP] = vnet_crc32c_5tuple_ip_func,
+ };
+
+
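+A consumer feature can then obtain a function either by registration name or
+by letting the infra select the highest-priority one. A short sketch
+(``pkts``, ``hashes`` and ``n_pkts`` are placeholder variables), using the
+lookup helpers that the worker-handoff feature also uses:
+
+.. code:: c
+
+  vnet_hash_fn_t hf;
+
+  /* explicit selection by registration name */
+  hf = vnet_hash_function_from_name ("crc32c-5tuple", VNET_HASH_FN_TYPE_IP);
+
+  /* or let the infra pick the highest-priority function for this type */
+  hf = vnet_hash_default_function (VNET_HASH_FN_TYPE_IP);
+
+  /* compute hashes for a batch of packets */
+  hf (pkts, hashes, n_pkts);
+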
+Users can list all the registered hash functions, along with their priority
+and description, using the CLI shown below.
+
+Hash API
+^^^^^^^^
+
+There is no Hash API at the moment.
+
+Hash CLI
+^^^^^^^^
+
+::
+
+ show hash
diff --git a/src/vnet/hash/hash_eth.c b/src/vnet/hash/hash_eth.c
new file mode 100644
index 00000000000..1ac8b66a1bc
--- /dev/null
+++ b/src/vnet/hash/hash_eth.c
@@ -0,0 +1,326 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#define _GNU_SOURCE
+#include <stdint.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+#include <vnet/tcp/tcp_packet.h>
+#include <vppinfra/lb_hash_hash.h>
+#include <vnet/hash/hash.h>
+
+static_always_inline u16 *
+locate_ethertype (ethernet_header_t *eth)
+{
+ u16 *ethertype_p;
+ ethernet_vlan_header_t *vlan;
+
+ if (!ethernet_frame_is_tagged (clib_net_to_host_u16 (eth->type)))
+ {
+ ethertype_p = &eth->type;
+ }
+ else
+ {
+ vlan = (void *) (eth + 1);
+ ethertype_p = &vlan->type;
+ if (*ethertype_p == ntohs (ETHERNET_TYPE_VLAN))
+ {
+ vlan++;
+ ethertype_p = &vlan->type;
+ }
+ }
+ return ethertype_p;
+}
+
+static void
+hash_eth_l2 (void **p, u32 *hash, u32 n_packets)
+{
+ u32 n_left_from = n_packets;
+
+ while (n_left_from >= 8)
+ {
+      clib_prefetch_load (p[4]);
+      clib_prefetch_load (p[5]);
+      clib_prefetch_load (p[6]);
+      clib_prefetch_load (p[7]);
+
+      /* compute the key per packet, not once for the whole quad:
+       * dst MAC as a u64 load plus the low 4 bytes of the src MAC */
+      for (int i = 0; i < 4; i++)
+	{
+	  ethernet_header_t *eth = p[i];
+	  u64 a = clib_mem_unaligned ((u64 *) &eth->dst_address[0], u64);
+	  u32 b = clib_mem_unaligned ((u32 *) &eth->src_address[2], u32);
+
+	  hash[i] = lb_hash_hash_2_tuples (a, b);
+	}
+
+ hash += 4;
+ n_left_from -= 4;
+ p += 4;
+ }
+
+ while (n_left_from > 0)
+ {
+ ethernet_header_t *eth = *p;
+ u64 *dst = (u64 *) &eth->dst_address[0];
+ u64 a = clib_mem_unaligned (dst, u64);
+ u32 *src = (u32 *) &eth->src_address[2];
+ u32 b = clib_mem_unaligned (src, u32);
+
+ hash[0] = lb_hash_hash_2_tuples (a, b);
+
+ hash += 1;
+ n_left_from -= 1;
+ p += 1;
+ }
+}
+
+static_always_inline u32
+hash_eth_l23_inline (void **p)
+{
+ ethernet_header_t *eth = *p;
+ u8 ip_version;
+ ip4_header_t *ip4;
+ u16 ethertype, *ethertype_p;
+ u32 *mac1, *mac2, *mac3;
+ u32 hash;
+
+ ethertype_p = locate_ethertype (eth);
+ ethertype = clib_mem_unaligned (ethertype_p, u16);
+
+ if ((ethertype != htons (ETHERNET_TYPE_IP4)) &&
+ (ethertype != htons (ETHERNET_TYPE_IP6)))
+ {
+ hash_eth_l2 (p, &hash, 1);
+ return hash;
+ }
+
+ ip4 = (ip4_header_t *) (ethertype_p + 1);
+ ip_version = (ip4->ip_version_and_header_length >> 4);
+
+ if (ip_version == 0x4)
+ {
+ u32 a;
+
+ mac1 = (u32 *) &eth->dst_address[0];
+ mac2 = (u32 *) &eth->dst_address[4];
+ mac3 = (u32 *) &eth->src_address[2];
+
+ a = clib_mem_unaligned (mac1, u32) ^ clib_mem_unaligned (mac2, u32) ^
+ clib_mem_unaligned (mac3, u32);
+ hash = lb_hash_hash_2_tuples (
+ clib_mem_unaligned (&ip4->address_pair, u64), a);
+ return hash;
+ }
+
+ if (ip_version == 0x6)
+ {
+ u64 a;
+ ip6_header_t *ip6 = (ip6_header_t *) (eth + 1);
+
+ mac1 = (u32 *) &eth->dst_address[0];
+ mac2 = (u32 *) &eth->dst_address[4];
+ mac3 = (u32 *) &eth->src_address[2];
+
+ a = clib_mem_unaligned (mac1, u32) ^ clib_mem_unaligned (mac2, u32) ^
+ clib_mem_unaligned (mac3, u32);
+ hash = lb_hash_hash (
+ clib_mem_unaligned (&ip6->src_address.as_uword[0], uword),
+ clib_mem_unaligned (&ip6->src_address.as_uword[1], uword),
+ clib_mem_unaligned (&ip6->dst_address.as_uword[0], uword),
+ clib_mem_unaligned (&ip6->dst_address.as_uword[1], uword), a);
+ return hash;
+ }
+
+ hash_eth_l2 (p, &hash, 1);
+ return hash;
+}
+
+static void
+hash_eth_l23 (void **p, u32 *hash, u32 n_packets)
+{
+ u32 n_left_from = n_packets;
+
+ while (n_left_from >= 8)
+ {
+ clib_prefetch_load (p[4]);
+ clib_prefetch_load (p[5]);
+ clib_prefetch_load (p[6]);
+ clib_prefetch_load (p[7]);
+
+ hash[0] = hash_eth_l23_inline (&p[0]);
+ hash[1] = hash_eth_l23_inline (&p[1]);
+ hash[2] = hash_eth_l23_inline (&p[2]);
+ hash[3] = hash_eth_l23_inline (&p[3]);
+
+ hash += 4;
+ n_left_from -= 4;
+ p += 4;
+ }
+
+ while (n_left_from > 0)
+ {
+ hash[0] = hash_eth_l23_inline (&p[0]);
+
+ hash += 1;
+ n_left_from -= 1;
+ p += 1;
+ }
+}
+
+static_always_inline u32
+hash_eth_l34_inline (void **p)
+{
+ ethernet_header_t *eth = *p;
+ u8 ip_version;
+ uword is_tcp_udp;
+ ip4_header_t *ip4;
+ u16 ethertype, *ethertype_p;
+ u32 hash;
+
+ ethertype_p = locate_ethertype (eth);
+ ethertype = clib_mem_unaligned (ethertype_p, u16);
+
+ if ((ethertype != htons (ETHERNET_TYPE_IP4)) &&
+ (ethertype != htons (ETHERNET_TYPE_IP6)))
+ {
+ hash_eth_l2 (p, &hash, 1);
+ return hash;
+ }
+
+ ip4 = (ip4_header_t *) (ethertype_p + 1);
+ ip_version = (ip4->ip_version_and_header_length >> 4);
+
+ if (ip_version == 0x4)
+ {
+ u32 a, t1, t2;
+ tcp_header_t *tcp = (void *) (ip4 + 1);
+
+ is_tcp_udp = (ip4->protocol == IP_PROTOCOL_TCP) ||
+ (ip4->protocol == IP_PROTOCOL_UDP);
+ t1 = is_tcp_udp ? clib_mem_unaligned (&tcp->src, u16) : 0;
+ t2 = is_tcp_udp ? clib_mem_unaligned (&tcp->dst, u16) : 0;
+ a = t1 ^ t2;
+ hash = lb_hash_hash_2_tuples (
+ clib_mem_unaligned (&ip4->address_pair, u64), a);
+ return hash;
+ }
+
+ if (ip_version == 0x6)
+ {
+ u64 a;
+ u32 t1, t2;
+ ip6_header_t *ip6 = (ip6_header_t *) (eth + 1);
+ tcp_header_t *tcp = (void *) (ip6 + 1);
+
+ is_tcp_udp = 0;
+ if (PREDICT_TRUE ((ip6->protocol == IP_PROTOCOL_TCP) ||
+ (ip6->protocol == IP_PROTOCOL_UDP)))
+ {
+ is_tcp_udp = 1;
+ tcp = (void *) (ip6 + 1);
+ }
+ else if (ip6->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ {
+ ip6_hop_by_hop_header_t *hbh = (ip6_hop_by_hop_header_t *) (ip6 + 1);
+ if ((hbh->protocol == IP_PROTOCOL_TCP) ||
+ (hbh->protocol == IP_PROTOCOL_UDP))
+ {
+ is_tcp_udp = 1;
+ tcp = (tcp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3));
+ }
+ }
+ t1 = is_tcp_udp ? clib_mem_unaligned (&tcp->src, u16) : 0;
+ t2 = is_tcp_udp ? clib_mem_unaligned (&tcp->dst, u16) : 0;
+ a = t1 ^ t2;
+ hash = lb_hash_hash (
+ clib_mem_unaligned (&ip6->src_address.as_uword[0], uword),
+ clib_mem_unaligned (&ip6->src_address.as_uword[1], uword),
+ clib_mem_unaligned (&ip6->dst_address.as_uword[0], uword),
+ clib_mem_unaligned (&ip6->dst_address.as_uword[1], uword), a);
+ return hash;
+ }
+
+ hash_eth_l2 (p, &hash, 1);
+ return hash;
+}
+
+static void
+hash_eth_l34 (void **p, u32 *hash, u32 n_packets)
+{
+ u32 n_left_from = n_packets;
+
+ while (n_left_from >= 8)
+ {
+ clib_prefetch_load (p[4]);
+ clib_prefetch_load (p[5]);
+ clib_prefetch_load (p[6]);
+ clib_prefetch_load (p[7]);
+
+ hash[0] = hash_eth_l34_inline (&p[0]);
+ hash[1] = hash_eth_l34_inline (&p[1]);
+ hash[2] = hash_eth_l34_inline (&p[2]);
+ hash[3] = hash_eth_l34_inline (&p[3]);
+
+ hash += 4;
+ n_left_from -= 4;
+ p += 4;
+ }
+
+ while (n_left_from > 0)
+ {
+ hash[0] = hash_eth_l34_inline (&p[0]);
+
+ hash += 1;
+ n_left_from -= 1;
+ p += 1;
+ }
+}
+
+VNET_REGISTER_HASH_FUNCTION (hash_eth_l2, static) = {
+ .name = "hash-eth-l2",
+ .description = "Hash ethernet L2 headers",
+ .priority = 50,
+ .function[VNET_HASH_FN_TYPE_ETHERNET] = hash_eth_l2,
+};
+
+VNET_REGISTER_HASH_FUNCTION (hash_eth_l23, static) = {
+ .name = "hash-eth-l23",
+ .description = "Hash ethernet L23 headers",
+ .priority = 50,
+ .function[VNET_HASH_FN_TYPE_ETHERNET] = hash_eth_l23,
+};
+
+VNET_REGISTER_HASH_FUNCTION (hash_eth_l34, static) = {
+ .name = "hash-eth-l34",
+ .description = "Hash ethernet L34 headers",
+ .priority = 50,
+ .function[VNET_HASH_FN_TYPE_ETHERNET] = hash_eth_l34,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/hdlc/hdlc.c b/src/vnet/hdlc/hdlc.c
index fa1e7cd5eaf..443a0396e9e 100644
--- a/src/vnet/hdlc/hdlc.c
+++ b/src/vnet/hdlc/hdlc.c
@@ -197,7 +197,6 @@ hdlc_build_rewrite (vnet_main_t * vnm,
return (rewrite);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (hdlc_hw_interface_class) = {
.name = "HDLC",
.format_header = format_hdlc_header_with_length,
@@ -205,7 +204,6 @@ VNET_HW_INTERFACE_CLASS (hdlc_hw_interface_class) = {
.build_rewrite = hdlc_build_rewrite,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
static void
add_protocol (hdlc_main_t * pm, hdlc_protocol_t protocol, char *protocol_name)
diff --git a/src/vnet/hdlc/node.c b/src/vnet/hdlc/node.c
index 8bb621231c7..48269a3b8d3 100644
--- a/src/vnet/hdlc/node.c
+++ b/src/vnet/hdlc/node.c
@@ -279,7 +279,6 @@ static char *hdlc_error_strings[] = {
#undef hdlc_error
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (hdlc_input_node) = {
.function = hdlc_input,
.name = "hdlc-input",
@@ -302,7 +301,6 @@ VLIB_REGISTER_NODE (hdlc_input_node) = {
.format_trace = format_hdlc_input_trace,
.unformat_buffer = unformat_hdlc_header,
};
-/* *INDENT-ON* */
static clib_error_t *
hdlc_input_runtime_init (vlib_main_t * vm)
diff --git a/src/vnet/interface.api b/src/vnet/interface.api
index d89dea4e353..eea86aa1ac8 100644
--- a/src/vnet/interface.api
+++ b/src/vnet/interface.api
@@ -458,6 +458,29 @@ autoreply define sw_interface_set_rx_placement
bool is_main;
};
+/** \brief Set an interface's tx-placement
+    Tx-queue placement on a specific thread is operational only for hardware
+    interfaces. It will not set the queue - thread placement for
+    sub-interfaces, p2p and pipe interfaces.
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - the interface whose tx-placement will be set
+    @param queue_id - the queue number whose tx-placement will be set
+    @param array_size - the size of the thread indexes array
+    @param threads - the indexes of the main and worker thread(s) on which
+                     this tx-queue will be placed
+*/
+autoendian autoreply define sw_interface_set_tx_placement
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u32 queue_id;
+ u32 array_size;
+ u32 threads[array_size];
+ option vat_help = "<interface | sw_if_index <index>> queue <n> [threads <list> | mask <hex>]";
+};
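+
+/* Example request (hypothetical values; by VPP's naming convention the C
+ * struct generated for this message is vl_api_sw_interface_set_tx_placement_t,
+ * which a client fills as):
+ *
+ *   mp->sw_if_index = 1; mp->queue_id = 0;
+ *   mp->array_size = 2; mp->threads[0] = 1; mp->threads[1] = 2;
+ */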
+
/** \brief Set custom interface name
Set custom interface name for the interface.
@param client_index - opaque cookie to identify the sender
@@ -512,6 +535,60 @@ define sw_interface_rx_placement_details
vl_api_rx_mode_t mode;
};
+service {
+ rpc sw_interface_tx_placement_get returns sw_interface_tx_placement_get_reply
+ stream sw_interface_tx_placement_details;
+};
+
+/** \brief Get the tx-queue placement of interface(s)
+    @param cursor - optional, allows the client to continue a dump
+    @param sw_if_index - optional interface index for which the queue
+    placement is requested. sw_if_index = ~0 will get the placement
+    information for all interfaces. It will not get information related to
+    sub-interfaces, p2p and pipe interfaces.
+*/
+autoendian define sw_interface_tx_placement_get
+{
+ u32 client_index;
+ u32 context;
+ u32 cursor;
+ vl_api_interface_index_t sw_if_index;
+ option vat_help = "[interface | sw_if_index <index>]";
+};
+
+autoendian define sw_interface_tx_placement_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 cursor;
+};
+
+/** \brief Show the interface's queue - thread placement
+    This API is used to display the interface and queue worker
+    thread placement. One message per tx-queue per interface will
+    be sent to the client.
+    Each message contains the tx-queue id of an interface, the
+    interface index, the thread(s) on which this tx-queue is
+    placed, and the mode of the tx-queue.
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - the interface whose tx-placement will be dumped
+    @param queue_id - the queue id
+    @param shared - whether the queue is shared with other threads
+    @param array_size - the size of the threads array
+    @param threads - the indexes of the main and worker thread(s) on which
+    this tx-queue is placed
+*/
+autoendian define sw_interface_tx_placement_details
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u32 queue_id;
+ u8 shared;
+ u32 array_size;
+ u32 threads[array_size];
+};
+
/* Gross kludge, DGMS */
autoreply define interface_name_renumber
{
@@ -656,6 +733,61 @@ autoreply define collect_detailed_interface_stats
bool enable_disable;
};
+/** \brief pcap_set_filter_function
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param filter_function_name - the name of the filter function
+ to set for pcap capture
+*/
+autoreply define pcap_set_filter_function
+{
+ u32 client_index;
+ u32 context;
+
+ string filter_function_name[];
+};
+
+/** \brief pcap_trace_on
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param capture_rx - capture received packets
+ @param capture_tx - capture transmitted packets
+ @param capture_drop - capture dropped packets
+    @param filter - whether a filter is being used on this capture
+ @param preallocate_data - preallocate the data buffer
+ @param free_data - free the data buffer
+ @param max_packets - depth of local buffer
+ @param max_bytes_per_packet - maximum number of bytes to capture
+ for each packet
+ @param sw_if_index - specify a given interface, or 0 for any
+ @param error - filter packets based on a specific error.
+ @param filename - output filename, will be placed in /tmp
+*/
+autoreply define pcap_trace_on
+{
+ u32 client_index;
+ u32 context;
+ bool capture_rx;
+ bool capture_tx;
+ bool capture_drop;
+ bool filter;
+ bool preallocate_data;
+ bool free_data;
+ u32 max_packets [default=1000];
+ u32 max_bytes_per_packet [default=512];
+ vl_api_interface_index_t sw_if_index;
+ string error[128];
+ string filename[64];
+
+ option vat_help = "pcap_trace_on [capture_rx] [capture_tx] [capture_drop] [max_packets <nn>] [sw_if_index <sw_if_index>|0 for any] [error <node>.<error>] [filename <name>] [max_bytes_per_packet <nnnn>] [filter] [preallocate_data] [free_data]";
+};
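+
+/* Example (illustrative values), following the vat_help syntax above:
+ *
+ *   pcap_trace_on capture_rx capture_tx max_packets 10000 sw_if_index 1
+ *     filename rx_tx.pcap
+ */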
+
+autoreply define pcap_trace_off
+{
+ u32 client_index;
+ u32 context;
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/vnet/interface.c b/src/vnet/interface.c
index ab12da563a5..5fb2ff65fa2 100644
--- a/src/vnet/interface.c
+++ b/src/vnet/interface.c
@@ -45,11 +45,9 @@
#include <vnet/interface/rx_queue_funcs.h>
#include <vnet/interface/tx_queue_funcs.h>
-/* *INDENT-OFF* */
VLIB_REGISTER_LOG_CLASS (if_default_log, static) = {
.class_name = "interface",
};
-/* *INDENT-ON* */
#define log_debug(fmt,...) vlib_log_debug(if_default_log.class, fmt, __VA_ARGS__)
#define log_err(fmt,...) vlib_log_err(if_default_log.class, fmt, __VA_ARGS__)
@@ -141,15 +139,12 @@ serialize_vnet_interface_state (serialize_main_t * m, va_list * va)
/* Serialize hardware interface classes since they may have changed.
Must do this before sending up/down flags. */
- /* *INDENT-OFF* */
pool_foreach (hif, im->hw_interfaces) {
vnet_hw_interface_class_t * hw_class = vnet_get_hw_interface_class (vnm, hif->hw_class_index);
serialize_cstring (m, hw_class->name);
}
- /* *INDENT-ON* */
/* Send sw/hw interface state when non-zero. */
- /* *INDENT-OFF* */
pool_foreach (sif, im->sw_interfaces) {
if (sif->flags != 0)
{
@@ -158,14 +153,12 @@ serialize_vnet_interface_state (serialize_main_t * m, va_list * va)
st->flags = sif->flags;
}
}
- /* *INDENT-ON* */
vec_serialize (m, sts, serialize_vec_vnet_sw_hw_interface_state);
if (sts)
- _vec_len (sts) = 0;
+ vec_set_len (sts, 0);
- /* *INDENT-OFF* */
pool_foreach (hif, im->hw_interfaces) {
if (hif->flags != 0)
{
@@ -174,7 +167,6 @@ serialize_vnet_interface_state (serialize_main_t * m, va_list * va)
st->flags = vnet_hw_interface_flags_to_sw(hif->flags);
}
}
- /* *INDENT-ON* */
vec_serialize (m, sts, serialize_vec_vnet_sw_hw_interface_state);
@@ -206,7 +198,6 @@ unserialize_vnet_interface_state (serialize_main_t * m, va_list * va)
uword *p;
clib_error_t *error;
- /* *INDENT-OFF* */
pool_foreach (hif, im->hw_interfaces) {
unserialize_cstring (m, &class_name);
p = hash_get_mem (im->hw_interface_class_by_name, class_name);
@@ -222,7 +213,6 @@ unserialize_vnet_interface_state (serialize_main_t * m, va_list * va)
clib_error_report (error);
vec_free (class_name);
}
- /* *INDENT-ON* */
}
vec_unserialize (m, &sts, unserialize_vec_vnet_sw_hw_interface_state);
@@ -655,6 +645,7 @@ vnet_create_sw_interface (vnet_main_t * vnm, vnet_sw_interface_t * template,
/* undo the work done by vnet_create_sw_interface_no_callbacks() */
log_err ("create_sw_interface: set flags failed\n %U",
format_clib_error, error);
+ call_sw_interface_add_del_callbacks (vnm, *sw_if_index, 0);
vnet_sw_interface_t *sw =
pool_elt_at_index (im->sw_interfaces, *sw_if_index);
pool_put (im->sw_interfaces, sw);
@@ -768,18 +759,40 @@ sw_interface_walk_callback (vnet_main_t * vnm, u32 sw_if_index, void *ctx)
return WALK_CONTINUE;
}
-void
-vnet_hw_interface_set_mtu (vnet_main_t * vnm, u32 hw_if_index, u32 mtu)
+clib_error_t *
+vnet_hw_interface_set_max_frame_size (vnet_main_t *vnm, u32 hw_if_index,
+ u32 fs)
{
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ vnet_hw_interface_class_t *hw_if_class =
+ vnet_get_hw_interface_class (vnm, hi->hw_class_index);
+ clib_error_t *err = 0;
+ log_debug ("set_max_frame_size: interface %v, max_frame_size %u -> %u",
+ hi->name, hi->max_frame_size, fs);
- if (hi->max_packet_bytes != mtu)
+ if (hw_if_class->set_max_frame_size == 0)
+ return vnet_error (VNET_ERR_UNSUPPORTED,
+ "hw class doesn't support changing Max Frame Size");
+
+ if (hi->max_frame_size != fs)
{
- hi->max_packet_bytes = mtu;
- ethernet_set_flags (vnm, hw_if_index, ETHERNET_INTERFACE_FLAG_MTU);
+ u32 mtu;
+ if (hw_if_class->set_max_frame_size)
+ if ((err = hw_if_class->set_max_frame_size (vnm, hi, fs)))
+ return err;
+ hi->max_frame_size = fs;
+ mtu = fs - hi->frame_overhead;
vnet_hw_interface_walk_sw (vnm, hw_if_index, sw_interface_walk_callback,
&mtu);
}
+ return 0;
+}
+clib_error_t *
+vnet_hw_interface_set_mtu (vnet_main_t *vnm, u32 hw_if_index, u32 mtu)
+{
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ return vnet_hw_interface_set_max_frame_size (vnm, hw_if_index,
+ mtu + hi->frame_overhead);
}
static void
@@ -804,6 +817,36 @@ setup_output_node (vlib_main_t * vm,
n->unformat_buffer = hw_class->unformat_header;
}
+void
+vnet_reset_interface_l3_output_node (vlib_main_t *vm, u32 sw_if_index)
+{
+ vnet_set_interface_l3_output_node (vm, sw_if_index,
+ (u8 *) "interface-output");
+}
+
+void
+vnet_set_interface_l3_output_node (vlib_main_t *vm, u32 sw_if_index,
+ u8 *output_node)
+{
+ vlib_node_t *l3_node;
+
+ l3_node = vlib_get_node_by_name (vm, output_node);
+
+ static char *arcs[] = {
+ "ip4-output",
+ "ip6-output",
+ "mpls-output",
+ "ethernet-output",
+ };
+ u8 a;
+
+ for (a = 0; a < ARRAY_LEN (arcs); a++)
+ {
+ u8 arc = vnet_get_feature_arc_index (arcs[a]);
+ vnet_feature_modify_end_node (arc, sw_if_index, l3_node->index);
+ }
+}
+
/* Register an interface instance. */
u32
vnet_register_interface (vnet_main_t * vnm,
@@ -821,7 +864,6 @@ vnet_register_interface (vnet_main_t * vnm,
vnet_feature_config_main_t *fcm;
vnet_config_main_t *cm;
u32 hw_index, i;
- char *tx_node_name = NULL, *output_node_name = NULL;
vlib_node_t *if_out_node =
vlib_get_node (vm, vnet_interface_output_node.index);
@@ -833,6 +875,10 @@ vnet_register_interface (vnet_main_t * vnm,
hw->hw_if_index = hw_index;
hw->default_rx_mode = VNET_HW_IF_RX_MODE_POLLING;
+ if (hw_class->tx_hash_fn_type == VNET_HASH_FN_TYPE_ETHERNET ||
+ hw_class->tx_hash_fn_type == VNET_HASH_FN_TYPE_IP)
+ hw->hf = vnet_hash_default_function (hw_class->tx_hash_fn_type);
+
if (dev_class->format_device_name)
hw->name = format (0, "%U", dev_class->format_device_name, dev_instance);
else if (hw_class->format_interface_name)
@@ -864,15 +910,11 @@ vnet_register_interface (vnet_main_t * vnm,
hw->hw_instance = hw_instance;
hw->max_rate_bits_per_sec = 0;
- hw->min_packet_bytes = 0;
vnet_sw_interface_set_mtu (vnm, hw->sw_if_index, 0);
if (dev_class->tx_function == 0 && dev_class->tx_fn_registrations == 0)
goto no_output_nodes; /* No output/tx nodes to create */
- tx_node_name = (char *) format (0, "%v-tx", hw->name);
- output_node_name = (char *) format (0, "%v-output", hw->name);
-
/* If we have previously deleted interface nodes, re-use them. */
if (vec_len (im->deleted_hw_interface_nodes) > 0)
{
@@ -885,8 +927,8 @@ vnet_register_interface (vnet_main_t * vnm,
hw->tx_node_index = hn->tx_node_index;
hw->output_node_index = hn->output_node_index;
- vlib_node_rename (vm, hw->tx_node_index, "%v", tx_node_name);
- vlib_node_rename (vm, hw->output_node_index, "%v", output_node_name);
+ vlib_node_rename (vm, hw->tx_node_index, "%v-tx", hw->name);
+ vlib_node_rename (vm, hw->output_node_index, "%v-output", hw->name);
foreach_vlib_main ()
{
@@ -940,7 +982,7 @@ vnet_register_interface (vnet_main_t * vnm,
VLIB_NODE_RUNTIME_PERF_RESET);
}
- _vec_len (im->deleted_hw_interface_nodes) -= 1;
+ vec_dec_len (im->deleted_hw_interface_nodes, 1);
}
else
{
@@ -960,7 +1002,6 @@ vnet_register_interface (vnet_main_t * vnm,
r.vector_size = sizeof (u32);
r.flags = VLIB_NODE_FLAG_IS_OUTPUT;
- r.name = tx_node_name;
if (dev_class->tx_fn_registrations)
{
r.function = 0;
@@ -969,14 +1010,13 @@ vnet_register_interface (vnet_main_t * vnm,
else
r.function = dev_class->tx_function;
- hw->tx_node_index = vlib_register_node (vm, &r);
+ hw->tx_node_index = vlib_register_node (vm, &r, "%v-tx", hw->name);
vlib_node_add_named_next_with_slot (vm, hw->tx_node_index,
"error-drop",
VNET_INTERFACE_TX_NEXT_DROP);
r.flags = 0;
- r.name = output_node_name;
r.format_trace = format_vnet_interface_output_trace;
if (if_out_node->node_fn_registrations)
{
@@ -990,12 +1030,14 @@ vnet_register_interface (vnet_main_t * vnm,
static char *e[] = {
"interface is down",
"interface is deleted",
+ "no tx queue available",
};
r.n_errors = ARRAY_LEN (e);
r.error_strings = e;
}
- hw->output_node_index = vlib_register_node (vm, &r);
+ hw->output_node_index =
+ vlib_register_node (vm, &r, "%v-output", hw->name);
vlib_node_add_named_next_with_slot (vm, hw->output_node_index,
"error-drop",
@@ -1038,9 +1080,6 @@ no_output_nodes:
VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE);
vnet_hw_interface_set_flags_helper (vnm, hw_index, /* flags */ 0,
VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE);
- vec_free (tx_node_name);
- vec_free (output_node_name);
-
return hw_index;
}
@@ -1067,7 +1106,6 @@ vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index)
/* Delete any sub-interfaces. */
{
u32 id, sw_if_index;
- /* *INDENT-OFF* */
hash_foreach (id, sw_if_index, hw->sub_interface_sw_if_index_by_id,
({
vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
@@ -1077,7 +1115,6 @@ vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index)
vnet_delete_sw_interface (vnm, sw_if_index);
}));
hash_free (hw->sub_interface_sw_if_index_by_id);
- /* *INDENT-ON* */
}
/* Delete software interface corresponding to hardware interface. */
@@ -1102,11 +1139,12 @@ vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index)
"interface-%d-output-deleted", hw_if_index);
vlib_node_rename (vm, hw->tx_node_index, "interface-%d-tx-deleted",
hw_if_index);
+ vlib_unregister_errors (vm, hw->output_node_index);
+ vlib_unregister_errors (vm, hw->tx_node_index);
vec_add2 (im->deleted_hw_interface_nodes, dn, 1);
dn->tx_node_index = hw->tx_node_index;
dn->output_node_index = hw->output_node_index;
}
-
hash_unset_mem (im->hw_interface_by_name, hw->name);
vec_free (hw->name);
vec_free (hw->hw_address);
@@ -1127,14 +1165,12 @@ vnet_hw_interface_walk_sw (vnet_main_t * vnm,
if (WALK_STOP == fn (vnm, hi->sw_if_index, ctx))
return;
- /* *INDENT-OFF* */
hash_foreach (id, sw_if_index,
hi->sub_interface_sw_if_index_by_id,
({
if (WALK_STOP == fn (vnm, sw_if_index, ctx))
break;
}));
- /* *INDENT-ON* */
}
void
@@ -1146,13 +1182,11 @@ vnet_hw_interface_walk (vnet_main_t * vnm,
im = &vnm->interface_main;
- /* *INDENT-OFF* */
pool_foreach (hi, im->hw_interfaces)
{
if (WALK_STOP == fn(vnm, hi->hw_if_index, ctx))
break;
}
- /* *INDENT-ON* */
}
void
@@ -1164,13 +1198,11 @@ vnet_sw_interface_walk (vnet_main_t * vnm,
im = &vnm->interface_main;
- /* *INDENT-OFF* */
pool_foreach (si, im->sw_interfaces)
{
if (WALK_STOP == fn (vnm, si, ctx))
break;
}
- /* *INDENT-ON* */
}
void
@@ -1308,7 +1340,10 @@ vnet_hw_interface_compare (vnet_main_t * vnm,
int
vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index)
{
- vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+ vnet_sw_interface_t *si = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+ if (si == NULL)
+ return -1;
+
if ((si->type == VNET_SW_INTERFACE_TYPE_P2P) ||
(si->type == VNET_SW_INTERFACE_TYPE_PIPE))
return 1;
@@ -1353,6 +1388,26 @@ vnet_sw_interface_supports_addressing (vnet_main_t *vnm, u32 sw_if_index)
return NULL;
}
+u32
+vnet_register_device_class (vlib_main_t *vm, vnet_device_class_t *c)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ c->index = vec_len (im->device_classes);
+ hash_set_mem (im->device_class_by_name, c->name, c->index);
+
+ /* to avoid confusion, please remove ".tx_function" statement
+ from VNET_DEVICE_CLASS() if using function candidates */
+ ASSERT (c->tx_fn_registrations == 0 || c->tx_function == 0);
+
+ if (c->tx_fn_registrations)
+ c->tx_function =
+ vlib_node_get_preferred_node_fn_variant (vm, c->tx_fn_registrations);
+
+ vec_add1 (im->device_classes, c[0]);
+ return c->index;
+}
+
clib_error_t *
vnet_interface_init (vlib_main_t * vm)
{
@@ -1399,28 +1454,10 @@ vnet_interface_init (vlib_main_t * vm)
im->device_class_by_name = hash_create_string ( /* size */ 0,
sizeof (uword));
- {
- vnet_device_class_t *c;
-
- c = vnm->device_class_registrations;
-
- while (c)
- {
- c->index = vec_len (im->device_classes);
- hash_set_mem (im->device_class_by_name, c->name, c->index);
- /* to avoid confusion, please remove ".tx_function" statement
- from VNET_DEVICE_CLASS() if using function candidates */
- ASSERT (c->tx_fn_registrations == 0 || c->tx_function == 0);
-
- if (c->tx_fn_registrations)
- c->tx_function = vlib_node_get_preferred_node_fn_variant (
- vm, c->tx_fn_registrations);
-
- vec_add1 (im->device_classes, c[0]);
- c = c->next_class_registration;
- }
- }
+ for (vnet_device_class_t *c = vnm->device_class_registrations; c;
+ c = c->next_class_registration)
+ vnet_register_device_class (vm, c);
im->hw_interface_class_by_name = hash_create_string ( /* size */ 0,
sizeof (uword));
@@ -1890,13 +1927,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (collect_detailed_interface_stats_command, static) = {
.path = "interface collect detailed-stats",
.short_help = "interface collect detailed-stats <enable|disable>",
.function = collect_detailed_interface_stats_cli,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/interface.h b/src/vnet/interface.h
index 0a30239e982..f0cb540f979 100644
--- a/src/vnet/interface.h
+++ b/src/vnet/interface.h
@@ -44,6 +44,7 @@
#include <vppinfra/pcap.h>
#include <vnet/l3_types.h>
#include <vppinfra/lock.h>
+#include <vnet/hash/hash.h>
struct vnet_main_t;
struct vnet_hw_interface_t;
@@ -69,6 +70,10 @@ typedef clib_error_t *(vnet_subif_add_del_function_t)
(struct vnet_main_t * vnm, u32 if_index,
struct vnet_sw_interface_t * template, int is_add);
+/* Interface set mtu callback. */
+typedef clib_error_t *(vnet_interface_set_max_frame_size_function_t) (
+ struct vnet_main_t *vnm, struct vnet_hw_interface_t *hi, u32 mtu);
+
/* Interface set mac address callback. */
typedef clib_error_t *(vnet_interface_set_mac_address_function_t)
(struct vnet_hw_interface_t * hi,
@@ -287,6 +292,8 @@ typedef struct _vnet_device_class
} vnet_device_class_t;
+u32 vnet_register_device_class (vlib_main_t *, vnet_device_class_t *);
+
#ifndef CLIB_MARCH_VARIANT
#define VNET_DEVICE_CLASS(x,...) \
__VA_ARGS__ vnet_device_class_t x; \
@@ -315,7 +322,8 @@ static __clib_unused vnet_device_class_t __clib_unused_##x
#endif
#define VNET_DEVICE_CLASS_TX_FN(devclass) \
- uword CLIB_MARCH_SFX (devclass##_tx_fn) (); \
+ uword CLIB_MARCH_SFX (devclass##_tx_fn) ( \
+ vlib_main_t *, vlib_node_runtime_t *, vlib_frame_t *); \
static vlib_node_fn_registration_t CLIB_MARCH_SFX ( \
devclass##_tx_fn_registration) = { \
.function = &CLIB_MARCH_SFX (devclass##_tx_fn), \
@@ -410,6 +418,9 @@ typedef struct _vnet_hw_interface_class
/* Flags */
vnet_hw_interface_class_flags_t flags;
+ /* tx hash type for interfaces of this hw class */
+ vnet_hash_fn_type_t tx_hash_fn_type;
+
/* Function to call when hardware interface is added/deleted. */
vnet_interface_function_t *interface_add_del_function;
@@ -425,6 +436,9 @@ typedef struct _vnet_hw_interface_class
/* Function to add/delete additional MAC addresses */
vnet_interface_add_del_mac_address_function_t *mac_addr_add_del_function;
+ /* Function to set max frame size. */
+ vnet_interface_set_max_frame_size_function_t *set_max_frame_size;
+
/* Format function to display interface name. */
format_function_t *format_interface_name;
@@ -515,60 +529,60 @@ typedef enum vnet_hw_interface_flags_t_
VNET_HW_INTERFACE_FLAG_NBMA = (1 << 19),
} vnet_hw_interface_flags_t;
-typedef enum vnet_hw_interface_capabilities_t_
+#define foreach_vnet_hw_if_caps \
+ _ (0, TX_IP4_CKSUM, "ip4-csum-tx") \
+ _ (1, TX_TCP_CKSUM, "tcp-csum-tx") \
+ _ (2, TX_UDP_CKSUM, "udp-csum-tx") \
+ _ (3, TX_IP4_OUTER_CKSUM, "outer-ip4-csum-tx") \
+ _ (4, TX_UDP_OUTER_CKSUM, "outer-udp-csum-tx") \
+ _ (5, RX_IP4_CKSUM, "ip4-csum-rx") \
+ _ (6, RX_TCP_CKSUM, "tcp-csum-rx") \
+ _ (7, RX_UDP_CKSUM, "udp-csum-rx") \
+ _ (8, RX_IP4_OUTER_CKSUM, "outer-ip4-csum-rx") \
+ _ (9, RX_UDP_OUTER_CKSUM, "outer-udp-csum-rx") \
+ _ (10, TCP_GSO, "tcp-tso") \
+ _ (11, UDP_GSO, "udp-gso") \
+ _ (12, VXLAN_TNL_GSO, "vxlan-tnl-gso") \
+ _ (13, IPIP_TNL_GSO, "ipip-tnl-gso") \
+ _ (14, GENEVE_TNL_GSO, "geneve-tnl-gso") \
+ _ (15, GRE_TNL_GSO, "gre-tnl-gso") \
+ _ (16, UDP_TNL_GSO, "udp-tnl-gso") \
+ _ (17, IP_TNL_GSO, "ip-tnl-gso") \
+ _ (18, TCP_LRO, "tcp-lro") \
+ _ (30, INT_MODE, "int-mode") \
+ _ (31, MAC_FILTER, "mac-filter")
+
+typedef enum vnet_hw_if_caps_t_
{
VNET_HW_INTERFACE_CAP_NONE,
+#define _(bit, sfx, str) VNET_HW_IF_CAP_##sfx = (1 << (bit)),
+ foreach_vnet_hw_if_caps
+#undef _
+
+} vnet_hw_if_caps_t;
+
+#define VNET_HW_IF_CAP_L4_TX_CKSUM \
+ (VNET_HW_IF_CAP_TX_TCP_CKSUM | VNET_HW_IF_CAP_TX_UDP_CKSUM)
+
+#define VNET_HW_IF_CAP_TX_CKSUM \
+ (VNET_HW_IF_CAP_TX_IP4_CKSUM | VNET_HW_IF_CAP_TX_TCP_CKSUM | \
+ VNET_HW_IF_CAP_TX_UDP_CKSUM)
+
+#define VNET_HW_IF_CAP_TX_OUTER_CKSUM \
+ (VNET_HW_IF_CAP_TX_IP4_OUTER_CKSUM | VNET_HW_IF_CAP_TX_UDP_OUTER_CKSUM)
- /* tx checksum offload */
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_IP4_CKSUM = (1 << 0),
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM = (1 << 1),
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM = (1 << 2),
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_IP4_OUTER_CKSUM = (1 << 3),
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_OUTER_CKSUM = (1 << 4),
-
- /* rx checksum offload */
- VNET_HW_INTERFACE_CAP_SUPPORTS_RX_IP4_CKSUM = (1 << 5),
- VNET_HW_INTERFACE_CAP_SUPPORTS_RX_UDP_CKSUM = (1 << 6),
- VNET_HW_INTERFACE_CAP_SUPPORTS_RX_TCP_CKSUM = (1 << 7),
- VNET_HW_INTERFACE_CAP_SUPPORTS_RX_IP4_OUTER_CKSUM = (1 << 8),
- VNET_HW_INTERFACE_CAP_SUPPORTS_RX_UDP_OUTER_CKSUM = (1 << 9),
-
- /* gso */
- VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO = (1 << 10),
- VNET_HW_INTERFACE_CAP_SUPPORTS_UDP_GSO = (1 << 11),
- VNET_HW_INTERFACE_CAP_SUPPORTS_VXLAN_TNL_GSO = (1 << 12),
- VNET_HW_INTERFACE_CAP_SUPPORTS_IPIP_TNL_GSO = (1 << 13),
- VNET_HW_INTERFACE_CAP_SUPPORTS_GENEVE_TNL_GSO = (1 << 14),
- VNET_HW_INTERFACE_CAP_SUPPORTS_GRE_TNL_GSO = (1 << 15),
- VNET_HW_INTERFACE_CAP_SUPPORTS_UDP_TNL_GSO = (1 << 16),
- VNET_HW_INTERFACE_CAP_SUPPORTS_IP_TNL_GSO = (1 << 17),
-
- /* lro */
- VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_LRO = (1 << 18),
-
- /* rx mode */
- VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE = (1 << 30),
- /* hw/driver can switch between l2-promisc and l3-dmac-filter modes */
- VNET_HW_INTERFACE_CAP_SUPPORTS_MAC_FILTER = (1 << 31),
-} vnet_hw_interface_capabilities_t;
-
-#define VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM \
- (VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM | \
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM)
-
-#define VNET_HW_INTERFACE_CAP_SUPPORTS_TX_CKSUM \
- (VNET_HW_INTERFACE_CAP_SUPPORTS_TX_IP4_CKSUM | \
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM | \
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM)
-
-#define VNET_HW_INTERFACE_CAP_SUPPORTS_L4_RX_CKSUM \
- (VNET_HW_INTERFACE_CAP_SUPPORTS_RX_TCP_CKSUM | \
- VNET_HW_INTERFACE_CAP_SUPPORTS_RX_UDP_CKSUM)
-
-#define VNET_HW_INTERFACE_CAP_SUPPORTS_RX_CKSUM \
- (VNET_HW_INTERFACE_CAP_SUPPORTS_RX_IP4_CKSUM | \
- VNET_HW_INTERFACE_CAP_SUPPORTS_RX_TCP_CKSUM | \
- VNET_HW_INTERFACE_CAP_SUPPORTS_RX_UDP_CKSUM)
+#define VNET_HW_IF_CAP_TX_CKSUM_MASK \
+ (VNET_HW_IF_CAP_TX_CKSUM | VNET_HW_IF_CAP_TX_OUTER_CKSUM)
+
+#define VNET_HW_IF_CAP_L4_RX_CKSUM \
+ (VNET_HW_IF_CAP_RX_TCP_CKSUM | VNET_HW_IF_CAP_RX_UDP_CKSUM)
+
+#define VNET_HW_IF_CAP_RX_CKSUM \
+ (VNET_HW_IF_CAP_RX_IP4_CKSUM | VNET_HW_IF_CAP_RX_TCP_CKSUM | \
+ VNET_HW_IF_CAP_RX_UDP_CKSUM)
+
+#define VNET_HW_IF_CAP_TNL_GSO_MASK \
+  (VNET_HW_IF_CAP_VXLAN_TNL_GSO | VNET_HW_IF_CAP_IPIP_TNL_GSO)
#define VNET_HW_INTERFACE_FLAG_DUPLEX_SHIFT 1
#define VNET_HW_INTERFACE_FLAG_SPEED_SHIFT 3
@@ -629,8 +643,9 @@ typedef struct
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- vnet_hw_if_tx_frame_t frame;
- u32 n_threads;
+ vnet_hw_if_tx_frame_t *frame;
+ u32 *lookup_table;
+ u32 n_queues;
} vnet_hw_if_output_node_runtime_t;
/* Hardware-interface. This corresponds to a physical wire
@@ -642,7 +657,7 @@ typedef struct vnet_hw_interface_t
vnet_hw_interface_flags_t flags;
/* capabilities flags */
- vnet_hw_interface_capabilities_t caps;
+ vnet_hw_if_caps_t caps;
/* Hardware address as vector. Zero (e.g. zero-length vector) if no
address for this class (e.g. PPP). */
@@ -684,20 +699,20 @@ typedef struct vnet_hw_interface_t
used by node function vnet_per_buffer_interface_output() */
u32 output_node_next_index;
+ /* called when hw interface is using transmit side packet steering */
+ vnet_hash_fn_t hf;
+
/* Maximum transmit rate for this interface in bits/sec. */
f64 max_rate_bits_per_sec;
- /* Smallest packet size supported by this interface. */
- u32 min_supported_packet_bytes;
-
- /* Largest packet size supported by this interface. */
- u32 max_supported_packet_bytes;
-
/* Smallest packet size for this interface. */
- u32 min_packet_bytes;
+ u32 min_frame_size;
+
+ /* Largest frame size for this interface. */
+ u32 max_frame_size;
- /* Largest packet size for this interface. */
- u32 max_packet_bytes;
+ /* Layer 2 overhead */
+ u16 frame_overhead;
/* Hash table mapping sub interface id to sw_if_index. */
uword *sub_interface_sw_if_index_by_id;
diff --git a/src/vnet/interface/caps.c b/src/vnet/interface/caps.c
new file mode 100644
index 00000000000..54e8d90c471
--- /dev/null
+++ b/src/vnet/interface/caps.c
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/interface.h>
+
+VLIB_REGISTER_LOG_CLASS (if_caps_log, static) = {
+ .class_name = "interface",
+ .subclass_name = "caps",
+};
+
+#define log_debug(fmt, ...) \
+ vlib_log_debug (if_caps_log.class, fmt, __VA_ARGS__)
+
+format_function_t format_vnet_hw_if_caps;
+
+void
+vnet_hw_if_change_caps (vnet_main_t *vnm, u32 hw_if_index,
+ vnet_hw_if_caps_change_t *caps)
+{
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ vnet_hw_if_caps_t old = hi->caps;
+
+ hi->caps = (hi->caps & ~caps->mask) | caps->val;
+
+ log_debug ("change: interface %U, set: %U, cleared: %U",
+ format_vnet_hw_if_index_name, vnm, hw_if_index,
+ format_vnet_hw_if_caps, (old ^ hi->caps) & caps->val,
+ format_vnet_hw_if_caps, (old ^ hi->caps) & ~caps->val);
+}
+
+u8 *
+format_vnet_hw_if_caps (u8 *s, va_list *va)
+{
+ vnet_hw_if_caps_t caps = va_arg (*va, vnet_hw_if_caps_t);
+
+ const char *strings[sizeof (vnet_hw_if_caps_t) * 8] = {
+#define _(bit, sfx, str) [bit] = (str),
+ foreach_vnet_hw_if_caps
+#undef _
+ };
+
+ if (caps == 0)
+ return format (s, "none");
+
+ while (caps)
+ {
+ int bit = get_lowest_set_bit_index (caps);
+
+ if (strings[bit])
+ s = format (s, "%s", strings[bit]);
+ else
+ s = format (s, "unknown-%u", bit);
+
+ caps = clear_lowest_set_bit (caps);
+ if (caps)
+ vec_add1 (s, ' ');
+ }
+
+ return s;
+}
diff --git a/src/vnet/interface/monitor.c b/src/vnet/interface/monitor.c
new file mode 100644
index 00000000000..3ae1fd29156
--- /dev/null
+++ b/src/vnet/interface/monitor.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vlib/vlib.h>
+
+static clib_error_t *
+monitor_interface_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ const vnet_main_t *vnm = vnet_get_main ();
+ const vlib_combined_counter_main_t *counters =
+ vnm->interface_main.combined_sw_if_counters;
+ f64 refresh_interval = 1.0;
+ u32 refresh_count = ~0;
+ clib_error_t *error = 0;
+ vlib_counter_t vrx[2], vtx[2];
+ f64 ts[2];
+ u32 hw_if_index = ~0;
+ u8 spin = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_vnet_hw_interface, vnm,
+ &hw_if_index))
+ ;
+ else if (unformat (input, "interval %f", &refresh_interval))
+ ;
+ else if (unformat (input, "count %u", &refresh_count))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ if (hw_if_index == ~0)
+ {
+ error = clib_error_return (0, "no interface passed");
+ goto done;
+ }
+
+ vlib_get_combined_counter (counters + VNET_INTERFACE_COUNTER_RX, hw_if_index,
+ &vrx[spin]);
+ vlib_get_combined_counter (counters + VNET_INTERFACE_COUNTER_TX, hw_if_index,
+ &vtx[spin]);
+ ts[spin] = vlib_time_now (vm);
+
+ while (refresh_count--)
+ {
+ f64 sleep_interval, tsd;
+
+ while (((sleep_interval =
+ ts[spin] + refresh_interval - vlib_time_now (vm)) > 0.0))
+ {
+ uword event_type, *event_data = 0;
+ vlib_process_wait_for_event_or_clock (vm, sleep_interval);
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case ~0: /* no events => timeout */
+ break;
+ default:
+ /* someone pressed a key, abort */
+ vlib_cli_output (vm, "Aborted due to a keypress.");
+ goto done;
+ }
+ vec_free (event_data);
+ }
+ spin ^= 1;
+ vlib_get_combined_counter (counters + VNET_INTERFACE_COUNTER_RX,
+ hw_if_index, &vrx[spin]);
+ vlib_get_combined_counter (counters + VNET_INTERFACE_COUNTER_TX,
+ hw_if_index, &vtx[spin]);
+ ts[spin] = vlib_time_now (vm);
+
+ tsd = ts[spin] - ts[spin ^ 1];
+ vlib_cli_output (
+ vm, "rx: %Upps %Ubps tx: %Upps %Ubps", format_base10,
+ (u64) ((vrx[spin].packets - vrx[spin ^ 1].packets) / tsd),
+ format_base10,
+ (u64) (8 * (vrx[spin].bytes - vrx[spin ^ 1].bytes) / tsd),
+ format_base10,
+ (u64) ((vtx[spin].packets - vtx[spin ^ 1].packets) / tsd),
+ format_base10,
+ (u64) (8 * (vtx[spin].bytes - vtx[spin ^ 1].bytes) / tsd));
+ }
+
+done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (monitor_interface_command, static) = {
+ .path = "monitor interface",
+ .short_help =
+ "monitor interface <interface> [interval <intv>] [count <count>]",
+ .function = monitor_interface_command_fn,
+ .is_mp_safe = 1,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/interface/runtime.c b/src/vnet/interface/runtime.c
index e63f1ecc2fc..a88a23bd4c9 100644
--- a/src/vnet/interface/runtime.c
+++ b/src/vnet/interface/runtime.c
@@ -184,39 +184,73 @@ vnet_hw_if_update_runtime_data (vnet_main_t *vnm, u32 hw_if_index)
}
}
- new_out_runtimes =
- vec_dup_aligned (hi->output_node_thread_runtimes, CLIB_CACHE_LINE_BYTES);
- vec_validate_aligned (new_out_runtimes, n_threads - 1,
- CLIB_CACHE_LINE_BYTES);
-
- if (vec_len (hi->output_node_thread_runtimes) != vec_len (new_out_runtimes))
- something_changed_on_tx = 1;
-
- for (int i = 0; i < vec_len (hi->tx_queue_indices); i++)
+ if (vec_len (hi->tx_queue_indices) > 0)
{
- u32 thread_index;
- u32 queue_index = hi->tx_queue_indices[i];
- vnet_hw_if_tx_queue_t *txq = vnet_hw_if_get_tx_queue (vnm, queue_index);
- uword n_threads = clib_bitmap_count_set_bits (txq->threads);
+ new_out_runtimes = vec_dup_aligned (hi->output_node_thread_runtimes,
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (new_out_runtimes, n_threads - 1,
+ CLIB_CACHE_LINE_BYTES);
- clib_bitmap_foreach (thread_index, txq->threads)
+ for (u32 i = 0; i < vec_len (new_out_runtimes); i++)
{
vnet_hw_if_output_node_runtime_t *rt;
- rt = vec_elt_at_index (new_out_runtimes, thread_index);
- if ((rt->frame.queue_id != txq->queue_id) ||
- (rt->n_threads != n_threads))
+ rt = vec_elt_at_index (new_out_runtimes, i);
+ u32 n_queues = 0, total_queues = vec_len (hi->tx_queue_indices);
+ rt->frame = 0;
+ rt->lookup_table = 0;
+
+ for (u32 j = 0; j < total_queues; j++)
{
+ u32 queue_index = hi->tx_queue_indices[j];
+ vnet_hw_if_tx_frame_t frame = { .shared_queue = 0,
+ .hints = 7,
+ .queue_id = ~0 };
+ vnet_hw_if_tx_queue_t *txq =
+ vnet_hw_if_get_tx_queue (vnm, queue_index);
+ if (!clib_bitmap_get (txq->threads, i))
+ continue;
+
log_debug ("tx queue data changed for interface %v, thread %u "
- "(queue_id %u -> %u, n_threads %u -> %u)",
- hi->name, thread_index, rt->frame.queue_id,
- txq->queue_id, rt->n_threads, n_threads);
+ "(queue_id %u)",
+ hi->name, i, txq->queue_id);
+ something_changed_on_tx = 1;
+
+ frame.queue_id = txq->queue_id;
+ frame.shared_queue = txq->shared_queue;
+ vec_add1 (rt->frame, frame);
+ n_queues++;
+ }
+
+	  /* rt->n_queues is deliberately not reset above, so a change in
+	   * the number of queues can be detected here */
+ if (rt->n_queues != n_queues)
+ {
something_changed_on_tx = 1;
- rt->frame.queue_id = txq->queue_id;
- rt->frame.shared_queue = txq->shared_queue;
- rt->n_threads = n_threads;
+ rt->n_queues = n_queues;
+ }
+	  /*
+	   * The lookup table (hash -> queue) is only used when the
+	   * interface has multiple tx queues.
+	   */
+ if (rt->n_queues > 0)
+ {
+ if (!is_pow2 (n_queues))
+ n_queues = max_pow2 (n_queues);
+
+ vec_validate_aligned (rt->lookup_table, n_queues - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ for (u32 k = 0; k < vec_len (rt->lookup_table); k++)
+ {
+ rt->lookup_table[k] = rt->frame[k % rt->n_queues].queue_id;
+ log_debug ("tx queue lookup table changed for interface %v, "
+ "(lookup table [%u]=%u)",
+ hi->name, k, rt->lookup_table[k]);
+ }
}
}
}
+ else
+ /* interface deleted */
+ something_changed_on_tx = 1;
if (something_changed_on_rx || something_changed_on_tx)
{
@@ -255,10 +289,9 @@ vnet_hw_if_update_runtime_data (vnet_main_t *vnm, u32 hw_if_index)
{
void *in = rt->rxq_interrupts;
int int_num = -1;
- while ((int_num = clib_interrupt_get_next (in, int_num)) !=
- -1)
+ while ((int_num = clib_interrupt_get_next_and_clear (
+ in, int_num)) != -1)
{
- clib_interrupt_clear (in, int_num);
pending_int = clib_bitmap_set (pending_int, int_num, 1);
last_int = clib_max (last_int, int_num);
}
@@ -303,6 +336,11 @@ vnet_hw_if_update_runtime_data (vnet_main_t *vnm, u32 hw_if_index)
{
vec_free (d[i]);
vec_free (a[i]);
+ if (new_out_runtimes)
+ {
+ vec_free (new_out_runtimes[i].frame);
+ vec_free (new_out_runtimes[i].lookup_table);
+ }
}
vec_free (d);
diff --git a/src/vnet/interface/rx_queue.c b/src/vnet/interface/rx_queue.c
index cec0296519c..b1fc82f38e9 100644
--- a/src/vnet/interface/rx_queue.c
+++ b/src/vnet/interface/rx_queue.c
@@ -124,7 +124,10 @@ vnet_hw_if_unregister_all_rx_queues (vnet_main_t *vnm, u32 hw_if_index)
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
vnet_interface_main_t *im = &vnm->interface_main;
vnet_hw_if_rx_queue_t *rxq;
+ vlib_main_t *vm;
+ vnet_hw_if_rx_node_runtime_t *rt;
u64 key;
+ u32 queue_index;
log_debug ("unregister_all: interface %v", hi->name);
@@ -132,6 +135,15 @@ vnet_hw_if_unregister_all_rx_queues (vnet_main_t *vnm, u32 hw_if_index)
{
rxq = vnet_hw_if_get_rx_queue (vnm, hi->rx_queue_indices[i]);
key = rx_queue_key (rxq->hw_if_index, rxq->queue_id);
+ if (PREDICT_FALSE (rxq->mode == VNET_HW_IF_RX_MODE_INTERRUPT ||
+ rxq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
+ {
+ vm = vlib_get_main_by_index (rxq->thread_index);
+ queue_index = vnet_hw_if_get_rx_queue_index_by_id (vnm, hw_if_index,
+ rxq->queue_id);
+ rt = vlib_node_get_runtime_data (vm, hi->input_node_index);
+ clib_interrupt_clear (rt->rxq_interrupts, queue_index);
+ }
hash_unset_mem_free (&im->rxq_index_by_hw_if_index_and_queue_id, &key);
pool_put_index (im->hw_if_rx_queues, hi->rx_queue_indices[i]);
@@ -240,14 +252,12 @@ vnet_hw_if_generate_rxq_int_poll_vector (vlib_main_t *vm,
vec_reset_length (rt->rxq_vector_int);
- while ((int_num = clib_interrupt_get_next (rt->rxq_interrupts, int_num)) !=
- -1)
+ while ((int_num = clib_interrupt_get_next_and_clear (rt->rxq_interrupts,
+ int_num)) != -1)
{
vnet_hw_if_rx_queue_t *rxq = vnet_hw_if_get_rx_queue (vnm, int_num);
vnet_hw_if_rxq_poll_vector_t *pv;
- clib_interrupt_clear (rt->rxq_interrupts, int_num);
-
vec_add2 (rt->rxq_vector_int, pv, 1);
pv->dev_instance = rxq->dev_instance;
pv->queue_id = rxq->queue_id;
diff --git a/src/vnet/interface/stats.c b/src/vnet/interface/stats.c
new file mode 100644
index 00000000000..4f3213aafc3
--- /dev/null
+++ b/src/vnet/interface/stats.c
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/stats/stats.h>
+#include <vnet/vnet.h>
+#include <vnet/devices/devices.h> /* vnet_get_aggregate_rx_packets */
+#include <vnet/interface.h>
+
+vlib_stats_string_vector_t if_names = 0;
+static u32 **dir_entry_indices = 0;
+
+static struct
+{
+ char *prefix, *name;
+ u32 index;
+} if_counters[] = {
+#define _(e, n, p) { .prefix = #p, .name = #n },
+ foreach_simple_interface_counter_name foreach_combined_interface_counter_name
+#undef _
+};
+
+static clib_error_t *
+statseg_sw_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add)
+{
+ u8 *name = 0;
+
+ if (if_names == 0)
+ {
+ if_names = vlib_stats_add_string_vector ("/if/names");
+
+ for (int i = 0; i < ARRAY_LEN (if_counters); i++)
+ if_counters[i].index = vlib_stats_find_entry_index (
+ "/%s/%s", if_counters[i].prefix, if_counters[i].name);
+ }
+
+ vec_validate (dir_entry_indices, sw_if_index);
+
+ vlib_stats_segment_lock ();
+
+ if (is_add)
+ {
+ vnet_sw_interface_t *si, *si_sup;
+ vnet_hw_interface_t *hi_sup;
+
+ si = vnet_get_sw_interface (vnm, sw_if_index);
+ si_sup = vnet_get_sup_sw_interface (vnm, si->sw_if_index);
+ ASSERT (si_sup->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+ hi_sup = vnet_get_hw_interface (vnm, si_sup->hw_if_index);
+
+ name = format (0, "%v", hi_sup->name);
+ if (si->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
+ name = format (name, ".%d", si->sub.id);
+
+ vlib_stats_set_string_vector (&if_names, sw_if_index, "%v", name);
+
+ for (u32 index, i = 0; i < ARRAY_LEN (if_counters); i++)
+ {
+ index = vlib_stats_add_symlink (
+ if_counters[i].index, sw_if_index, "/interfaces/%U/%s",
+ format_vlib_stats_symlink, name, if_counters[i].name);
+ ASSERT (index != ~0);
+ vec_add1 (dir_entry_indices[sw_if_index], index);
+ }
+ }
+ else
+ {
+ name = format (0, "%s", "deleted");
+ vlib_stats_set_string_vector (&if_names, sw_if_index, "%v", name);
+ for (u32 i = 0; i < vec_len (dir_entry_indices[sw_if_index]); i++)
+ vlib_stats_remove_entry (dir_entry_indices[sw_if_index][i]);
+ vec_free (dir_entry_indices[sw_if_index]);
+ }
+
+ vec_free (name);
+
+ vlib_stats_segment_unlock ();
+
+ return 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (statseg_sw_interface_add_del);
diff --git a/src/vnet/interface/tx_queue.rst b/src/vnet/interface/tx_queue.rst
new file mode 100644
index 00000000000..e8f0e039b8e
--- /dev/null
+++ b/src/vnet/interface/tx_queue.rst
@@ -0,0 +1,159 @@
+.. _TX_Queue_doc:
+
+Transmit Queues
+===============
+
+Overview
+________
+
+VPP implements a transmit queue infrastructure to access and manage transmit
+queues. It provides common registration functions to register or unregister
+an interface’s transmit queues, as well as functions to place queues on given
+thread(s).
+
+The TXQ Infrastructure
+_______________________
+
+The infra registers each queue under a unique key formed by concatenating the
+hardware interface index ``hw_if_index`` and the per-interface queue
+identifier ``queue_id``. On registration, the infra returns a unique global
+``queue_index`` which the driver can use to access that queue later.
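+
+A driver typically registers its queues right after interface creation. A
+minimal sketch (the infra calls are real; the surrounding driver variables
+``vnm`` and ``hw_if_index`` are assumed):
+
+.. code:: c
+
+  /* register queue 0 of this interface with the TXQ infra */
+  u32 qi = vnet_hw_if_register_tx_queue (vnm, hw_if_index, 0);
+
+  /* place the queue on the main thread (thread index 0) */
+  vnet_hw_if_tx_queue_assign_thread (vnm, qi, 0);
+
+  /* recompute the per-thread output node runtime data */
+  vnet_hw_if_update_runtime_data (vnm, hw_if_index);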
+
+The interface output node uses pre-computed ``output_node_thread_runtime``
+data which describes the queue placement of a given interface on each thread.
+The transmit queue infra implements an algorithm to pre-compute this
+information, including the scalar frame arguments ``vnet_hw_if_tx_frame_t``
+and, when multiple transmit queues are placed on a thread, a per-thread
+``lookup_table``. Interface drivers call ``vnet_hw_if_update_runtime_data()``
+to run this algorithm after registering their transmit queues with the TXQ
+infra.
+
+The algorithm makes a copy of the existing runtime data and iterates over it
+once for each vpp main and worker thread. In each iteration it loops through
+all tx queues of the given interface to fill in the frame data structure
+``vnet_hw_if_tx_frame_t``, and it updates the number of transmit queues the
+interface has on that thread in ``output_node_thread_runtime``. If anything
+in the copy changed, the function swaps in the new working copy under the
+worker barrier and frees the old copy of ``output_node_thread_runtime``.
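+
+Condensed, the per-thread part of that recompute looks roughly like this (a
+sketch of the logic, not the verbatim implementation; ``n_threads``, ``hi``,
+``vnm`` and ``new_out_runtimes`` come from the surrounding function):
+
+.. code:: c
+
+  for (u32 thread = 0; thread < n_threads; thread++)
+    {
+      vnet_hw_if_output_node_runtime_t *rt =
+	vec_elt_at_index (new_out_runtimes, thread);
+      rt->n_queues = 0;
+      for (u32 j = 0; j < vec_len (hi->tx_queue_indices); j++)
+	{
+	  vnet_hw_if_tx_queue_t *txq =
+	    vnet_hw_if_get_tx_queue (vnm, hi->tx_queue_indices[j]);
+	  if (!clib_bitmap_get (txq->threads, thread))
+	    continue; /* queue not placed on this thread */
+	  vnet_hw_if_tx_frame_t f = { .queue_id = txq->queue_id,
+				      .shared_queue = txq->shared_queue };
+	  vec_add1 (rt->frame, f); /* scalar frame args for this queue */
+	  rt->n_queues++;
+	}
+    }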
+
+Multi-TXQ infra
+^^^^^^^^^^^^^^^
+
+When multiple tx queues are placed on a thread, the interface output node
+computes a packet flow hash using the hash infra. Each hardware interface
+class declares the type of hash its interfaces require, e.g. the ethernet
+hardware interface class declares ``VNET_HASH_FN_TYPE_ETHERNET``, while the
+hash function itself is stored in the hardware interface data structure of
+the given interface. A default hash function is selected at interface
+creation time based on priority, and the user can configure a different hash
+on an interface for the multi-txq use case.
+
+The interface output node uses the packet flow hash as an index into the
+pre-calculated lookup table to obtain the queue identifier of the transmit
+queue to use. It enqueues packets to the corresponding frame and copies the
+``vnet_hw_if_tx_frame_t`` into the frame's scalar arguments. Drivers read the
+scalar arguments ``vnet_hw_if_tx_frame_t`` of a given frame to learn which
+transmit queue to use for its packets and, depending on the ``shared_queue``
+bit, may need to acquire a lock on that queue before transmitting.
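+
+Because the lookup table is always sized to a power of two, the output node
+can select the queue with a mask instead of a modulo. A sketch, assuming
+``hash`` was produced by the interface's hash function and ``rt`` is the
+per-thread runtime:
+
+.. code:: c
+
+  u32 n = vec_len (rt->lookup_table); /* always a power of 2 */
+  u32 queue_id = rt->lookup_table[hash & (n - 1)];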
+
+Data structures
+^^^^^^^^^^^^^^^
+
+Queue information is stored in the data structure ``vnet_hw_if_tx_queue_t``:
+
+.. code:: c
+
+ typedef struct
+ {
+  /* whether this queue is shared among multiple threads */
+ u8 shared_queue : 1;
+ /* hw interface index */
+ u32 hw_if_index;
+
+ /* hardware queue identifier */
+ u32 queue_id;
+
+ /* bitmap of threads which use this queue */
+ clib_bitmap_t *threads;
+ } vnet_hw_if_tx_queue_t;
+
+
+Frame information is stored in the data structure ``vnet_hw_if_tx_frame_t``:
+
+.. code:: c
+
+ typedef enum
+ {
+ VNET_HW_IF_TX_FRAME_HINT_NOT_CHAINED = (1 << 0),
+ VNET_HW_IF_TX_FRAME_HINT_NO_GSO = (1 << 1),
+ VNET_HW_IF_TX_FRAME_HINT_NO_CKSUM_OFFLOAD = (1 << 2),
+ } vnet_hw_if_tx_frame_hint_t;
+
+ typedef struct
+ {
+ u8 shared_queue : 1;
+ vnet_hw_if_tx_frame_hint_t hints : 16;
+ u32 queue_id;
+ } vnet_hw_if_tx_frame_t;
+
+Output node runtime information is stored in the data structure
+``vnet_hw_if_output_node_runtime_t`` (the interface's
+``output_node_thread_runtime``):
+
+.. code:: c
+
+ typedef struct
+ {
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vnet_hw_if_tx_frame_t *frame;
+ u32 *lookup_table;
+ u32 n_queues;
+ } vnet_hw_if_output_node_runtime_t;
+
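+The ``frame`` vector holds one entry per tx queue placed on the thread, and
+``lookup_table`` is sized to the next power of two of ``n_queues`` and filled
+round-robin from ``frame``. For example, with three queues (queue ids 0, 1
+and 2) placed on a thread, the table has four entries: ``[0, 1, 2, 0]``.
+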
+
+Multi-TXQ API
+^^^^^^^^^^^^^
+
+This API message is used to place a tx queue of an interface onto the vpp
+main thread or worker thread(s).
+
+.. code:: c
+
+ autoendian autoreply define sw_interface_set_tx_placement
+ {
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u32 queue_id;
+ u32 array_size;
+ u32 threads[array_size];
+ option vat_help = "<interface | sw_if_index <index>> queue <n> [threads <list> | mask <hex>]";
+ };
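+
+For example, to place ``queue_id`` 1 of the interface with ``sw_if_index`` 2
+on worker threads 1 and 2, a client sends the message with ``sw_if_index =
+2``, ``queue_id = 1``, ``array_size = 2`` and ``threads = [1, 2]``.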
+
+Multi-TXQ CLI
+^^^^^^^^^^^^^
+
+::
+
+ set interface tx-queue <interface> queue <n> [threads <list>]
+ set interface tx-hash <interface> hash-name <hash-name>
+
+::
+
+ show hardware-interfaces
+
+ Name Idx Link Hardware
+ tap0 1 up tap0
+ Link speed: unknown
+ RX Queues:
+ queue thread mode
+ 0 main (0) polling
+ TX Queues:
+ TX Hash: [name: crc32c-5tuple priority: 50 description: IPv4/IPv6 header and TCP/UDP ports]
+ queue shared thread(s)
+ 0 no 0
+ Ethernet address 02:fe:27:69:5a:b5
+ VIRTIO interface
+ instance 0
+ RX QUEUE : Total Packets
+ 0 : 0
+ TX QUEUE : Total Packets
+ 0 : 0
+
diff --git a/src/vnet/interface/tx_queue_funcs.h b/src/vnet/interface/tx_queue_funcs.h
index 22956a4eb9b..8fcf7c336a8 100644
--- a/src/vnet/interface/tx_queue_funcs.h
+++ b/src/vnet/interface/tx_queue_funcs.h
@@ -27,3 +27,20 @@ vnet_hw_if_get_tx_queue (vnet_main_t *vnm, u32 queue_index)
return 0;
return pool_elt_at_index (im->hw_if_tx_queues, queue_index);
}
+
+static_always_inline int
+vnet_hw_if_txq_cmp_cli_api (vnet_hw_if_tx_queue_t **a,
+ vnet_hw_if_tx_queue_t **b)
+{
+ if (*a == *b)
+ return 0;
+
+ if (a[0]->hw_if_index != b[0]->hw_if_index)
+ return 2 * (a[0]->hw_if_index > b[0]->hw_if_index) - 1;
+
+ if (a[0]->queue_id != b[0]->queue_id)
+ return 2 * (a[0]->queue_id > b[0]->queue_id) - 1;
+
+ ASSERT (0);
+ return ~0;
+}
diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c
index bbb6168df9e..c727e519138 100644
--- a/src/vnet/interface_api.c
+++ b/src/vnet/interface_api.c
@@ -17,11 +17,15 @@
*------------------------------------------------------------------
*/
+#define _GNU_SOURCE
+#include <string.h>
+
#include <vnet/vnet.h>
#include <vlibmemory/api.h>
#include <vnet/interface.h>
#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
#include <vnet/api_errno.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/ip/ip.h>
@@ -56,7 +60,9 @@ vpe_api_main_t vpe_api_main;
_ (SW_INTERFACE_ADD_DEL_ADDRESS, sw_interface_add_del_address) \
_ (SW_INTERFACE_SET_RX_MODE, sw_interface_set_rx_mode) \
_ (SW_INTERFACE_RX_PLACEMENT_DUMP, sw_interface_rx_placement_dump) \
+ _ (SW_INTERFACE_TX_PLACEMENT_GET, sw_interface_tx_placement_get) \
_ (SW_INTERFACE_SET_RX_PLACEMENT, sw_interface_set_rx_placement) \
+ _ (SW_INTERFACE_SET_TX_PLACEMENT, sw_interface_set_tx_placement) \
_ (SW_INTERFACE_SET_TABLE, sw_interface_set_table) \
_ (SW_INTERFACE_GET_TABLE, sw_interface_get_table) \
_ (SW_INTERFACE_SET_UNNUMBERED, sw_interface_set_unnumbered) \
@@ -143,6 +149,7 @@ vl_api_hw_interface_set_mtu_t_handler (vl_api_hw_interface_set_mtu_t * mp)
u32 sw_if_index = ntohl (mp->sw_if_index);
u16 mtu = ntohs (mp->mtu);
ethernet_main_t *em = &ethernet_main;
+ clib_error_t *err;
int rv = 0;
VALIDATE_SW_IF_INDEX (mp);
@@ -154,7 +161,6 @@ vl_api_hw_interface_set_mtu_t_handler (vl_api_hw_interface_set_mtu_t * mp)
goto bad_sw_if_index;
}
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, si->hw_if_index);
ethernet_interface_t *eif = ethernet_get_interface (em, si->hw_if_index);
if (!eif)
@@ -163,20 +169,13 @@ vl_api_hw_interface_set_mtu_t_handler (vl_api_hw_interface_set_mtu_t * mp)
goto bad_sw_if_index;
}
- if (mtu < hi->min_supported_packet_bytes)
- {
- rv = VNET_API_ERROR_INVALID_VALUE;
- goto bad_sw_if_index;
- }
-
- if (mtu > hi->max_supported_packet_bytes)
+ if ((err = vnet_hw_interface_set_mtu (vnm, si->hw_if_index, mtu)))
{
- rv = VNET_API_ERROR_INVALID_VALUE;
+ rv = vnet_api_error (err);
+ clib_error_free (err);
goto bad_sw_if_index;
}
- vnet_hw_interface_set_mtu (vnm, si->hw_if_index, mtu);
-
BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_HW_INTERFACE_SET_MTU_REPLY);
}
@@ -262,7 +261,7 @@ send_sw_interface_details (vpe_api_main_t * am,
mp->link_duplex = ntohl (((hi->flags & VNET_HW_INTERFACE_FLAG_DUPLEX_MASK) >>
VNET_HW_INTERFACE_FLAG_DUPLEX_SHIFT));
mp->link_speed = ntohl (hi->link_speed);
- mp->link_mtu = ntohs (hi->max_packet_bytes);
+ mp->link_mtu = ntohs (hi->max_frame_size - hi->frame_overhead);
mp->mtu[VNET_MTU_L3] = ntohl (swif->mtu[VNET_MTU_L3]);
mp->mtu[VNET_MTU_IP4] = ntohl (swif->mtu[VNET_MTU_IP4]);
mp->mtu[VNET_MTU_IP6] = ntohl (swif->mtu[VNET_MTU_IP6]);
@@ -388,8 +387,6 @@ vl_api_sw_interface_dump_t_handler (vl_api_sw_interface_dump_t * mp)
vec_add1 (filter, 0); /* Ensure it's a C string for strcasecmp() */
}
- char *strcasestr (char *, char *); /* lnx hdr file botch */
- /* *INDENT-OFF* */
pool_foreach (swif, im->sw_interfaces)
{
if (!vnet_swif_is_api_visible (swif))
@@ -403,7 +400,6 @@ vl_api_sw_interface_dump_t_handler (vl_api_sw_interface_dump_t * mp)
send_sw_interface_details (am, rp, swif, name, mp->context);
}
- /* *INDENT-ON* */
vec_free (name);
vec_free (filter);
@@ -470,41 +466,17 @@ vl_api_sw_interface_set_table_t_handler (vl_api_sw_interface_set_table_t * mp)
REPLY_MACRO (VL_API_SW_INTERFACE_SET_TABLE_REPLY);
}
-int
-ip_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 table_id)
+void
+fib_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 fib_index)
{
- CLIB_UNUSED (ip_interface_address_t * ia);
- u32 fib_index, mfib_index;
+ u32 table_id;
- /*
- * This if table does not exist = error is what we want in the end.
- */
- fib_index = fib_table_find (fproto, table_id);
- mfib_index = mfib_table_find (fproto, table_id);
-
- if (~0 == fib_index || ~0 == mfib_index)
- {
- return (VNET_API_ERROR_NO_SUCH_FIB);
- }
+ table_id = fib_table_get_table_id (fib_index, fproto);
+ ASSERT (table_id != ~0);
if (FIB_PROTOCOL_IP6 == fproto)
{
/*
- * If the interface already has in IP address, then a change int
- * VRF is not allowed. The IP address applied must first be removed.
- * We do not do that automatically here, since VPP has no knowledge
- * of whether those subnets are valid in the destination VRF.
- */
- /* *INDENT-OFF* */
- foreach_ip_interface_address (&ip6_main.lookup_main,
- ia, sw_if_index,
- 1 /* honor unnumbered */ ,
- ({
- return (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE);
- }));
- /* *INDENT-ON* */
-
- /*
* tell those that are interested that the binding is changing.
*/
ip6_table_bind_callback_t *cb;
@@ -518,39 +490,18 @@ ip_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 table_id)
if (0 != ip6_main.fib_index_by_sw_if_index[sw_if_index])
fib_table_unlock (ip6_main.fib_index_by_sw_if_index[sw_if_index],
FIB_PROTOCOL_IP6, FIB_SOURCE_INTERFACE);
- if (0 != ip6_main.mfib_index_by_sw_if_index[sw_if_index])
- mfib_table_unlock (ip6_main.mfib_index_by_sw_if_index[sw_if_index],
- FIB_PROTOCOL_IP6, MFIB_SOURCE_INTERFACE);
if (0 != table_id)
{
/* we need to lock the table now it's inuse */
fib_table_lock (fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_INTERFACE);
- mfib_table_lock (mfib_index, FIB_PROTOCOL_IP6,
- MFIB_SOURCE_INTERFACE);
}
ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
- ip6_main.mfib_index_by_sw_if_index[sw_if_index] = mfib_index;
}
else
{
/*
- * If the interface already has in IP address, then a change int
- * VRF is not allowed. The IP address applied must first be removed.
- * We do not do that automatically here, since VPP has no knowledge
- * of whether those subnets are valid in the destination VRF.
- */
- /* *INDENT-OFF* */
- foreach_ip_interface_address (&ip4_main.lookup_main,
- ia, sw_if_index,
- 1 /* honor unnumbered */ ,
- ({
- return (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE);
- }));
- /* *INDENT-ON* */
-
- /*
* tell those that are interested that the binding is changing.
*/
ip4_table_bind_callback_t *cb;
@@ -564,23 +515,93 @@ ip_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 table_id)
if (0 != ip4_main.fib_index_by_sw_if_index[sw_if_index])
fib_table_unlock (ip4_main.fib_index_by_sw_if_index[sw_if_index],
FIB_PROTOCOL_IP4, FIB_SOURCE_INTERFACE);
- if (0 != ip4_main.mfib_index_by_sw_if_index[sw_if_index])
- mfib_table_unlock (ip4_main.mfib_index_by_sw_if_index[sw_if_index],
- FIB_PROTOCOL_IP4, MFIB_SOURCE_INTERFACE);
if (0 != table_id)
{
/* we need to lock the table now it's inuse */
fib_index = fib_table_find_or_create_and_lock (
FIB_PROTOCOL_IP4, table_id, FIB_SOURCE_INTERFACE);
+ }
+
+ ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
+ }
+}
+
+void
+mfib_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 mfib_index)
+{
+ u32 table_id;
+
+ table_id = mfib_table_get_table_id (mfib_index, fproto);
+ ASSERT (table_id != ~0);
+
+ if (FIB_PROTOCOL_IP6 == fproto)
+ {
+ if (0 != ip6_main.mfib_index_by_sw_if_index[sw_if_index])
+ mfib_table_unlock (ip6_main.mfib_index_by_sw_if_index[sw_if_index],
+ FIB_PROTOCOL_IP6, MFIB_SOURCE_INTERFACE);
+
+ if (0 != table_id)
+ {
+ /* we need to lock the table now it's inuse */
+ mfib_table_lock (mfib_index, FIB_PROTOCOL_IP6,
+ MFIB_SOURCE_INTERFACE);
+ }
+ ip6_main.mfib_index_by_sw_if_index[sw_if_index] = mfib_index;
+ }
+ else
+ {
+ if (0 != ip4_main.mfib_index_by_sw_if_index[sw_if_index])
+ mfib_table_unlock (ip4_main.mfib_index_by_sw_if_index[sw_if_index],
+ FIB_PROTOCOL_IP4, MFIB_SOURCE_INTERFACE);
+
+ if (0 != table_id)
+ {
+ /* we need to lock the table now it's inuse */
mfib_index = mfib_table_find_or_create_and_lock (
FIB_PROTOCOL_IP4, table_id, MFIB_SOURCE_INTERFACE);
}
- ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
ip4_main.mfib_index_by_sw_if_index[sw_if_index] = mfib_index;
}
+}
+
+int
+ip_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 table_id)
+{
+ CLIB_UNUSED (ip_interface_address_t * ia);
+ u32 fib_index, mfib_index;
+
+ /*
+   * If the table does not exist, an error is what we want in the end.
+ */
+ fib_index = fib_table_find (fproto, table_id);
+ mfib_index = mfib_table_find (fproto, table_id);
+
+ if (~0 == fib_index || ~0 == mfib_index)
+ {
+ return (VNET_API_ERROR_NO_SUCH_FIB);
+ }
+
+ /*
+   * If the interface already has an IP address, then a change in
+ * VRF is not allowed. The IP address applied must first be removed.
+ * We do not do that automatically here, since VPP has no knowledge
+ * of whether those subnets are valid in the destination VRF.
+ */
+ /* clang-format off */
+ foreach_ip_interface_address (FIB_PROTOCOL_IP6 == fproto ?
+ &ip6_main.lookup_main : &ip4_main.lookup_main,
+ ia, sw_if_index,
+ 1 /* honor unnumbered */ ,
+ ({
+ return (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE);
+ }));
+ /* clang-format on */
+
+ fib_table_bind (fproto, sw_if_index, fib_index);
+ mfib_table_bind (fproto, sw_if_index, mfib_index);
return (0);
}
@@ -787,14 +808,12 @@ link_state_process (vlib_main_t * vm,
if (event_by_sw_if_index[i] == 0)
continue;
- /* *INDENT-OFF* */
pool_foreach (reg, vam->interface_events_registrations)
{
vl_reg = vl_api_client_index_to_registration (reg->client_index);
if (vl_reg)
send_sw_interface_event (vam, reg, vl_reg, i, event_by_sw_if_index[i]);
}
- /* *INDENT-ON* */
}
vec_reset_length (event_by_sw_if_index);
}
@@ -810,13 +829,11 @@ static clib_error_t *sw_interface_add_del_function (vnet_main_t * vm,
u32 sw_if_index,
u32 flags);
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (link_state_process_node,static) = {
.function = link_state_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "vpe-link-state-process",
};
-/* *INDENT-ON* */
VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (admin_up_down_function);
VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (link_up_down_function);
@@ -1003,21 +1020,19 @@ vl_api_sw_interface_set_interface_name_t_handler (
{
vl_api_sw_interface_set_interface_name_reply_t *rmp;
vnet_main_t *vnm = vnet_get_main ();
- u32 sw_if_index = ntohl (mp->sw_if_index);
- vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
clib_error_t *error;
int rv = 0;
+ VALIDATE_SW_IF_INDEX (mp);
+
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+
if (mp->name[0] == 0)
{
rv = VNET_API_ERROR_INVALID_VALUE;
goto out;
}
- if (si == 0)
- {
- rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
- goto out;
- }
error = vnet_rename_interface (vnm, si->hw_if_index, (char *) mp->name);
if (error)
@@ -1027,6 +1042,7 @@ vl_api_sw_interface_set_interface_name_t_handler (
}
out:
+ BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_SW_INTERFACE_SET_INTERFACE_NAME_REPLY);
}
@@ -1191,6 +1207,164 @@ out:
}
static void
+send_interface_tx_placement_details (vnet_hw_if_tx_queue_t **all_queues,
+ u32 index, vl_api_registration_t *rp,
+ u32 context)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vl_api_sw_interface_tx_placement_details_t *rmp;
+ u32 n_bits = 0, v = ~0;
+ vnet_hw_if_tx_queue_t **q = vec_elt_at_index (all_queues, index);
+ uword *bitmap = q[0]->threads;
+ u32 hw_if_index = q[0]->hw_if_index;
+ vnet_hw_interface_t *hw_if = vnet_get_hw_interface (vnm, hw_if_index);
+
+ n_bits = clib_bitmap_count_set_bits (bitmap);
+ u32 n = n_bits * sizeof (u32);
+
+ REPLY_MACRO_DETAILS5_END (VL_API_SW_INTERFACE_TX_PLACEMENT_DETAILS, n, rp,
+ context, ({
+ rmp->sw_if_index = hw_if->sw_if_index;
+ rmp->queue_id = q[0]->queue_id;
+ rmp->shared = q[0]->shared_queue;
+ rmp->array_size = n_bits;
+
+ v = clib_bitmap_first_set (bitmap);
+ for (u32 i = 0; i < n_bits; i++)
+ {
+ rmp->threads[i] = v;
+ v = clib_bitmap_next_set (bitmap, v + 1);
+ }
+ }));
+}
+
+static void
+vl_api_sw_interface_tx_placement_get_t_handler (
+ vl_api_sw_interface_tx_placement_get_t *mp)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vl_api_sw_interface_tx_placement_get_reply_t *rmp = 0;
+ vnet_hw_if_tx_queue_t **all_queues = 0;
+ vnet_hw_if_tx_queue_t *q;
+ u32 sw_if_index = mp->sw_if_index;
+ i32 rv = 0;
+
+ if (pool_elts (vnm->interface_main.hw_if_tx_queues) == 0)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto err;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ pool_foreach (q, vnm->interface_main.hw_if_tx_queues)
+ vec_add1 (all_queues, q);
+ vec_sort_with_function (all_queues, vnet_hw_if_txq_cmp_cli_api);
+ }
+ else
+ {
+ u32 qi = ~0;
+ vnet_sw_interface_t *si;
+
+ if (!vnet_sw_if_index_is_api_valid (sw_if_index))
+ {
+ clib_warning ("sw_if_index %u does not exist", sw_if_index);
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ goto err;
+ }
+
+ si = vnet_get_sw_interface (vnm, sw_if_index);
+ if (si->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
+ {
+ clib_warning ("interface type is not HARDWARE! P2P, PIPE and SUB"
+ " interfaces are not supported");
+ rv = VNET_API_ERROR_INVALID_INTERFACE;
+ goto err;
+ }
+
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, si->hw_if_index);
+ for (qi = 0; qi < vec_len (hw->tx_queue_indices); qi++)
+ {
+ q = vnet_hw_if_get_tx_queue (vnm, hw->tx_queue_indices[qi]);
+ vec_add1 (all_queues, q);
+ }
+ }
+
+ REPLY_AND_DETAILS_VEC_MACRO_END (VL_API_SW_INTERFACE_TX_PLACEMENT_GET_REPLY,
+ all_queues, mp, rmp, rv, ({
+ send_interface_tx_placement_details (
+ all_queues, cursor, rp, mp->context);
+ }));
+
+ vec_free (all_queues);
+ return;
+
+err:
+ REPLY_MACRO_END (VL_API_SW_INTERFACE_TX_PLACEMENT_GET_REPLY);
+}
+
+static void
+vl_api_sw_interface_set_tx_placement_t_handler (
+ vl_api_sw_interface_set_tx_placement_t *mp)
+{
+ vl_api_sw_interface_set_tx_placement_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index = mp->sw_if_index;
+ vnet_sw_interface_t *si;
+ uword *bitmap = 0;
+ u32 queue_id = ~0;
+ u32 size = 0;
+ clib_error_t *error = 0;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX_END (mp);
+
+ si = vnet_get_sw_interface (vnm, sw_if_index);
+ if (si->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto bad_sw_if_index;
+ }
+
+ size = mp->array_size;
+ for (u32 i = 0; i < size; i++)
+ {
+ u32 thread_index = mp->threads[i];
+ bitmap = clib_bitmap_set (bitmap, thread_index, 1);
+ }
+
+ queue_id = mp->queue_id;
+ rv = set_hw_interface_tx_queue (si->hw_if_index, queue_id, bitmap);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_INVALID_VALUE:
+ error = clib_error_return (
+ 0, "please specify valid thread(s) - last thread index %u",
+ clib_bitmap_last_set (bitmap));
+ break;
+ case VNET_API_ERROR_INVALID_QUEUE:
+ error = clib_error_return (
+ 0, "unknown queue %u on interface %s", queue_id,
+ vnet_get_hw_interface (vnet_get_main (), si->hw_if_index)->name);
+ break;
+ default:
+ break;
+ }
+
+ if (error)
+ {
+ clib_error_report (error);
+ goto out;
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+out:
+ REPLY_MACRO_END (VL_API_SW_INTERFACE_SET_TX_PLACEMENT_REPLY);
+ clib_bitmap_free (bitmap);
+}
+
+static void
vl_api_create_vlan_subif_t_handler (vl_api_create_vlan_subif_t * mp)
{
vl_api_create_vlan_subif_reply_t *rmp;
@@ -1296,12 +1470,10 @@ vl_api_create_subif_t_handler (vl_api_create_subif_t * mp)
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_CREATE_SUBIF_REPLY,
({
rmp->sw_if_index = ntohl(sub_sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -1343,12 +1515,10 @@ vl_api_create_loopback_t_handler (vl_api_create_loopback_t * mp)
mac_address_decode (mp->mac_address, &mac);
rv = vnet_create_loopback_interface (&sw_if_index, (u8 *) & mac, 0, 0);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_CREATE_LOOPBACK_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void vl_api_create_loopback_instance_t_handler
@@ -1365,12 +1535,10 @@ static void vl_api_create_loopback_instance_t_handler
rv = vnet_create_loopback_interface (&sw_if_index, (u8 *) & mac,
is_specified, user_instance);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_CREATE_LOOPBACK_INSTANCE_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -1424,6 +1592,92 @@ static void
REPLY_MACRO (VL_API_SW_INTERFACE_ADDRESS_REPLACE_END_REPLY);
}
+static void
+vl_api_pcap_set_filter_function_t_handler (
+ vl_api_pcap_set_filter_function_t *mp)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_pcap_t *pp = &vnm->pcap;
+ vl_api_pcap_set_filter_function_reply_t *rmp;
+ unformat_input_t input = { 0 };
+ vlib_is_packet_traced_fn_t *f;
+ char *filter_name;
+ int rv = 0;
+ filter_name = vl_api_from_api_to_new_c_string (&mp->filter_function_name);
+ unformat_init_cstring (&input, filter_name);
+ if (unformat (&input, "%U", unformat_vlib_trace_filter_function, &f) == 0)
+ {
+ rv = -1;
+ goto done;
+ }
+
+ pp->current_filter_function = f;
+
+done:
+ unformat_free (&input);
+ vec_free (filter_name);
+ REPLY_MACRO (VL_API_PCAP_SET_FILTER_FUNCTION_REPLY);
+}
+
+static void
+vl_api_pcap_trace_on_t_handler (vl_api_pcap_trace_on_t *mp)
+{
+ vl_api_pcap_trace_on_reply_t *rmp;
+ unformat_input_t filename, drop_err_name;
+ vnet_pcap_dispatch_trace_args_t capture_args;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ unformat_init_cstring (&filename, (char *) mp->filename);
+ if (!unformat_user (&filename, unformat_vlib_tmpfile,
+ &capture_args.filename))
+ {
+ rv = VNET_API_ERROR_ILLEGAL_NAME;
+ goto out;
+ }
+
+ capture_args.rx_enable = mp->capture_rx;
+ capture_args.tx_enable = mp->capture_tx;
+ capture_args.preallocate_data = mp->preallocate_data;
+ capture_args.free_data = mp->free_data;
+ capture_args.drop_enable = mp->capture_drop;
+ capture_args.status = 0;
+ capture_args.packets_to_capture = ntohl (mp->max_packets);
+ capture_args.sw_if_index = ntohl (mp->sw_if_index);
+ capture_args.filter = mp->filter;
+ capture_args.max_bytes_per_pkt = ntohl (mp->max_bytes_per_packet);
+ capture_args.drop_err = ~0;
+
+ unformat_init_cstring (&drop_err_name, (char *) mp->error);
+ unformat_user (&drop_err_name, unformat_vlib_error, vlib_get_main (),
+ &capture_args.drop_err);
+
+ rv = vnet_pcap_dispatch_trace_configure (&capture_args);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+out:
+ unformat_free (&filename);
+ unformat_free (&drop_err_name);
+
+ REPLY_MACRO (VL_API_PCAP_TRACE_ON_REPLY);
+}
+
+static void
+vl_api_pcap_trace_off_t_handler (vl_api_pcap_trace_off_t *mp)
+{
+ vl_api_pcap_trace_off_reply_t *rmp;
+ vnet_pcap_dispatch_trace_args_t capture_args;
+ int rv = 0;
+
+ clib_memset (&capture_args, 0, sizeof (capture_args));
+
+ rv = vnet_pcap_dispatch_trace_configure (&capture_args);
+
+ REPLY_MACRO (VL_API_PCAP_TRACE_OFF_REPLY);
+}
+
/*
* vpe_api_hookup
* Add vpe's API message handlers to the table.
@@ -1440,20 +1694,31 @@ interface_api_hookup (vlib_main_t * vm)
{
api_main_t *am = vlibapi_get_main ();
- /* Mark these APIs as mp safe */
- am->is_mp_safe[VL_API_SW_INTERFACE_DUMP] = 1;
- am->is_mp_safe[VL_API_SW_INTERFACE_DETAILS] = 1;
- am->is_mp_safe[VL_API_SW_INTERFACE_TAG_ADD_DEL] = 1;
- am->is_mp_safe[VL_API_SW_INTERFACE_SET_INTERFACE_NAME] = 1;
-
- /* Do not replay VL_API_SW_INTERFACE_DUMP messages */
- am->api_trace_cfg[VL_API_SW_INTERFACE_DUMP].replay_enable = 0;
-
/*
* Set up the (msg_name, crc, message-id) table
*/
REPLY_MSG_ID_BASE = setup_message_id_table ();
+ /* Mark these APIs as mp safe */
+ vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_SW_INTERFACE_DUMP,
+ 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_SW_INTERFACE_DETAILS, 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_SW_INTERFACE_TAG_ADD_DEL, 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_SW_INTERFACE_SET_INTERFACE_NAME, 1);
+
+ /* Do not replay VL_API_SW_INTERFACE_DUMP messages */
+ vl_api_allow_msg_replay (am, REPLY_MSG_ID_BASE + VL_API_SW_INTERFACE_DUMP,
+ 0);
+
+ /* Mark these APIs as autoendian */
+ vl_api_set_msg_autoendian (
+ am, REPLY_MSG_ID_BASE + VL_API_SW_INTERFACE_SET_TX_PLACEMENT, 1);
+ vl_api_set_msg_autoendian (
+ am, REPLY_MSG_ID_BASE + VL_API_SW_INTERFACE_TX_PLACEMENT_GET, 1);
+
return 0;
}
diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c
index 4f6f2cf05a5..c56eb9777cf 100644
--- a/src/vnet/interface_cli.c
+++ b/src/vnet/interface_cli.c
@@ -53,6 +53,10 @@
#include <vnet/classify/vnet_classify.h>
#include <vnet/interface/rx_queue_funcs.h>
#include <vnet/interface/tx_queue_funcs.h>
+#include <vnet/hash/hash.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/dev_funcs.h>
+
static int
compare_interface_names (void *a1, void *a2)
{
@@ -68,33 +72,37 @@ show_or_clear_hw_interfaces (vlib_main_t * vm,
vlib_cli_command_t * cmd, int is_show)
{
clib_error_t *error = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
vnet_main_t *vnm = vnet_get_main ();
vnet_interface_main_t *im = &vnm->interface_main;
vnet_hw_interface_t *hi;
u32 hw_if_index, *hw_if_indices = 0;
int i, verbose = -1, show_bond = 0;
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ if (!unformat_user (input, unformat_line_input, line_input))
+ goto skip_unformat;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
/* See if user wants to show a specific interface. */
- if (unformat
- (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
+ if (unformat (line_input, "%U", unformat_vnet_hw_interface, vnm,
+ &hw_if_index))
vec_add1 (hw_if_indices, hw_if_index);
/* See if user wants to show an interface with a specific hw_if_index. */
- else if (unformat (input, "%u", &hw_if_index))
+ else if (unformat (line_input, "%u", &hw_if_index))
vec_add1 (hw_if_indices, hw_if_index);
- else if (unformat (input, "verbose"))
+ else if (unformat (line_input, "verbose"))
verbose = 1; /* this is also the default */
- else if (unformat (input, "detail"))
+ else if (unformat (line_input, "detail"))
verbose = 2;
- else if (unformat (input, "brief"))
+ else if (unformat (line_input, "brief"))
verbose = 0;
- else if (unformat (input, "bond"))
+ else if (unformat (line_input, "bond"))
{
show_bond = 1;
if (verbose < 0)
@@ -104,11 +112,15 @@ show_or_clear_hw_interfaces (vlib_main_t * vm,
else
{
error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ format_unformat_error, line_input);
+ unformat_free (line_input);
goto done;
}
}
+ unformat_free (line_input);
+
+skip_unformat:
/* Gather interfaces. */
if (vec_len (hw_if_indices) == 0)
pool_foreach (hi, im->hw_interfaces)
@@ -137,14 +149,12 @@ show_or_clear_hw_interfaces (vlib_main_t * vm,
vlib_cli_output (vm, "%U\n", format_vnet_hw_interface, vnm,
hi, verbose);
- /* *INDENT-OFF* */
clib_bitmap_foreach (hw_idx, hi->bond_info)
{
shi = vnet_get_hw_interface(vnm, hw_idx);
vlib_cli_output (vm, "%U\n",
format_vnet_hw_interface, vnm, shi, verbose);
}
- /* *INDENT-ON* */
}
}
}
@@ -238,14 +248,12 @@ clear_hw_interfaces (vlib_main_t * vm,
* cpu socket 0
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_hw_interfaces_command, static) = {
.path = "show hardware-interfaces",
.short_help = "show hardware-interfaces [brief|verbose|detail] [bond] "
"[<interface> [<interface> [..]]] [<sw_idx> [<sw_idx> [..]]]",
.function = show_hw_interfaces,
};
-/* *INDENT-ON* */
/*?
@@ -259,14 +267,12 @@ VLIB_CLI_COMMAND (show_hw_interfaces_command, static) = {
* name and software index (where 2 is the software index):
* @cliexcmd{clear hardware-interfaces GigabitEthernet7/0/0 2}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_hw_interface_counters_command, static) = {
.path = "clear hardware-interfaces",
.short_help = "clear hardware-interfaces "
"[<interface> [<interface> [..]]] [<sw_idx> [<sw_idx> [..]]]",
.function = clear_hw_interfaces,
};
-/* *INDENT-ON* */
static int
sw_interface_name_compare (void *a1, void *a2)
@@ -407,15 +413,13 @@ show_sw_interfaces (vlib_main_t * vm,
/* Gather interfaces. */
sorted_sis =
vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
- _vec_len (sorted_sis) = 0;
- /* *INDENT-OFF* */
+ vec_set_len (sorted_sis, 0);
pool_foreach (si, im->sw_interfaces)
{
int visible = vnet_swif_is_api_visible (si);
if (visible)
vec_add1 (sorted_sis, si[0]);
}
- /* *INDENT-ON* */
/* Sort by name. */
vec_sort_with_function (sorted_sis, sw_interface_name_compare);
}
@@ -457,7 +461,6 @@ show_sw_interfaces (vlib_main_t * vm,
/* Display any L2 info */
vlib_cli_output (vm, "%U", format_l2_input, si->sw_if_index);
- /* *INDENT-OFF* */
/* Display any IP4 addressing info */
foreach_ip_interface_address (lm4, ia, si->sw_if_index,
1 /* honor unnumbered */,
@@ -472,9 +475,7 @@ show_sw_interfaces (vlib_main_t * vm,
vlib_cli_output (vm, " L3 %U/%d",
format_ip4_address, r4, ia->address_length);
}));
- /* *INDENT-ON* */
- /* *INDENT-OFF* */
/* Display any IP6 addressing info */
foreach_ip_interface_address (lm6, ia, si->sw_if_index,
1 /* honor unnumbered */,
@@ -489,7 +490,6 @@ show_sw_interfaces (vlib_main_t * vm,
vlib_cli_output (vm, " L3 %U/%d",
format_ip6_address, r6, ia->address_length);
}));
- /* *INDENT-ON* */
}
}
else
@@ -505,29 +505,24 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sw_interfaces_command, static) = {
.path = "show interface",
- .short_help = "show interface [address|addr|features|feat|vtr] [<interface> [<interface> [..]]] [verbose]",
+ .short_help = "show interface [address|addr|features|feat|vtr|tag] "
+ "[<interface> [<interface> [..]]] [verbose]",
.function = show_sw_interfaces,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/* Root of all interface commands. */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vnet_cli_interface_command, static) = {
.path = "interface",
.short_help = "Interface commands",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vnet_cli_set_interface_command, static) = {
.path = "set interface",
.short_help = "Interface commands",
};
-/* *INDENT-ON* */
static clib_error_t *
clear_interface_counters (vlib_main_t * vm,
@@ -568,13 +563,11 @@ clear_interface_counters (vlib_main_t * vm,
* Example of how to clear the statistics for all interfaces:
* @cliexcmd{clear interfaces}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_interface_counters_command, static) = {
.path = "clear interfaces",
.short_help = "clear interfaces",
.function = clear_interface_counters,
};
-/* *INDENT-ON* */
/**
* Parse subinterface names.
@@ -899,7 +892,6 @@ done:
* @cliexcmd{set interface GigabitEthernet2/0/0.7 up}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_sub_interfaces_command, static) = {
.path = "create sub-interfaces",
.short_help = "create sub-interfaces <interface> "
@@ -908,7 +900,6 @@ VLIB_CLI_COMMAND (create_sub_interfaces_command, static) = {
"{<subId> dot1q|dot1ad <vlanId>|any [inner-dot1q <vlanId>|any] [exact-match]}",
.function = create_sub_interfaces,
};
-/* *INDENT-ON* */
static clib_error_t *
set_state (vlib_main_t * vm,
@@ -957,13 +948,11 @@ done:
'<em>down</em>':
* @cliexcmd{set interface state GigabitEthernet2/0/0 down}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_state_command, static) = {
.path = "set interface state",
.short_help = "set interface state <interface> [up|down|punt|enable]",
.function = set_state,
};
-/* *INDENT-ON* */
static clib_error_t *
set_unnumbered (vlib_main_t * vm,
@@ -1013,13 +1002,11 @@ set_unnumbered (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_unnumbered_command, static) = {
.path = "set interface unnumbered",
.short_help = "set interface unnumbered [<interface> use <interface> | del <interface>]",
.function = set_unnumbered,
};
-/* *INDENT-ON* */
@@ -1056,13 +1043,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_hw_class_command, static) = {
.path = "set interface hw-class",
.short_help = "Set interface hardware class",
.function = set_hw_class,
};
-/* *INDENT-ON* */
static clib_error_t *
vnet_interface_cli_init (vlib_main_t * vm)
@@ -1106,13 +1091,11 @@ renumber_interface_command_fn (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (renumber_interface_command, static) = {
.path = "renumber interface",
.short_help = "renumber interface <interface> <new-dev-instance>",
.function = renumber_interface_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
promiscuous_cmd (vlib_main_t * vm,
@@ -1142,13 +1125,11 @@ promiscuous_cmd (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_promiscuous_cmd, static) = {
.path = "set interface promiscuous",
.short_help = "set interface promiscuous [on|off] <interface>",
.function = promiscuous_cmd,
};
-/* *INDENT-ON* */
static clib_error_t *
mtu_cmd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
@@ -1157,6 +1138,7 @@ mtu_cmd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
u32 hw_if_index, sw_if_index, mtu;
ethernet_main_t *em = &ethernet_main;
u32 mtus[VNET_N_MTU] = { 0, 0, 0, 0 };
+ clib_error_t *err;
if (unformat (input, "%d %U", &mtu,
unformat_vnet_hw_interface, vnm, &hw_if_index))
@@ -1165,22 +1147,14 @@ mtu_cmd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
* Change physical MTU on interface. Only supported for Ethernet
* interfaces
*/
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
ethernet_interface_t *eif = ethernet_get_interface (em, hw_if_index);
if (!eif)
return clib_error_return (0, "not supported");
- if (mtu < hi->min_supported_packet_bytes)
- return clib_error_return (0, "Invalid mtu (%d): "
- "must be >= min pkt bytes (%d)", mtu,
- hi->min_supported_packet_bytes);
-
- if (mtu > hi->max_supported_packet_bytes)
- return clib_error_return (0, "Invalid mtu (%d): must be <= (%d)", mtu,
- hi->max_supported_packet_bytes);
-
- vnet_hw_interface_set_mtu (vnm, hw_if_index, mtu);
+ err = vnet_hw_interface_set_mtu (vnm, hw_if_index, mtu);
+ if (err)
+ return err;
goto done;
}
else if (unformat (input, "packet %d %U", &mtu,
@@ -1206,13 +1180,11 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_mtu_cmd, static) = {
.path = "set interface mtu",
.short_help = "set interface mtu [packet|ip4|ip6|mpls] <value> <interface>",
.function = mtu_cmd,
};
-/* *INDENT-ON* */
static clib_error_t *
show_interface_sec_mac_addr_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -1235,15 +1207,13 @@ show_interface_sec_mac_addr_fn (vlib_main_t * vm, unformat_input_t * input,
{
sorted_sis =
vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
- _vec_len (sorted_sis) = 0;
- /* *INDENT-OFF* */
+ vec_set_len (sorted_sis, 0);
pool_foreach (si, im->sw_interfaces)
{
int visible = vnet_swif_is_api_visible (si);
if (visible)
vec_add1 (sorted_sis, si[0]);
}
- /* *INDENT-ON* */
/* Sort by name. */
vec_sort_with_function (sorted_sis, sw_interface_name_compare);
}
@@ -1284,13 +1254,11 @@ show_interface_sec_mac_addr_fn (vlib_main_t * vm, unformat_input_t * input,
* @cliexstart{show interface secondary-mac-address}
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_interface_sec_mac_addr, static) = {
.path = "show interface secondary-mac-address",
.short_help = "show interface secondary-mac-address [<interface>]",
.function = show_interface_sec_mac_addr_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
interface_add_del_mac_address (vlib_main_t * vm, unformat_input_t * input,
@@ -1358,13 +1326,11 @@ done:
* @cliexcmd{set interface secondary-mac-address GigabitEthernet0/8/0 aa:bb:cc:dd:ee:01 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (interface_add_del_mac_address_cmd, static) = {
.path = "set interface secondary-mac-address",
.short_help = "set interface secondary-mac-address <interface> <mac-address> [(add|del)]",
.function = interface_add_del_mac_address,
};
-/* *INDENT-ON* */
static clib_error_t *
set_interface_mac_address (vlib_main_t * vm, unformat_input_t * input,
@@ -1408,13 +1374,11 @@ done:
* @cliexcmd{set interface mac address pg0 aa:bb:cc:dd:ee:04}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_mac_address_cmd, static) = {
.path = "set interface mac address",
.short_help = "set interface mac address <interface> <mac-address>",
.function = set_interface_mac_address,
};
-/* *INDENT-ON* */
static clib_error_t *
set_tag (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
@@ -1433,13 +1397,11 @@ set_tag (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_tag_command, static) = {
.path = "set interface tag",
.short_help = "set interface tag <interface> <tag>",
.function = set_tag,
};
-/* *INDENT-ON* */
static clib_error_t *
clear_tag (vlib_main_t * vm, unformat_input_t * input,
@@ -1457,13 +1419,11 @@ clear_tag (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_tag_command, static) = {
.path = "clear interface tag",
.short_help = "clear interface tag <interface>",
.function = clear_tag,
};
-/* *INDENT-ON* */
static clib_error_t *
set_ip_directed_broadcast (vlib_main_t * vm,
@@ -1497,13 +1457,11 @@ set_ip_directed_broadcast (vlib_main_t * vm,
* subnet broadcast address will be sent L2 broadcast on the interface,
* otherwise it is dropped.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip_directed_broadcast_command, static) = {
.path = "set interface ip directed-broadcast",
.short_help = "set interface enable <interface> <enable|disable>",
.function = set_ip_directed_broadcast,
};
-/* *INDENT-ON* */
clib_error_t *
set_hw_interface_change_rx_mode (vnet_main_t * vnm, u32 hw_if_index,
@@ -1513,6 +1471,33 @@ set_hw_interface_change_rx_mode (vnet_main_t * vnm, u32 hw_if_index,
clib_error_t *error = 0;
vnet_hw_interface_t *hw;
u32 *queue_indices = 0;
+ vnet_dev_port_t *port;
+
+ port = vnet_dev_get_port_from_hw_if_index (hw_if_index);
+
+ if (port)
+ {
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_rv_t rv;
+
+ vnet_dev_port_cfg_change_req_t req = {
+ .type = mode == VNET_HW_IF_RX_MODE_POLLING ?
+ VNET_DEV_PORT_CFG_RXQ_INTR_MODE_DISABLE :
+ VNET_DEV_PORT_CFG_RXQ_INTR_MODE_ENABLE,
+ .queue_id = queue_id_valid ? queue_id : 0,
+ .all_queues = queue_id_valid ? 0 : 1,
+ };
+
+ if ((rv = vnet_dev_port_cfg_change_req_validate (vm, port, &req)))
+ return vnet_dev_port_err (
+	vm, port, rv, "rx queue interrupt mode enable/disable not supported");
+
+ if ((rv = vnet_dev_process_port_cfg_change_req (vm, port, &req)))
+ return vnet_dev_port_err (
+ vm, port, rv,
+ "device failed to enable/disable queue interrupt mode");
+ return 0;
+ }
hw = vnet_get_hw_interface (vnm, hw_if_index);
@@ -1533,7 +1518,12 @@ set_hw_interface_change_rx_mode (vnet_main_t * vnm, u32 hw_if_index,
{
int rv = vnet_hw_if_set_rx_queue_mode (vnm, queue_indices[i], mode);
if (rv)
- goto done;
+ {
+ error = clib_error_return (
+	      0, "unable to set rx-mode on interface %v queue-id %u",
+ hw->name, queue_id);
+ goto done;
+ }
}
done:
@@ -1627,13 +1617,11 @@ set_interface_rx_mode (vlib_main_t * vm, unformat_input_t * input,
* VirtualEthernet0/0/13 queue 3 (polling)
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_set_if_rx_mode,static) = {
.path = "set interface rx-mode",
.short_help = "set interface rx-mode <interface> [queue <n>] [polling | interrupt | adaptive]",
.function = set_interface_rx_mode,
};
-/* *INDENT-ON* */
static clib_error_t *
show_interface_rx_placement_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -1699,13 +1687,11 @@ show_interface_rx_placement_fn (vlib_main_t * vm, unformat_input_t * input,
* VirtualEthernet0/0/13 queue 3 (polling)
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_interface_rx_placement, static) = {
.path = "show interface rx-placement",
.short_help = "show interface rx-placement",
.function = show_interface_rx_placement_fn,
};
-/* *INDENT-ON* */
clib_error_t *
set_hw_interface_rx_placement (u32 hw_if_index, u32 queue_id,
u32 thread_index, u8 is_main)
@@ -1830,7 +1816,6 @@ set_interface_rx_placement (vlib_main_t *vm, unformat_input_t *input,
* VirtualEthernet0/0/13 queue 3 (polling)
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_set_if_rx_placement,static) = {
.path = "set interface rx-placement",
.short_help = "set interface rx-placement <interface> [queue <n>] "
@@ -1838,30 +1823,25 @@ VLIB_CLI_COMMAND (cmd_set_if_rx_placement,static) = {
.function = set_interface_rx_placement,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
-clib_error_t *
+int
set_hw_interface_tx_queue (u32 hw_if_index, u32 queue_id, uword *bitmap)
{
vnet_main_t *vnm = vnet_get_main ();
- vnet_device_main_t *vdm = &vnet_device_main;
- vnet_hw_interface_t *hw;
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
vnet_hw_if_tx_queue_t *txq;
u32 queue_index;
u32 thread_index;
- hw = vnet_get_hw_interface (vnm, hw_if_index);
-
/* highest set bit in bitmap should not exceed last worker thread index */
thread_index = clib_bitmap_last_set (bitmap);
- if ((thread_index != ~0) && (thread_index > vdm->last_worker_thread_index))
- return clib_error_return (0, "please specify valid thread(s)");
+ if ((thread_index != ~0) && (thread_index >= vtm->n_vlib_mains))
+ return VNET_API_ERROR_INVALID_VALUE;
queue_index =
vnet_hw_if_get_tx_queue_index_by_id (vnm, hw_if_index, queue_id);
if (queue_index == ~0)
- return clib_error_return (0, "unknown queue %u on interface %s", queue_id,
- hw->name);
+ return VNET_API_ERROR_INVALID_QUEUE;
txq = vnet_hw_if_get_tx_queue (vnm, queue_index);
@@ -1889,6 +1869,7 @@ set_interface_tx_queue (vlib_main_t *vm, unformat_input_t *input,
u32 hw_if_index = (u32) ~0;
u32 queue_id = (u32) 0;
uword *bitmap = 0;
+ int rv = 0;
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -1920,7 +1901,23 @@ set_interface_tx_queue (vlib_main_t *vm, unformat_input_t *input,
goto error;
}
- error = set_hw_interface_tx_queue (hw_if_index, queue_id, bitmap);
+ rv = set_hw_interface_tx_queue (hw_if_index, queue_id, bitmap);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_INVALID_VALUE:
+ error = clib_error_return (
+ 0, "please specify valid thread(s) - last thread index %u",
+ clib_bitmap_last_set (bitmap));
+ break;
+ case VNET_API_ERROR_INVALID_QUEUE:
+ error = clib_error_return (
+ 0, "unknown queue %u on interface %s", queue_id,
+ vnet_get_hw_interface (vnet_get_main (), hw_if_index)->name);
+ break;
+ default:
+ break;
+ }
error:
clib_bitmap_free (bitmap);
@@ -2010,13 +2007,11 @@ done:
* @cliexstart{set interface rss queues VirtualFunctionEthernet18/1/0 list 0,2-5,7}
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_set_interface_rss_queues,static) = {
.path = "set interface rss queues",
.short_help = "set interface rss queues <interface> <list <queue-list>>",
.function = set_interface_rss_queues_fn,
};
-/* *INDENT-ON* */
static u8 *
format_vnet_pcap (u8 * s, va_list * args)
@@ -2364,13 +2359,13 @@ pcap_trace_command_fn (vlib_main_t * vm,
* packet capture are preserved, so '<em>any</em>' can be used to reset
* the interface setting.
*
- * - <b>filter</b> - Use the pcap rx / tx / drop trace filter, which
+ * - <b>filter</b> - Use the pcap trace rx / tx / drop filter, which
* must be configured. Use <b>classify filter pcap...</b> to configure the
* filter. The filter will only be executed if the per-interface or
* any-interface tests fail.
*
* - <b>error <node>.<error></b> - filter packets based on a specific error.
- * For example: error {ip4-udp-lookup}.{No listener for dst port}
+ * For example: error {ip4-udp-lookup}.{no_listener}
*
* - <b>file <name></b> - Used to specify the output filename. The file will
* be placed in the '<em>/tmp</em>' directory, so only the filename is
@@ -2406,7 +2401,6 @@ pcap_trace_command_fn (vlib_main_t * vm,
* saved to /tmp/vppTest.pcap...
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (pcap_tx_trace_command, static) = {
.path = "pcap trace",
@@ -2416,7 +2410,72 @@ VLIB_CLI_COMMAND (pcap_tx_trace_command, static) = {
" [preallocate-data][free-data]",
.function = pcap_trace_command_fn,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+set_pcap_filter_function (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_pcap_t *pp = &vnet_get_main ()->pcap;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vlib_is_packet_traced_fn_t *res = 0;
+ clib_error_t *error = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != (uword) UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_vlib_trace_filter_function,
+ &res))
+ ;
+ else
+ {
+ error = clib_error_create (
+ "expected valid trace filter function, got `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+ pp->current_filter_function = res;
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (set_pcap_filter_function_cli, static) = {
+ .path = "set pcap filter function",
+ .short_help = "set pcap filter function <func_name>",
+ .function = set_pcap_filter_function,
+};
+
+static clib_error_t *
+show_pcap_filter_function (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_pcap_t *pp = &vnet_get_main ()->pcap;
+ vlib_trace_filter_main_t *tfm = &vlib_trace_filter_main;
+ vlib_is_packet_traced_fn_t *current_trace_filter_fn =
+ pp->current_filter_function;
+ vlib_trace_filter_function_registration_t *reg =
+ tfm->trace_filter_registration;
+
+ while (reg)
+ {
+ vlib_cli_output (vm, "%sname:%s description: %s priority: %u",
+ reg->function == current_trace_filter_fn ? "(*) " : "",
+ reg->name, reg->description, reg->priority);
+ reg = reg->next;
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_pcap_filter_function_cli, static) = {
+ .path = "show pcap filter function",
+ .short_help = "show pcap filter function",
+ .function = show_pcap_filter_function,
+};
static clib_error_t *
set_interface_name (vlib_main_t *vm, unformat_input_t *input,
@@ -2467,6 +2526,138 @@ VLIB_CLI_COMMAND (cmd_set_if_name, static) = {
.function = set_interface_name,
.is_mp_safe = 1,
};
+
+static clib_error_t *
+set_interface_tx_hash_cmd (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ clib_error_t *error = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_hw_interface_t *hi;
+ u8 *hash_name = 0;
+ u32 hw_if_index = (u32) ~0;
+ vnet_hash_fn_t hf;
+ vnet_hash_fn_type_t ftype;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_vnet_hw_interface, vnm,
+ &hw_if_index))
+ ;
+ else if (unformat (line_input, "hash-name %s", &hash_name))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ unformat_free (line_input);
+ return error;
+ }
+ }
+
+ unformat_free (line_input);
+
+ if (hw_if_index == (u32) ~0)
+ {
+ error = clib_error_return (0, "please specify valid interface name");
+ goto error;
+ }
+
+ if (hash_name == 0)
+ {
+ error = clib_error_return (0, "hash-name is required");
+ goto error;
+ }
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ ftype =
+ vnet_get_hw_interface_class (vnm, hi->hw_class_index)->tx_hash_fn_type;
+ hf = vnet_hash_function_from_name ((const char *) hash_name, ftype);
+
+ if (!hf)
+ {
+ error = clib_error_return (0, "please specify valid hash name");
+ goto error;
+ }
+
+ hi->hf = hf;
+error:
+ vec_free (hash_name);
+ return (error);
+}
+
+VLIB_CLI_COMMAND (cmd_set_if_tx_hash, static) = {
+ .path = "set interface tx-hash",
+ .short_help = "set interface tx-hash <interface> hash-name <hash-name>",
+ .function = set_interface_tx_hash_cmd,
+};
+
+static clib_error_t *
+show_tx_hash (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ clib_error_t *error = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_hw_interface_t *hi;
+ vnet_hash_function_registration_t *hash;
+ u32 hw_if_index = (u32) ~0;
+ vnet_hash_fn_type_t ftype;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_vnet_hw_interface, vnm,
+ &hw_if_index))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ unformat_free (line_input);
+ goto error;
+ }
+ }
+
+ unformat_free (line_input);
+
+ if (hw_if_index == (u32) ~0)
+ {
+ error = clib_error_return (0, "please specify valid interface name");
+ goto error;
+ }
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ ftype =
+ vnet_get_hw_interface_class (vnm, hi->hw_class_index)->tx_hash_fn_type;
+
+ if (hi->hf)
+ {
+ hash = vnet_hash_function_from_func (hi->hf, ftype);
+ if (hash)
+ vlib_cli_output (vm, "%U", format_vnet_hash, hash);
+ else
+ vlib_cli_output (vm, "no matching hash function found");
+ }
+ else
+ vlib_cli_output (vm, "no hashing function set");
+
+error:
+ return (error);
+}
+
+VLIB_CLI_COMMAND (cmd_show_tx_hash, static) = {
+ .path = "show interface tx-hash",
+ .short_help = "show interface tx-hash [interface]",
+ .function = show_tx_hash,
+};
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/interface_format.c b/src/vnet/interface_format.c
index 4acd6ab63e6..0eff8c4597c 100644
--- a/src/vnet/interface_format.c
+++ b/src/vnet/interface_format.c
@@ -120,7 +120,7 @@ format_vnet_hw_interface_link_speed (u8 * s, va_list * args)
{
u32 link_speed = va_arg (*args, u32);
- if (link_speed == 0)
+ if (link_speed == 0 || link_speed == UINT32_MAX)
return format (s, "unknown");
if (link_speed >= 1000000)
@@ -143,11 +143,9 @@ format_vnet_hw_interface_rss_queues (u8 * s, va_list * args)
if (bitmap)
{
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, bitmap) {
s = format (s, "%u ", i);
}
- /* *INDENT-ON* */
}
return s;
@@ -212,6 +210,9 @@ format_vnet_hw_interface (u8 * s, va_list * args)
if (vec_len (hi->tx_queue_indices))
{
s = format (s, "\n%UTX Queues:", format_white_space, indent + 2);
+ s = format (
+ s, "\n%UTX Hash: %U", format_white_space, indent + 4, format_vnet_hash,
+ vnet_hash_function_from_func (hi->hf, hw_class->tx_hash_fn_type));
s = format (s, "\n%U%-6s%-7s%-15s", format_white_space, indent + 4,
"queue", "shared", "thread(s)");
for (int i = 0; i < vec_len (hi->tx_queue_indices); i++)
@@ -287,7 +288,7 @@ format_vnet_sw_if_index_name (u8 * s, va_list * args)
if (NULL == si)
{
- return format (s, "DELETED");
+ return format (s, "DELETED (%u)", sw_if_index);
}
return format (s, "%U", format_vnet_sw_interface_name, vnm, si);
}
@@ -302,7 +303,7 @@ format_vnet_hw_if_index_name (u8 * s, va_list * args)
hi = vnet_get_hw_interface (vnm, hw_if_index);
if (hi == 0)
- return format (s, "DELETED");
+ return format (s, "DELETED (%u)", hw_if_index);
return format (s, "%v", hi->name);
}
@@ -366,11 +367,11 @@ format_vnet_sw_interface_cntrs (u8 * s, vnet_interface_main_t * im,
n_printed += 2;
if (n)
- _vec_len (n) = 0;
+ vec_set_len (n, 0);
n = format (n, "%s packets", cm->name);
s = format (s, "%-16v%16Ld", n, vtotal.packets);
- _vec_len (n) = 0;
+ vec_set_len (n, 0);
n = format (n, "%s bytes", cm->name);
s = format (s, "\n%U%-16v%16Ld",
format_white_space, indent, n, vtotal.bytes);
@@ -599,9 +600,9 @@ format_vnet_buffer_opaque (u8 * s, va_list * args)
s = format (s,
"l2_classify.table_index: %d, l2_classify.opaque_index: %d, "
- "l2_classify.hash: 0x%llx",
- o->l2_classify.table_index,
- o->l2_classify.opaque_index, o->l2_classify.hash);
+ "l2_classify.hash: 0x%lx",
+ o->l2_classify.table_index, o->l2_classify.opaque_index,
+ o->l2_classify.hash);
vec_add1 (s, '\n');
s = format (s, "policer.index: %d", o->policer.index);
@@ -694,17 +695,10 @@ format_vnet_buffer_opaque2 (u8 * s, va_list * args)
s = format (s, "loop_counter: %d", o->loop_counter);
vec_add1 (s, '\n');
- s = format (s, "gbp.flags: %x, gbp.sclass: %d",
- (u32) (o->gbp.flags), (u32) (o->gbp.sclass));
- vec_add1 (s, '\n');
-
s = format (s, "gso_size: %d, gso_l4_hdr_sz: %d",
(u32) (o->gso_size), (u32) (o->gso_l4_hdr_sz));
vec_add1 (s, '\n');
- s = format (s, "pg_replay_timestamp: %llu", (u32) (o->pg_replay_timestamp));
- vec_add1 (s, '\n');
-
for (i = 0; i < vec_len (im->buffer_opaque2_format_helpers); i++)
{
helper_fp = im->buffer_opaque2_format_helpers[i];
diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h
index 28312d4c85a..511df4920e4 100644
--- a/src/vnet/interface_funcs.h
+++ b/src/vnet/interface_funcs.h
@@ -231,6 +231,10 @@ u32 vnet_register_interface (vnet_main_t * vnm,
void vnet_set_interface_output_node (vnet_main_t * vnm,
u32 hw_if_index, u32 node_index);
+void vnet_set_interface_l3_output_node (vlib_main_t *vm, u32 sw_if_index,
+ u8 *output_node);
+void vnet_reset_interface_l3_output_node (vlib_main_t *vm, u32 sw_if_index);
+
/* Creates a software interface given template. */
clib_error_t *vnet_create_sw_interface (vnet_main_t * vnm,
vnet_sw_interface_t * template,
@@ -306,7 +310,7 @@ always_inline u32
vnet_hw_interface_get_mtu (vnet_main_t * vnm, u32 hw_if_index)
{
vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
- return hw->max_packet_bytes;
+ return hw->max_frame_size - hw->frame_overhead;
}
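The MTU getter now derives the payload MTU from the configured maximum frame size minus the L2 overhead. A minimal sketch of the arithmetic, assuming plain untagged Ethernet values (not taken from this patch):

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint32_t max_frame_size = 1518; /* assumed: untagged Ethernet */
  uint32_t frame_overhead = 18;   /* assumed: 14B header + 4B FCS */
  /* mirrors the new vnet_hw_interface_get_mtu computation */
  printf ("mtu = %u\n", max_frame_size - frame_overhead); /* 1500 */
  return 0;
}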
always_inline u32
@@ -423,9 +427,16 @@ clib_error_t *set_hw_interface_change_rx_mode (vnet_main_t * vnm,
/* Set rx-placement on the interface */
clib_error_t *set_hw_interface_rx_placement (u32 hw_if_index, u32 queue_id,
u32 thread_index, u8 is_main);
+/* Set tx-queue placement on the interface */
+int set_hw_interface_tx_queue (u32 hw_if_index, u32 queue_id, uword *bitmap);
+/* Set the Max Frame Size on the HW interface */
+clib_error_t *vnet_hw_interface_set_max_frame_size (vnet_main_t *vnm,
+ u32 hw_if_index,
+ u32 max_frame_size);
/* Set the MTU on the HW interface */
-void vnet_hw_interface_set_mtu (vnet_main_t * vnm, u32 hw_if_index, u32 mtu);
+clib_error_t *vnet_hw_interface_set_mtu (vnet_main_t *vnm, u32 hw_if_index,
+ u32 mtu);
/* Set the MTU on the SW interface */
void vnet_sw_interface_set_mtu (vnet_main_t * vnm, u32 sw_if_index, u32 mtu);
@@ -472,12 +483,14 @@ unformat_function_t unformat_vnet_sw_interface_flags;
format_function_t format_vtr;
/* Node runtime for interface output function. */
+struct vnet_dev_tx_queue;
typedef struct
{
u32 hw_if_index;
u32 sw_if_index;
u32 dev_instance;
- u32 is_deleted;
+ u8 is_deleted;
+ struct vnet_dev_tx_queue *tx_queue;
} vnet_interface_output_runtime_t;
/* Interface output function. */
@@ -505,6 +518,7 @@ typedef enum
{
VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DOWN,
VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DELETED,
+ VNET_INTERFACE_OUTPUT_ERROR_NO_TX_QUEUE,
} vnet_interface_output_error_t;
/* Format for interface output traces. */
@@ -534,6 +548,7 @@ pcap_add_buffer (pcap_main_t *pm, struct vlib_main_t *vm, u32 buffer_index,
if (PREDICT_TRUE (pm->n_packets_captured < pm->n_packets_to_capture))
{
+ time_now += vm->clib_time.init_reference_time;
clib_spinlock_lock_if_init (&pm->lock);
d = pcap_add_packet (pm, time_now, n_left, n);
while (1)
@@ -550,6 +565,31 @@ pcap_add_buffer (pcap_main_t *pm, struct vlib_main_t *vm, u32 buffer_index,
clib_spinlock_unlock_if_init (&pm->lock);
}
}
+
+typedef struct
+{
+ vnet_hw_if_caps_t val;
+ vnet_hw_if_caps_t mask;
+} vnet_hw_if_caps_change_t;
+
+void vnet_hw_if_change_caps (vnet_main_t *vnm, u32 hw_if_index,
+ vnet_hw_if_caps_change_t *caps);
+
+static_always_inline void
+vnet_hw_if_set_caps (vnet_main_t *vnm, u32 hw_if_index, vnet_hw_if_caps_t caps)
+{
+ vnet_hw_if_caps_change_t cc = { .val = caps, .mask = caps };
+ vnet_hw_if_change_caps (vnm, hw_if_index, &cc);
+}
+
+static_always_inline void
+vnet_hw_if_unset_caps (vnet_main_t *vnm, u32 hw_if_index,
+ vnet_hw_if_caps_t caps)
+{
+ vnet_hw_if_caps_change_t cc = { .val = 0, .mask = caps };
+ vnet_hw_if_change_caps (vnm, hw_if_index, &cc);
+}
+
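The val/mask pair lets a single call set some capability bits and clear others atomically; vnet_hw_if_set_caps passes val == mask and vnet_hw_if_unset_caps passes val == 0. A minimal self-contained sketch of that update rule (plain C, independent of the vnet types):

#include <stdint.h>
#include <stdio.h>

typedef uint32_t caps_t;

/* mirrors the val/mask semantics of vnet_hw_if_change_caps:
   bits selected by 'mask' come from 'val', all others are kept */
static caps_t
caps_apply (caps_t current, caps_t val, caps_t mask)
{
  return (current & ~mask) | (val & mask);
}

int
main (void)
{
  caps_t c = 0x05;                /* assumed starting bits */
  c = caps_apply (c, 0x02, 0x02); /* set_caps:   val == mask */
  c = caps_apply (c, 0x00, 0x01); /* unset_caps: val == 0    */
  printf ("caps = 0x%x\n", c);    /* prints 0x6 */
  return 0;
}
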
#endif /* included_vnet_interface_funcs_h */
/*
diff --git a/src/vnet/interface_output.c b/src/vnet/interface_output.c
index c4a4b0f228d..47844dcd68a 100644
--- a/src/vnet/interface_output.c
+++ b/src/vnet/interface_output.c
@@ -46,9 +46,12 @@
#include <vnet/udp/udp_packet.h>
#include <vnet/feature/feature.h>
#include <vnet/classify/pcap_classify.h>
+#include <vnet/hash/hash.h>
#include <vnet/interface_output.h>
#include <vppinfra/vector/mask_compare.h>
#include <vppinfra/vector/compress.h>
+#include <vppinfra/vector/count_equal.h>
+#include <vppinfra/vector/array_mask.h>
typedef struct
{
@@ -82,9 +85,8 @@ format_vnet_interface_output_trace (u8 * s, va_list * va)
else
{
si = vnet_get_sw_interface (vnm, t->sw_if_index);
- s =
- format (s, "%U ", format_vnet_sw_interface_name, vnm, si,
- t->flags);
+ s = format (s, "%U flags 0x%08x", format_vnet_sw_interface_name, vnm,
+ si, t->flags);
}
s =
format (s, "\n%U%U", format_white_space, indent,
@@ -167,16 +169,19 @@ vnet_interface_output_trace (vlib_main_t * vm,
static_always_inline void
vnet_interface_output_handle_offload (vlib_main_t *vm, vlib_buffer_t *b)
{
+ if (b->flags & VNET_BUFFER_F_GSO)
+ return;
vnet_calc_checksums_inline (vm, b, b->flags & VNET_BUFFER_F_IS_IP4,
b->flags & VNET_BUFFER_F_IS_IP6);
+ vnet_calc_outer_checksums_inline (vm, b);
}
static_always_inline uword
vnet_interface_output_node_inline (vlib_main_t *vm, u32 sw_if_index,
vlib_combined_counter_main_t *ccm,
- vlib_buffer_t **b, u32 config_index, u8 arc,
- u32 n_left, int do_tx_offloads,
- int arc_or_subif)
+ vlib_buffer_t **b, void **p,
+ u32 config_index, u8 arc, u32 n_left,
+ int processing_level)
{
u32 n_bytes = 0;
u32 n_bytes0, n_bytes1, n_bytes2, n_bytes3;
@@ -192,7 +197,7 @@ vnet_interface_output_node_inline (vlib_main_t *vm, u32 sw_if_index,
vlib_prefetch_buffer_header (b[6], LOAD);
vlib_prefetch_buffer_header (b[7], LOAD);
- if (do_tx_offloads)
+ if (processing_level >= 1)
or_flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags;
/* Be grumpy about zero length buffers for benefit of
@@ -207,7 +212,16 @@ vnet_interface_output_node_inline (vlib_main_t *vm, u32 sw_if_index,
n_bytes += n_bytes2 = vlib_buffer_length_in_chain (vm, b[2]);
n_bytes += n_bytes3 = vlib_buffer_length_in_chain (vm, b[3]);
- if (arc_or_subif)
+ if (processing_level >= 3)
+ {
+ p[0] = vlib_buffer_get_current (b[0]);
+ p[1] = vlib_buffer_get_current (b[1]);
+ p[2] = vlib_buffer_get_current (b[2]);
+ p[3] = vlib_buffer_get_current (b[3]);
+ p += 4;
+ }
+
+ if (processing_level >= 2)
{
u32 tx_swif0, tx_swif1, tx_swif2, tx_swif3;
tx_swif0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
@@ -241,7 +255,7 @@ vnet_interface_output_node_inline (vlib_main_t *vm, u32 sw_if_index,
}
}
- if (do_tx_offloads && (or_flags & VNET_BUFFER_F_OFFLOAD))
+ if (processing_level >= 1 && (or_flags & VNET_BUFFER_F_OFFLOAD))
{
vnet_interface_output_handle_offload (vm, b[0]);
vnet_interface_output_handle_offload (vm, b[1]);
@@ -261,7 +275,13 @@ vnet_interface_output_node_inline (vlib_main_t *vm, u32 sw_if_index,
n_bytes += n_bytes0 = vlib_buffer_length_in_chain (vm, b[0]);
- if (arc_or_subif)
+ if (processing_level >= 3)
+ {
+ p[0] = vlib_buffer_get_current (b[0]);
+ p += 1;
+ }
+
+ if (processing_level >= 2)
{
u32 tx_swif0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
@@ -275,7 +295,7 @@ vnet_interface_output_node_inline (vlib_main_t *vm, u32 sw_if_index,
vlib_increment_combined_counter (ccm, ti, tx_swif0, 1, n_bytes0);
}
- if (do_tx_offloads)
+ if (processing_level >= 1)
vnet_interface_output_handle_offload (vm, b[0]);
n_left -= 1;
@@ -343,39 +363,71 @@ vnet_interface_pcap_tx_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
}
static_always_inline void
-store_tx_frame_scalar_data (vnet_hw_if_output_node_runtime_t *r,
- vnet_hw_if_tx_frame_t *tf)
+hash_func_with_mask (void **p, u32 *hash, u32 n_packets, u32 *lookup_table,
+ u32 mask, vnet_hash_fn_t hf)
{
- if (r)
- clib_memcpy_fast (tf, &r->frame, sizeof (vnet_hw_if_tx_frame_t));
+ u32 n_left_from = n_packets;
+
+ hf (p, hash, n_packets);
+
+ clib_array_mask_u32 (hash, mask, n_packets);
+
+ while (n_left_from >= 4)
+ {
+ hash[0] = lookup_table[hash[0]];
+ hash[1] = lookup_table[hash[1]];
+ hash[2] = lookup_table[hash[2]];
+ hash[3] = lookup_table[hash[3]];
+
+ hash += 4;
+ n_left_from -= 4;
+ }
+
+ while (n_left_from > 0)
+ {
+ hash[0] = lookup_table[hash[0]];
+
+ hash += 1;
+ n_left_from -= 1;
+ }
}
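hash_func_with_mask lets the interface's hash function fill hash[], masks every value into the power-of-two lookup table, then replaces it with the table entry, i.e. the target tx-queue id. A self-contained sketch of that two-step mapping (table contents and hash values assumed):

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  /* assumed: 2 tx queues spread across a 4-entry power-of-two table */
  uint32_t lookup_table[4] = { 0, 1, 0, 1 };
  uint32_t mask = 4 - 1;
  uint32_t hash[3] = { 0x12345678, 0xdeadbeef, 0x0badcafe };

  for (int i = 0; i < 3; i++)
    {
      hash[i] &= mask;                 /* the clib_array_mask_u32 step */
      hash[i] = lookup_table[hash[i]]; /* hash -> tx queue id */
      printf ("packet %d -> queue %u\n", i, hash[i]);
    }
  return 0;
}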
static_always_inline void
-enqueu_to_tx_node (vlib_main_t *vm, vlib_node_runtime_t *node,
- vnet_hw_interface_t *hi, u32 *from, u32 n_vectors)
+store_tx_frame_scalar_data (vnet_hw_if_tx_frame_t *copy_frame,
+ vnet_hw_if_tx_frame_t *tf)
{
- u32 next_index = VNET_INTERFACE_OUTPUT_NEXT_TX;
- vnet_hw_if_output_node_runtime_t *r = 0;
- u32 n_free, n_copy, *to;
- vnet_hw_if_tx_frame_t *tf;
- vlib_frame_t *f;
-
- ASSERT (n_vectors <= VLIB_FRAME_SIZE);
+ if (copy_frame)
+ clib_memcpy_fast (tf, copy_frame, sizeof (vnet_hw_if_tx_frame_t));
+}
- if (hi->output_node_thread_runtimes)
- r = vec_elt_at_index (hi->output_node_thread_runtimes, vm->thread_index);
+static_always_inline u32
+enqueue_one_to_tx_node (vlib_main_t *vm, vlib_node_runtime_t *node, u32 *ppqi,
+ u32 *from, vnet_hw_if_tx_frame_t *copy_frame,
+ u32 n_vectors, u32 n_left, u32 next_index)
+{
+ u32 tmp[VLIB_FRAME_SIZE];
+ vlib_frame_bitmap_t mask = {};
+ vlib_frame_t *f;
+ vnet_hw_if_tx_frame_t *tf;
+ u32 *to;
+ u32 n_copy = 0, n_free = 0;
f = vlib_get_next_frame_internal (vm, node, next_index, 0);
tf = vlib_frame_scalar_args (f);
- if (f->n_vectors > 0 && (r == 0 || tf->queue_id == r->frame.queue_id))
+ if (f->n_vectors > 0 &&
+ (!copy_frame || (tf->queue_id == copy_frame->queue_id)))
{
/* append current next frame */
n_free = VLIB_FRAME_SIZE - f->n_vectors;
- n_copy = clib_min (n_vectors, n_free);
- n_vectors -= n_copy;
- to = vlib_frame_vector_args (f);
- to += f->n_vectors;
+ /*
+ * if the frame contains enough space for the worst case,
+ * we can avoid using tmp
+ */
+ if (n_free >= n_left)
+ to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
+ else
+ to = tmp;
}
else
{
@@ -387,25 +439,113 @@ enqueu_to_tx_node (vlib_main_t *vm, vlib_node_runtime_t *node,
}
/* empty frame - store scalar data */
- store_tx_frame_scalar_data (r, tf);
+ store_tx_frame_scalar_data (copy_frame, tf);
to = vlib_frame_vector_args (f);
n_free = VLIB_FRAME_SIZE;
- n_copy = n_vectors;
- n_vectors = 0;
}
- vlib_buffer_copy_indices (to, from, n_copy);
- vlib_put_next_frame (vm, node, next_index, n_free - n_copy);
+ /*
+ * per-packet queue id array:
+ * compare each entry with the given queue_id and, on match, copy the
+ * respective buffer index from -> to
+ */
+ if (ppqi)
+ {
+ clib_mask_compare_u32 (copy_frame->queue_id, ppqi, mask, n_vectors);
+ n_copy = clib_compress_u32 (to, from, mask, n_vectors);
- if (n_vectors == 0)
- return;
+ if (n_copy == 0)
+ return n_left;
+ }
+ else
+ {
+ /*
+ * no work required, just copy all buffer indices from -> to
+ */
+ n_copy = n_left;
+ vlib_buffer_copy_indices (to, from, n_copy);
+ }
- /* we have more indices to store, take empty frame */
- from += n_copy;
- f = vlib_get_next_frame_internal (vm, node, next_index, 1);
- store_tx_frame_scalar_data (r, vlib_frame_scalar_args (f));
- vlib_buffer_copy_indices (vlib_frame_vector_args (f), from, n_vectors);
- vlib_put_next_frame (vm, node, next_index, VLIB_FRAME_SIZE - n_vectors);
+ if (to != tmp)
+ {
+ /* indices already written to frame, just close it */
+ vlib_put_next_frame (vm, node, next_index, n_free - n_copy);
+ }
+ else if (n_free >= n_copy)
+ {
+ /* enough space in the existing frame */
+ to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
+ vlib_buffer_copy_indices (to, tmp, n_copy);
+ vlib_put_next_frame (vm, node, next_index, n_free - n_copy);
+ }
+ else
+ {
+ /* full frame */
+ to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
+ vlib_buffer_copy_indices (to, tmp, n_free);
+ vlib_put_next_frame (vm, node, next_index, 0);
+
+ /* second frame */
+ u32 n_2nd_frame = n_copy - n_free;
+ f = vlib_get_next_frame_internal (vm, node, next_index, 1);
+ tf = vlib_frame_scalar_args (f);
+ /* empty frame - store scalar data */
+ store_tx_frame_scalar_data (copy_frame, tf);
+ to = vlib_frame_vector_args (f);
+ vlib_buffer_copy_indices (to, tmp + n_free, n_2nd_frame);
+ vlib_put_next_frame (vm, node, next_index,
+ VLIB_FRAME_SIZE - n_2nd_frame);
+ }
+
+ return n_left - n_copy;
+}
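When the compressed indices do not fit into the partially filled next frame, the tail spills into a second, empty frame that receives the same scalar (tx-frame) data. A sketch of the split arithmetic with assumed sizes:

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  /* assumed: 256-slot frames, 200 already used, 100 indices to enqueue */
  uint32_t frame_size = 256, used = 200, n_copy = 100;
  uint32_t n_free = frame_size - used;

  if (n_copy <= n_free)
    printf ("single frame, %u slots left\n", n_free - n_copy);
  else
    /* first frame is closed full; remainder opens a fresh frame */
    printf ("split: %u now, %u into a second frame\n", n_free,
	    n_copy - n_free);
  return 0;
}
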
+
+static_always_inline void
+enqueue_to_tx_node (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_hw_interface_t *hi, u32 next_index,
+ vnet_hw_if_output_node_runtime_t *r, u32 *from, void **p,
+ u32 n_vectors)
+{
+ u32 n_left = n_vectors;
+
+ ASSERT (n_vectors <= VLIB_FRAME_SIZE);
+
+ /*
+ * backward compatibility for drivers not integrated with the new tx infra.
+ */
+ if (r == 0)
+ {
+ n_left = enqueue_one_to_tx_node (vm, node, NULL, from, NULL, n_vectors,
+ n_left, next_index);
+ }
+ /*
+ * only one tx queue of the given interface is available on the given thread
+ */
+ else if (r->n_queues == 1)
+ {
+ n_left = enqueue_one_to_tx_node (vm, node, NULL, from, r->frame,
+ n_vectors, n_left, next_index);
+ }
+ /*
+ * multiple tx-queues use case
+ */
+ else if (r->n_queues > 1)
+ {
+ u32 qids[VLIB_FRAME_SIZE];
+
+ hash_func_with_mask (p, qids, n_vectors, r->lookup_table,
+ vec_len (r->lookup_table) - 1, hi->hf);
+
+ for (u32 i = 0; i < r->n_queues; i++)
+ {
+ n_left = enqueue_one_to_tx_node (vm, node, qids, from, &r->frame[i],
+ n_vectors, n_left, next_index);
+ if (n_left == 0)
+ break;
+ }
+ }
+ else
+ ASSERT (0);
}
VLIB_NODE_FN (vnet_interface_output_node)
@@ -417,6 +557,7 @@ VLIB_NODE_FN (vnet_interface_output_node)
vnet_hw_interface_t *hi;
vnet_sw_interface_t *si;
vnet_interface_output_runtime_t *rt = (void *) node->runtime_data;
+ vnet_hw_if_output_node_runtime_t *r = 0;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
u32 n_bytes, n_buffers = frame->n_vectors;
u32 config_index = ~0;
@@ -426,6 +567,8 @@ VLIB_NODE_FN (vnet_interface_output_node)
u8 arc = im->output_feature_arc_index;
int arc_or_subif = 0;
int do_tx_offloads = 0;
+ void *ptr[VLIB_FRAME_SIZE], **p = ptr;
+ u8 is_parr = 0;
u32 *from;
if (node->flags & VLIB_NODE_FLAG_TRACE)
@@ -461,6 +604,27 @@ VLIB_NODE_FN (vnet_interface_output_node)
node->node_index, VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DOWN);
}
+ if (hi->output_node_thread_runtimes)
+ r = vec_elt_at_index (hi->output_node_thread_runtimes, vm->thread_index);
+
+ if (r)
+ {
+ /*
+ * tx queue of given interface is not available on given thread
+ */
+ if (r->n_queues == 0)
+ return vlib_error_drop_buffers (
+ vm, node, from,
+ /* buffer stride */ 1, n_buffers, VNET_INTERFACE_OUTPUT_NEXT_DROP,
+ node->node_index, VNET_INTERFACE_OUTPUT_ERROR_NO_TX_QUEUE);
+ /*
+ * multiple tx queues available on given thread
+ */
+ else if (r->n_queues > 1)
+ /* construct array of pointers */
+ is_parr = 1;
+ }
+
/* interface-output feature arc handling */
if (PREDICT_FALSE (vnet_have_features (arc, sw_if_index)))
{
@@ -477,27 +641,31 @@ VLIB_NODE_FN (vnet_interface_output_node)
/* if not all three flags IP4_,TCP_,UDP_CKSUM set, do compute them
* here before sending to the interface */
- if ((hi->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TX_CKSUM) !=
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_CKSUM)
+ if ((hi->caps & VNET_HW_IF_CAP_TX_CKSUM) != VNET_HW_IF_CAP_TX_CKSUM)
do_tx_offloads = 1;
- if (do_tx_offloads == 0 && arc_or_subif == 0)
+ // basic processing
+ if (do_tx_offloads == 0 && arc_or_subif == 0 && is_parr == 0)
n_bytes = vnet_interface_output_node_inline (
- vm, sw_if_index, ccm, bufs, config_index, arc, n_buffers, 0, 0);
- else if (do_tx_offloads == 0 && arc_or_subif == 1)
+ vm, sw_if_index, ccm, bufs, NULL, config_index, arc, n_buffers, 0);
+ // basic processing + tx offloads
+ else if (do_tx_offloads == 1 && arc_or_subif == 0 && is_parr == 0)
n_bytes = vnet_interface_output_node_inline (
- vm, sw_if_index, ccm, bufs, config_index, arc, n_buffers, 0, 1);
- else if (do_tx_offloads == 1 && arc_or_subif == 0)
+ vm, sw_if_index, ccm, bufs, NULL, config_index, arc, n_buffers, 1);
+ // basic processing + tx offloads + vlans + arcs
+ else if (do_tx_offloads == 1 && arc_or_subif == 1 && is_parr == 0)
n_bytes = vnet_interface_output_node_inline (
- vm, sw_if_index, ccm, bufs, config_index, arc, n_buffers, 1, 0);
+ vm, sw_if_index, ccm, bufs, NULL, config_index, arc, n_buffers, 2);
+ // basic processing + tx offloads + vlans + arcs + multi-txqs
else
n_bytes = vnet_interface_output_node_inline (
- vm, sw_if_index, ccm, bufs, config_index, arc, n_buffers, 1, 1);
+ vm, sw_if_index, ccm, bufs, p, config_index, arc, n_buffers, 3);
from = vlib_frame_vector_args (frame);
if (PREDICT_TRUE (next_index == VNET_INTERFACE_OUTPUT_NEXT_TX))
{
- enqueu_to_tx_node (vm, node, hi, from, frame->n_vectors);
+ enqueue_to_tx_node (vm, node, hi, next_index, r, from, ptr,
+ frame->n_vectors);
}
else
{
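The two independent booleans (do_tx_offloads, arc_or_subif) of the old code become one cumulative processing_level, where each level includes all the work of the levels below it. A sketch of the mapping as dispatched above (enum names are illustrative, not from the patch):

/* level 0: byte/packet counters only
   level 1: + tx checksum offload fixups
   level 2: + per-subinterface counters and feature arcs
   level 3: + per-packet pointer array for multi-txq hashing */
enum
{
  LEVEL_BASIC = 0,
  LEVEL_TX_OFFLOAD = 1,
  LEVEL_ARC_OR_SUBIF = 2,
  LEVEL_MULTI_TXQ = 3,
};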
@@ -1053,7 +1221,6 @@ VLIB_NODE_FN (interface_punt) (vlib_main_t * vm,
return interface_drop_punt (vm, node, frame, VNET_ERROR_DISPOSITION_PUNT);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (interface_drop) = {
.name = "error-drop",
.vector_size = sizeof (u32),
@@ -1064,9 +1231,7 @@ VLIB_REGISTER_NODE (interface_drop) = {
[0] = "drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (interface_punt) = {
.name = "error-punt",
.vector_size = sizeof (u32),
@@ -1077,7 +1242,6 @@ VLIB_REGISTER_NODE (interface_punt) = {
[0] = "punt",
},
};
-/* *INDENT-ON* */
VLIB_REGISTER_NODE (vnet_per_buffer_interface_output_node) = {
.name = "interface-output",
@@ -1089,16 +1253,13 @@ VLIB_NODE_FN (vnet_interface_output_arc_end_node)
{
vnet_main_t *vnm = vnet_get_main ();
vnet_interface_main_t *im = &vnm->interface_main;
- vnet_hw_if_output_node_runtime_t *r = 0;
vnet_hw_interface_t *hi;
- vnet_hw_if_tx_frame_t *tf;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
u32 sw_if_indices[VLIB_FRAME_SIZE], *sw_if_index = sw_if_indices;
- u64 used_elts[VLIB_FRAME_SIZE / 64] = {};
- u64 mask[VLIB_FRAME_SIZE / 64] = {};
- u32 *tmp, *from, n_left, n_free, n_comp, *to, swif, off;
+ vlib_frame_bitmap_t used_elts = {}, mask = {};
+ u32 *tmp, *from, n_left, n_comp, n_p_comp, swif, off;
u16 next_index;
- vlib_frame_t *f;
+ void *ptr[VLIB_FRAME_SIZE], **p = ptr;
from = vlib_frame_vector_args (frame);
n_left = frame->n_vectors;
@@ -1110,11 +1271,17 @@ VLIB_NODE_FN (vnet_interface_output_arc_end_node)
vlib_prefetch_buffer_header (b[5], LOAD);
vlib_prefetch_buffer_header (b[6], LOAD);
vlib_prefetch_buffer_header (b[7], LOAD);
+
+ p[0] = vlib_buffer_get_current (b[0]);
+ p[1] = vlib_buffer_get_current (b[1]);
+ p[2] = vlib_buffer_get_current (b[2]);
+ p[3] = vlib_buffer_get_current (b[3]);
sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
sw_if_index[1] = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
sw_if_index[2] = vnet_buffer (b[2])->sw_if_index[VLIB_TX];
sw_if_index[3] = vnet_buffer (b[3])->sw_if_index[VLIB_TX];
+ p += 4;
b += 4;
sw_if_index += 4;
n_left -= 4;
@@ -1122,7 +1289,9 @@ VLIB_NODE_FN (vnet_interface_output_arc_end_node)
while (n_left)
{
+ p[0] = vlib_buffer_get_current (b[0]);
sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
+ p++;
b++;
sw_if_index++;
n_left--;
@@ -1139,74 +1308,45 @@ VLIB_NODE_FN (vnet_interface_output_arc_end_node)
more:
next_index = vec_elt (im->if_out_arc_end_next_index_by_sw_if_index, swif);
hi = vnet_get_sup_hw_interface (vnm, swif);
+ vnet_hw_if_output_node_runtime_t *r = 0;
+ void *ptr_tmp[VLIB_FRAME_SIZE], **p_tmp = ptr_tmp;
+
if (hi->output_node_thread_runtimes)
r = vec_elt_at_index (hi->output_node_thread_runtimes, vm->thread_index);
- f = vlib_get_next_frame_internal (vm, node, next_index, 0);
- tf = vlib_frame_scalar_args (f);
-
- if (f->n_vectors > 0 && (r == 0 || r->frame.queue_id == tf->queue_id))
- {
- /* append frame */
- n_free = VLIB_FRAME_SIZE - f->n_vectors;
- if (n_free >= f->n_vectors)
- to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
- else
- to = tmp;
- }
- else
- {
- if (f->n_vectors > 0)
- {
- /* current frame doesn't fit - grab empty one */
- f = vlib_get_next_frame_internal (vm, node, next_index, 1);
- tf = vlib_frame_scalar_args (f);
- }
-
- /* empty frame - store scalar data */
- store_tx_frame_scalar_data (r, tf);
- n_free = VLIB_FRAME_SIZE;
- to = vlib_frame_vector_args (f);
- }
/* compare and compress based on comparison mask */
clib_mask_compare_u32 (swif, sw_if_indices, mask, frame->n_vectors);
- n_comp = clib_compress_u32 (to, from, mask, frame->n_vectors);
+ n_comp = clib_compress_u32 (tmp, from, mask, frame->n_vectors);
- if (tmp != to)
- {
- /* indices already written to frame, just close it */
- vlib_put_next_frame (vm, node, next_index, n_free - n_comp);
- }
- else if (n_free >= n_comp)
+ /*
+ * drop if no tx queue of the given interface is available on the
+ * given thread; compress the pointer array when there are several
+ */
+ if (r)
{
- /* enough space in the existing frame */
- to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
- vlib_buffer_copy_indices (to, tmp, n_comp);
- vlib_put_next_frame (vm, node, next_index, n_free - n_comp);
+ if (r->n_queues == 0)
+ {
+ vlib_error_drop_buffers (
+ vm, node, tmp,
+ /* buffer stride */ 1, n_comp, VNET_INTERFACE_OUTPUT_NEXT_DROP,
+ node->node_index, VNET_INTERFACE_OUTPUT_ERROR_NO_TX_QUEUE);
+ goto drop;
+ }
+ else if (r->n_queues > 1)
+ {
+ n_p_comp = clib_compress_u64 ((u64 *) p_tmp, (u64 *) ptr, mask,
+ frame->n_vectors);
+ ASSERT (n_p_comp == n_comp);
+ }
}
- else
- {
- /* full frame */
- to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
- vlib_buffer_copy_indices (to, tmp, n_free);
- vlib_put_next_frame (vm, node, next_index, 0);
- /* second frame */
- u32 n_frame2 = n_comp - n_free;
- f = vlib_get_next_frame_internal (vm, node, next_index, 1);
- to = vlib_frame_vector_args (f);
- vlib_buffer_copy_indices (to, tmp + n_free, n_frame2);
- tf = vlib_frame_scalar_args (f);
- store_tx_frame_scalar_data (r, tf);
- vlib_put_next_frame (vm, node, next_index, VLIB_FRAME_SIZE - n_frame2);
- }
+ enqueue_to_tx_node (vm, node, hi, next_index, r, tmp, ptr_tmp, n_comp);
+drop:
n_left -= n_comp;
if (n_left)
{
/* store comparison mask so we can find next unused element */
- for (int i = 0; i < ARRAY_LEN (used_elts); i++)
- used_elts[i] |= mask[i];
+ vlib_frame_bitmap_or (used_elts, mask);
      /* find first unused sw_if_index by scanning through used_elts bitmap */
while (PREDICT_FALSE (used_elts[off] == ~0))
diff --git a/src/vnet/interface_output.h b/src/vnet/interface_output.h
index 15b0a1d3ccc..b512d9a04a8 100644
--- a/src/vnet/interface_output.h
+++ b/src/vnet/interface_output.h
@@ -41,6 +41,7 @@
#define __INTERFACE_INLINES_H__
#include <vnet/vnet.h>
+#include <vnet/tcp/tcp_packet.h>
static_always_inline void
vnet_calc_ip4_checksums (vlib_main_t *vm, vlib_buffer_t *b, ip4_header_t *ip4,
@@ -114,6 +115,36 @@ vnet_calc_checksums_inline (vlib_main_t * vm, vlib_buffer_t * b,
VNET_BUFFER_OFFLOAD_F_TCP_CKSUM));
}
+static_always_inline void
+vnet_calc_outer_checksums_inline (vlib_main_t *vm, vlib_buffer_t *b)
+{
+
+ if (!(b->flags & VNET_BUFFER_F_OFFLOAD))
+ return;
+
+ vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
+ if (oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM)
+ {
+ ip4_header_t *ip4;
+ ip4 = (ip4_header_t *) (b->data + vnet_buffer2 (b)->outer_l3_hdr_offset);
+ ip4->checksum = ip4_header_checksum (ip4);
+ vnet_buffer_offload_flags_clear (b,
+ VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM);
+ }
+ else if (oflags & VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM)
+ {
+ int bogus;
+ ip6_header_t *ip6;
+ udp_header_t *uh;
+
+ ip6 = (ip6_header_t *) (b->data + vnet_buffer2 (b)->outer_l3_hdr_offset);
+ uh = (udp_header_t *) (b->data + vnet_buffer2 (b)->outer_l4_hdr_offset);
+ uh->checksum = 0;
+ uh->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus);
+ vnet_buffer_offload_flags_clear (b,
+ VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM);
+ }
+}
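For the outer IPv4 case, ip4_header_checksum computes the usual RFC 1071 one's-complement sum over the header. A self-contained sketch of that algorithm (not the vnet implementation itself):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* one's-complement sum over an IPv4 header; the checksum field is
   assumed already zeroed by the caller (result in host byte order) */
static uint16_t
ip4_cksum_sketch (const uint8_t *hdr, size_t len)
{
  uint32_t sum = 0;
  for (size_t i = 0; i + 1 < len; i += 2)
    sum += (uint32_t) ((hdr[i] << 8) | hdr[i + 1]);
  while (sum >> 16)
    sum = (sum & 0xffff) + (sum >> 16);
  return (uint16_t) ~sum;
}

int
main (void)
{
  /* classic sample header, checksum bytes (10-11) zeroed */
  uint8_t h[20] = { 0x45, 0x00, 0x00, 0x3c, 0x1c, 0x46, 0x40, 0x00,
		    0x40, 0x06, 0x00, 0x00, 0xac, 0x10, 0x0a, 0x63,
		    0xac, 0x10, 0x0a, 0x0c };
  printf ("checksum = 0x%04x\n", ip4_cksum_sketch (h, sizeof (h)));
  return 0;
}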
#endif
/*
diff --git a/src/vnet/interface_stats.c b/src/vnet/interface_stats.c
index 3afde0ea54f..ff1a2af9130 100644
--- a/src/vnet/interface_stats.c
+++ b/src/vnet/interface_stats.c
@@ -170,7 +170,6 @@ VLIB_NODE_FN (stats_collect_tx_node) (vlib_main_t * vm,
return stats_collect_inline (vm, node, frame, VLIB_TX);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (stats_collect_rx_node) = {
.vector_size = sizeof (u32),
.format_trace = format_stats_collect_trace,
@@ -201,7 +200,6 @@ VNET_FEATURE_INIT (stats_collect_tx_node, static) = {
.runs_before = VNET_FEATURES ("interface-output-arc-end"),
};
-/* *INDENT-ON* */
static clib_error_t *
stats_collect_init (vlib_main_t * vm)
diff --git a/src/vnet/interface_test.c b/src/vnet/interface_test.c
index 4a1681f4eac..2d0c0ee81d1 100644
--- a/src/vnet/interface_test.c
+++ b/src/vnet/interface_test.c
@@ -570,6 +570,63 @@ api_sw_interface_set_rx_placement (vat_main_t *vam)
}
static int
+api_sw_interface_set_tx_placement (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_tx_placement_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ int ret;
+ uword *bitmap = 0;
+ u32 queue_id, n_bits = 0;
+ u32 v;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "queue %d", &queue_id))
+ ;
+ else if (unformat (i, "threads %U", unformat_bitmap_list, &bitmap))
+ ;
+ else if (unformat (i, "mask %U", unformat_bitmap_mask, &bitmap))
+ ;
+ else if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ n_bits = clib_bitmap_count_set_bits (bitmap);
+ /* Construct the API message */
+ M2 (SW_INTERFACE_SET_TX_PLACEMENT, mp, sizeof (u32) * n_bits);
+ mp->sw_if_index = htonl (sw_if_index);
+ mp->queue_id = htonl (queue_id);
+ mp->array_size = htonl (n_bits);
+
+ v = clib_bitmap_first_set (bitmap);
+ for (u32 j = 0; j < n_bits; j++)
+ {
+ mp->threads[j] = htonl (v);
+ v = clib_bitmap_next_set (bitmap, v + 1);
+ }
+
+ /* send it... */
+ S (mp);
+ /* Wait for a reply, return the good/bad news... */
+ W (ret);
+ clib_bitmap_free (bitmap);
+ return ret;
+}
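A plausible invocation of this VAT command (values illustrative):

  sw_interface_set_tx_placement sw_if_index 1 queue 0 threads 1-2

threads takes an unformat_bitmap_list range, so 1-2 selects worker threads 1 and 2; mask accepts a bitmap mask instead.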
+
+static int
api_interface_name_renumber (vat_main_t *vam)
{
unformat_input_t *line_input = vam->input;
@@ -844,6 +901,25 @@ vl_api_sw_interface_rx_placement_details_t_handler (
((mp->mode == 2) ? "interrupt" : "adaptive"));
}
+static __clib_unused void
+vl_api_sw_interface_tx_placement_details_t_handler (
+ vl_api_sw_interface_tx_placement_details_t *mp)
+{
+ vat_main_t *vam = interface_test_main.vat_main;
+ u32 size = ntohl (mp->array_size);
+ uword *bitmap = 0;
+
+ for (u32 i = 0; i < size; i++)
+ {
+ u32 thread_index = ntohl (mp->threads[i]);
+ bitmap = clib_bitmap_set (bitmap, thread_index, 1);
+ }
+
+ print (vam->ofp, "\n%-11d %-6d %-7s %U", ntohl (mp->sw_if_index),
+ ntohl (mp->queue_id), (mp->shared == 1) ? "yes" : "no",
+ format_bitmap_list, bitmap);
+}
+
static void
vl_api_create_vlan_subif_reply_t_handler (vl_api_create_vlan_subif_reply_t *mp)
{
@@ -961,6 +1037,52 @@ api_sw_interface_rx_placement_dump (vat_main_t *vam)
}
static int
+api_sw_interface_tx_placement_get (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_tx_placement_get_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set++;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set++;
+ else
+ break;
+ }
+
+ fformat (vam->ofp, "\n%-11s %-6s %-7s %-11s", "sw_if_index", "queue",
+ "shared", "threads");
+
+ /* Dump Interface tx placement */
+ M (SW_INTERFACE_TX_PLACEMENT_GET, mp);
+
+ if (sw_if_index_set)
+ mp->sw_if_index = htonl (sw_if_index);
+ else
+ mp->sw_if_index = ~0;
+
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ PING (&interface_test_main, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_sw_interface_tx_placement_get_reply_t_handler ()
+{
+}
+
+static int
api_sw_interface_clear_stats (vat_main_t *vam)
{
unformat_input_t *i = vam->input;
@@ -1161,6 +1283,30 @@ api_sw_interface_set_interface_name (vat_main_t *vam)
return -1;
}
+static int
+api_pcap_set_filter_function (vat_main_t *vam)
+{
+ vl_api_pcap_set_filter_function_t *mp;
+ int ret;
+
+ M (PCAP_SET_FILTER_FUNCTION, mp);
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_pcap_trace_on (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_pcap_trace_off (vat_main_t *vam)
+{
+ return -1;
+}
+
#include <vnet/interface.api_test.c>
/*
diff --git a/src/vnet/ip-neighbor/ip4_neighbor.c b/src/vnet/ip-neighbor/ip4_neighbor.c
index cf0e81a0b43..61b9e768fe5 100644
--- a/src/vnet/ip-neighbor/ip4_neighbor.c
+++ b/src/vnet/ip-neighbor/ip4_neighbor.c
@@ -38,6 +38,7 @@
*/
#include <vnet/ip-neighbor/ip4_neighbor.h>
+#include <vnet/ip-neighbor/ip_neighbor.api_enum.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/util/throttle.h>
#include <vnet/fib/fib_sas.h>
@@ -55,7 +56,8 @@ VLIB_REGISTER_LOG_CLASS (ip4_neighbor_log, static) = {
vlib_log_debug (ip4_neighbor_log.class, fmt, __VA_ARGS__)
void
-ip4_neighbor_probe_dst (u32 sw_if_index, const ip4_address_t * dst)
+ip4_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
+ const ip4_address_t *dst)
{
ip4_address_t src;
adj_index_t ai;
@@ -71,9 +73,8 @@ ip4_neighbor_probe_dst (u32 sw_if_index, const ip4_address_t * dst)
}
void
-ip4_neighbor_advertise (vlib_main_t * vm,
- vnet_main_t * vnm,
- u32 sw_if_index, const ip4_address_t * addr)
+ip4_neighbor_advertise (vlib_main_t *vm, vnet_main_t *vnm, u32 sw_if_index,
+ u32 thread_index, const ip4_address_t *addr)
{
vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
ip4_main_t *i4m = &ip4_main;
@@ -126,6 +127,10 @@ ip4_neighbor_advertise (vlib_main_t * vm,
to_next[0] = bi;
f->n_vectors = 1;
vlib_put_frame_to_node (vm, hi->output_node_index, f);
+
+ vlib_increment_simple_counter (
+ &ip_neighbor_counters[AF_IP4].ipnc[VLIB_TX][IP_NEIGHBOR_CTR_GRAT],
+ thread_index, sw_if_index, 1);
}
}
@@ -182,18 +187,23 @@ ip4_arp_inline (vlib_main_t * vm,
/* resolve the packet's destination */
ip4_header_t *ip0 = vlib_buffer_get_current (p0);
resolve0 = ip0->dst_address;
- src0 = adj0->sub_type.glean.rx_pfx.fp_addr.ip4;
}
else
+ /* resolve the incomplete adj */
+ resolve0 = adj0->sub_type.nbr.next_hop.ip4;
+
+ if (is_glean && adj0->sub_type.glean.rx_pfx.fp_len)
+ /* the glean is for a connected, local prefix */
+ src0 = adj0->sub_type.glean.rx_pfx.fp_addr.ip4;
+ else
{
- /* resolve the incomplete adj */
- resolve0 = adj0->sub_type.nbr.next_hop.ip4;
/* Src IP address in ARP header. */
if (!fib_sas4_get (sw_if_index0, &resolve0, &src0) &&
!ip4_sas_by_sw_if_index (sw_if_index0, &resolve0, &src0))
{
/* No source address available */
- p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
+ p0->error =
+ node->errors[IP4_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS];
continue;
}
}
@@ -204,7 +214,7 @@ ip4_arp_inline (vlib_main_t * vm,
if (throttle_check (&arp_throttle, thread_index, r0, seed))
{
- p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
+ p0->error = node->errors[IP4_NEIGHBOR_ERROR_THROTTLED];
continue;
}
@@ -214,7 +224,7 @@ ip4_arp_inline (vlib_main_t * vm,
*/
if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
{
- p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
+ p0->error = node->errors[IP4_NEIGHBOR_ERROR_RESOLVED];
continue;
}
@@ -225,7 +235,7 @@ ip4_arp_inline (vlib_main_t * vm,
if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
|| (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
{
- p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
+ p0->error = node->errors[IP4_NEIGHBOR_ERROR_NON_ARP_ADJ];
continue;
}
@@ -237,11 +247,11 @@ ip4_arp_inline (vlib_main_t * vm,
/* copy the persistent fields from the original */
clib_memcpy_fast (b0->opaque2, p0->opaque2,
sizeof (p0->opaque2));
- p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
+ p0->error = node->errors[IP4_NEIGHBOR_ERROR_REQUEST_SENT];
}
else
{
- p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
+ p0->error = node->errors[IP4_NEIGHBOR_ERROR_NO_BUFFERS];
continue;
}
}
@@ -264,23 +274,13 @@ VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return (ip4_arp_inline (vm, node, frame, 1));
}
-static char *ip4_arp_error_strings[] = {
- [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
- [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
- [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
- [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
- [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
- [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
-};
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_arp_node) =
{
.name = "ip4-arp",
.vector_size = sizeof (u32),
.format_trace = format_ip4_forward_next_trace,
- .n_errors = ARRAY_LEN (ip4_arp_error_strings),
- .error_strings = ip4_arp_error_strings,
+ .n_errors = IP4_NEIGHBOR_N_ERROR,
+ .error_counters = ip4_neighbor_error_counters,
.n_next_nodes = IP4_ARP_N_NEXT,
.next_nodes = {
[IP4_ARP_NEXT_DROP] = "ip4-drop",
@@ -292,14 +292,13 @@ VLIB_REGISTER_NODE (ip4_glean_node) =
.name = "ip4-glean",
.vector_size = sizeof (u32),
.format_trace = format_ip4_forward_next_trace,
- .n_errors = ARRAY_LEN (ip4_arp_error_strings),
- .error_strings = ip4_arp_error_strings,
+ .n_errors = IP4_NEIGHBOR_N_ERROR,
+ .error_counters = ip4_neighbor_error_counters,
.n_next_nodes = IP4_ARP_N_NEXT,
.next_nodes = {
[IP4_ARP_NEXT_DROP] = "ip4-drop",
},
};
-/* *INDENT-ON* */
#define foreach_notrace_ip4_arp_error \
_(THROTTLED) \
@@ -315,10 +314,9 @@ arp_notrace_init (vlib_main_t * vm)
vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
/* don't trace ARP request packets */
-#define _(a) \
- vnet_pcap_drop_trace_filter_add_del \
- (rt->errors[IP4_ARP_ERROR_##a], \
- 1 /* is_add */);
+#define _(a) \
+ vnet_pcap_drop_trace_filter_add_del (rt->errors[IP4_NEIGHBOR_ERROR_##a], \
+ 1 /* is_add */);
foreach_notrace_ip4_arp_error;
#undef _
return 0;
@@ -332,7 +330,7 @@ ip4_neighbor_main_loop_enter (vlib_main_t * vm)
vlib_thread_main_t *tm = &vlib_thread_main;
u32 n_vlib_mains = tm->n_vlib_mains;
- throttle_init (&arp_throttle, n_vlib_mains, 1e-3);
+ throttle_init (&arp_throttle, n_vlib_mains, THROTTLE_BITS, 1e-3);
return (NULL);
}
diff --git a/src/vnet/ip-neighbor/ip4_neighbor.h b/src/vnet/ip-neighbor/ip4_neighbor.h
index c330dfa59e7..7941ebdbced 100644
--- a/src/vnet/ip-neighbor/ip4_neighbor.h
+++ b/src/vnet/ip-neighbor/ip4_neighbor.h
@@ -18,19 +18,18 @@
#include <vnet/ip/ip.h>
#include <vnet/ethernet/arp_packet.h>
+#include <vnet/ip-neighbor/ip_neighbor_types.h>
-extern void ip4_neighbor_probe_dst (u32 sw_if_index,
- const ip4_address_t * dst);
-extern void ip4_neighbor_advertise (vlib_main_t * vm,
- vnet_main_t * vnm,
- u32 sw_if_index,
- const ip4_address_t * addr);
+extern void ip4_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
+ const ip4_address_t *dst);
+extern void ip4_neighbor_advertise (vlib_main_t *vm, vnet_main_t *vnm,
+ u32 sw_if_index, u32 thread_index,
+ const ip4_address_t *addr);
always_inline vlib_buffer_t *
-ip4_neighbor_probe (vlib_main_t * vm,
- vnet_main_t * vnm,
- const ip_adjacency_t * adj0,
- const ip4_address_t * src, const ip4_address_t * dst)
+ip4_neighbor_probe (vlib_main_t *vm, vnet_main_t *vnm,
+ const ip_adjacency_t *adj0, const ip4_address_t *src,
+ const ip4_address_t *dst)
{
vnet_hw_interface_t *hw_if0;
ethernet_arp_header_t *h0;
@@ -62,6 +61,7 @@ ip4_neighbor_probe (vlib_main_t * vm,
h0->ip4_over_ethernet[1].ip4 = *dst;
vnet_buffer (b0)->sw_if_index[VLIB_TX] = adj0->rewrite_header.sw_if_index;
+ b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
@@ -73,6 +73,10 @@ ip4_neighbor_probe (vlib_main_t * vm,
vlib_put_frame_to_node (vm, hw_if0->output_node_index, f);
}
+ vlib_increment_simple_counter (
+ &ip_neighbor_counters[AF_IP4].ipnc[VLIB_TX][IP_NEIGHBOR_CTR_REQUEST],
+ vm->thread_index, adj0->rewrite_header.sw_if_index, 1);
+
return b0;
}
diff --git a/src/vnet/ip-neighbor/ip6_neighbor.c b/src/vnet/ip-neighbor/ip6_neighbor.c
index cf14954e96d..ca8aed3d4ca 100644
--- a/src/vnet/ip-neighbor/ip6_neighbor.c
+++ b/src/vnet/ip-neighbor/ip6_neighbor.c
@@ -16,6 +16,7 @@
*/
#include <vnet/ip-neighbor/ip6_neighbor.h>
+#include <vnet/ip-neighbor/ip_neighbor.api_enum.h>
#include <vnet/util/throttle.h>
#include <vnet/fib/fib_sas.h>
#include <vnet/ip/ip_sas.h>
@@ -31,20 +32,20 @@ VLIB_REGISTER_LOG_CLASS (ip6_neighbor_log, static) = {
#define log_debug(fmt, ...) \
vlib_log_debug (ip6_neighbor_log.class, fmt, __VA_ARGS__)
void
-ip6_neighbor_probe_dst (u32 sw_if_index, const ip6_address_t * dst)
+ip6_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
+ const ip6_address_t *dst)
{
ip6_address_t src;
if (fib_sas6_get (sw_if_index, dst, &src) ||
ip6_sas_by_sw_if_index (sw_if_index, dst, &src))
- ip6_neighbor_probe (vlib_get_main (), vnet_get_main (),
- sw_if_index, &src, dst);
+ ip6_neighbor_probe (vlib_get_main (), vnet_get_main (), sw_if_index,
+ thread_index, &src, dst);
}
void
-ip6_neighbor_advertise (vlib_main_t * vm,
- vnet_main_t * vnm,
- u32 sw_if_index, const ip6_address_t * addr)
+ip6_neighbor_advertise (vlib_main_t *vm, vnet_main_t *vnm, u32 sw_if_index,
+ u32 thread_index, const ip6_address_t *addr)
{
vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
ip6_main_t *i6m = &ip6_main;
@@ -105,6 +106,10 @@ ip6_neighbor_advertise (vlib_main_t * vm,
to_next[0] = bi;
f->n_vectors = 1;
vlib_put_frame_to_node (vm, hi->output_node_index, f);
+
+ vlib_increment_simple_counter (
+ &ip_neighbor_counters[AF_IP6].ipnc[VLIB_TX][IP_NEIGHBOR_CTR_GRAT],
+ thread_index, sw_if_index, 1);
}
}
@@ -115,14 +120,6 @@ typedef enum
IP6_NBR_N_NEXT,
} ip6_discover_neighbor_next_t;
-typedef enum
-{
- IP6_NBR_ERROR_DROP,
- IP6_NBR_ERROR_REQUEST_SENT,
- IP6_NBR_ERROR_NO_SOURCE_ADDRESS,
- IP6_NBR_ERROR_NO_BUFFERS,
-} ip6_discover_neighbor_error_t;
-
static uword
ip6_discover_neighbor_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -188,6 +185,12 @@ ip6_discover_neighbor_inline (vlib_main_t * vm,
to_next_drop += 1;
n_left_to_next_drop -= 1;
+ if (drop0)
+ {
+ p0->error = node->errors[IP6_NEIGHBOR_ERROR_THROTTLED];
+ continue;
+ }
+
hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
/* If the interface is link-down, drop the pkt */
@@ -206,7 +209,7 @@ ip6_discover_neighbor_inline (vlib_main_t * vm,
if (drop0)
{
- p0->error = node->errors[IP6_NBR_ERROR_DROP];
+ p0->error = node->errors[IP6_NEIGHBOR_ERROR_DROP];
continue;
}
@@ -214,16 +217,17 @@ ip6_discover_neighbor_inline (vlib_main_t * vm,
* Choose source address based on destination lookup
* adjacency.
*/
- if (!fib_sas6_get (sw_if_index0, &ip0->dst_address, &src) ||
- !ip6_sas_by_sw_if_index (sw_if_index0, &ip0->dst_address, &src))
+ const ip6_address_t *ll = ip6_get_link_local_address (sw_if_index0);
+ if (!ll)
{
/* There is no address on the interface */
- p0->error = node->errors[IP6_NBR_ERROR_NO_SOURCE_ADDRESS];
+ p0->error = node->errors[IP6_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS];
continue;
}
+ ip6_address_copy (&src, ll);
- b0 = ip6_neighbor_probe (vm, vnm, sw_if_index0,
- &src, &ip0->dst_address);
+ b0 = ip6_neighbor_probe (vm, vnm, sw_if_index0, thread_index, &src,
+ &ip0->dst_address);
if (PREDICT_TRUE (NULL != b0))
{
@@ -231,12 +235,12 @@ ip6_discover_neighbor_inline (vlib_main_t * vm,
sizeof (p0->opaque2));
b0->flags |= p0->flags & VLIB_BUFFER_IS_TRACED;
b0->trace_handle = p0->trace_handle;
- p0->error = node->errors[IP6_NBR_ERROR_REQUEST_SENT];
+ p0->error = node->errors[IP6_NEIGHBOR_ERROR_REQUEST_SENT];
}
else
{
/* There is no address on the interface */
- p0->error = node->errors[IP6_NBR_ERROR_NO_BUFFERS];
+ p0->error = node->errors[IP6_NEIGHBOR_ERROR_NO_BUFFERS];
continue;
}
}
@@ -260,22 +264,14 @@ ip6_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
return (ip6_discover_neighbor_inline (vm, node, frame, 1));
}
-static char *ip6_discover_neighbor_error_strings[] = {
- [IP6_NBR_ERROR_DROP] = "address overflow drops",
- [IP6_NBR_ERROR_REQUEST_SENT] = "neighbor solicitations sent",
- [IP6_NBR_ERROR_NO_SOURCE_ADDRESS] = "no source address for ND solicitation",
- [IP6_NBR_ERROR_NO_BUFFERS] = "no buffers",
-};
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_glean_node) =
{
.function = ip6_glean,
.name = "ip6-glean",
.vector_size = sizeof (u32),
.format_trace = format_ip6_forward_next_trace,
- .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
- .error_strings = ip6_discover_neighbor_error_strings,
+ .n_errors = IP6_NEIGHBOR_N_ERROR,
+ .error_counters = ip6_neighbor_error_counters,
.n_next_nodes = IP6_NBR_N_NEXT,
.next_nodes =
{
@@ -289,8 +285,8 @@ VLIB_REGISTER_NODE (ip6_discover_neighbor_node) =
.name = "ip6-discover-neighbor",
.vector_size = sizeof (u32),
.format_trace = format_ip6_forward_next_trace,
- .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
- .error_strings = ip6_discover_neighbor_error_strings,
+ .n_errors = IP6_NEIGHBOR_N_ERROR,
+ .error_counters = ip6_neighbor_error_counters,
.n_next_nodes = IP6_NBR_N_NEXT,
.next_nodes =
{
@@ -298,7 +294,6 @@ VLIB_REGISTER_NODE (ip6_discover_neighbor_node) =
[IP6_NBR_NEXT_REPLY_TX] = "ip6-rewrite-mcast",
},
};
-/* *INDENT-ON* */
/* Template used to generate IP6 neighbor solicitation packets. */
vlib_packet_template_t ip6_neighbor_packet_template;
@@ -342,7 +337,7 @@ ip6_nd_main_loop_enter (vlib_main_t * vm)
{
vlib_thread_main_t *tm = &vlib_thread_main;
- throttle_init (&nd_throttle, tm->n_vlib_mains, 1e-3);
+ throttle_init (&nd_throttle, tm->n_vlib_mains, THROTTLE_BITS, 1e-3);
return 0;
}
diff --git a/src/vnet/ip-neighbor/ip6_neighbor.h b/src/vnet/ip-neighbor/ip6_neighbor.h
index ad2ace21948..c6e718dc2ff 100644
--- a/src/vnet/ip-neighbor/ip6_neighbor.h
+++ b/src/vnet/ip-neighbor/ip6_neighbor.h
@@ -25,23 +25,22 @@
#include <vnet/ip/icmp46_packet.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/adj/adj_internal.h>
+#include <vnet/ip-neighbor/ip_neighbor_types.h>
/* Template used to generate IP6 neighbor solicitation packets. */
extern vlib_packet_template_t ip6_neighbor_packet_template;
-extern void ip6_neighbor_advertise (vlib_main_t * vm,
- vnet_main_t * vnm,
- u32 sw_if_index,
- const ip6_address_t * addr);
+extern void ip6_neighbor_advertise (vlib_main_t *vm, vnet_main_t *vnm,
+ u32 sw_if_index, u32 thread_index,
+ const ip6_address_t *addr);
-extern void ip6_neighbor_probe_dst (u32 sw_if_index,
- const ip6_address_t * dst);
+extern void ip6_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
+ const ip6_address_t *dst);
always_inline vlib_buffer_t *
-ip6_neighbor_probe (vlib_main_t * vm,
- vnet_main_t * vnm,
- u32 sw_if_index,
- const ip6_address_t * src, const ip6_address_t * dst)
+ip6_neighbor_probe (vlib_main_t *vm, vnet_main_t *vnm, u32 sw_if_index,
+ u32 thread_index, const ip6_address_t *src,
+ const ip6_address_t *dst)
{
icmp6_neighbor_solicitation_header_t *h0;
vnet_hw_interface_t *hw_if0;
@@ -104,6 +103,10 @@ ip6_neighbor_probe (vlib_main_t * vm,
vlib_put_frame_to_node (vm, adj->ia_node_index, f);
}
+ vlib_increment_simple_counter (
+ &ip_neighbor_counters[AF_IP6].ipnc[VLIB_TX][IP_NEIGHBOR_CTR_REQUEST],
+ thread_index, sw_if_index, 1);
+
return b0;
}
diff --git a/src/vnet/ip-neighbor/ip_neighbor.api b/src/vnet/ip-neighbor/ip_neighbor.api
index 62730e7c1e3..24cddd42fab 100644
--- a/src/vnet/ip-neighbor/ip_neighbor.api
+++ b/src/vnet/ip-neighbor/ip_neighbor.api
@@ -20,7 +20,7 @@
called through a shared memory interface.
*/
-option version = "1.0.0";
+option version = "1.0.1";
import "vnet/ip/ip_types.api";
import "vnet/ethernet/ethernet_types.api";
@@ -126,6 +126,40 @@ autoreply define ip_neighbor_config
bool recycle;
};
+/** \brief Get neighbor database configuration per AF
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param af - Address family (v4/v6)
+*/
+define ip_neighbor_config_get
+{
+ option in_progress;
+ u32 client_index;
+ u32 context;
+ vl_api_address_family_t af;
+};
+
+/** \brief Neighbor database configuration reply
+ @param context - sender context, to match reply w/ request
+ @param retval - error (0 is "no error")
+ @param af - Address family (v4/v6)
+ @param max_number - The maximum number of neighbours that will be created
+ @param max_age - The maximum age (in seconds) before an inactive neighbour
+ is flushed
+ @param recycle - If max_number of neighbours is reached and new ones need
+ to be created, should the oldest neighbour be 'recycled'
+*/
+define ip_neighbor_config_get_reply
+{
+ option in_progress;
+ u32 context;
+ i32 retval;
+ vl_api_address_family_t af;
+ u32 max_number;
+ u32 max_age;
+ bool recycle;
+};
+
/** \brief IP neighbour replace begin
The use-case is that, for some unspecified reason, the control plane
@@ -264,6 +298,85 @@ service {
events ip_neighbor_event_v2;
};
+counters ip4_neighbor {
+ throttled {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ARP requests throttled";
+ };
+ resolved {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ARP requests resolved";
+ };
+ no_buffers {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ARP requests out of buffer";
+ };
+ request_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ARP requests sent";
+ };
+ non_arp_adj {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ARPs to non-ARP adjacencies";
+ };
+ no_source_address {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no source address for ARP request";
+ };
+};
+
+counters ip6_neighbor {
+ throttled {
+ severity info;
+ type counter64;
+ units "packets";
+ description "throttled";
+ };
+ drop {
+ severity error;
+ type counter64;
+ units "packets";
+ description "address overflow drops";
+ };
+ request_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "neighbor solicitations sent";
+ };
+ no_source_address {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no source address for ND solicitation";
+ };
+ no_buffers {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no buffers";
+ };
+};
+
+paths {
+ "/err/ip4-arp" "ip4_neighbor";
+ "/err/ip4-glean" "ip4_neighbor";
+ "/err/ip6-arp" "ip6_neighbor";
+ "/err/ip6-glean" "ip6_neighbor";
+};
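These counters blocks replace the hand-written error-string arrays deleted from ip4_neighbor.c and ip6_neighbor.c: the API compiler turns every entry into an error counter enum plus description in ip_neighbor.api_enum.h, and the paths block binds them to the named nodes in the stats segment. A sketch of the generated ip4 enum (names follow the references in ip4_neighbor.c above; exact order and layout assumed):

typedef enum
{
  IP4_NEIGHBOR_ERROR_THROTTLED,
  IP4_NEIGHBOR_ERROR_RESOLVED,
  IP4_NEIGHBOR_ERROR_NO_BUFFERS,
  IP4_NEIGHBOR_ERROR_REQUEST_SENT,
  IP4_NEIGHBOR_ERROR_NON_ARP_ADJ,
  IP4_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS,
  IP4_NEIGHBOR_N_ERROR,
} ip4_neighbor_error_t;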
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/vnet/ip-neighbor/ip_neighbor.c b/src/vnet/ip-neighbor/ip_neighbor.c
index 6c97356dd98..614b78489cd 100644
--- a/src/vnet/ip-neighbor/ip_neighbor.c
+++ b/src/vnet/ip-neighbor/ip_neighbor.c
@@ -27,6 +27,74 @@
#include <vnet/fib/fib_table.h>
#include <vnet/adj/adj_mcast.h>
+ip_neighbor_counters_t ip_neighbor_counters[] =
+{
+ [AF_IP4] = {
+ .ipnc = {
+ [VLIB_RX] = {
+ [IP_NEIGHBOR_CTR_REPLY] = {
+ .name = "arp-rx-replies",
+ .stat_segment_name = "/net/arp/rx/replies",
+ },
+ [IP_NEIGHBOR_CTR_REQUEST] = {
+ .name = "arp-rx-requests",
+ .stat_segment_name = "/net/arp/rx/requests",
+ },
+ [IP_NEIGHBOR_CTR_GRAT] = {
+ .name = "arp-rx-gratuitous",
+ .stat_segment_name = "/net/arp/rx/gratuitous",
+ },
+ },
+ [VLIB_TX] = {
+ [IP_NEIGHBOR_CTR_REPLY] = {
+ .name = "arp-tx-replies",
+ .stat_segment_name = "/net/arp/tx/replies",
+ },
+ [IP_NEIGHBOR_CTR_REQUEST] = {
+ .name = "arp-tx-requests",
+ .stat_segment_name = "/net/arp/tx/requests",
+ },
+ [IP_NEIGHBOR_CTR_GRAT] = {
+ .name = "arp-tx-gratuitous",
+ .stat_segment_name = "/net/arp/tx/gratuitous",
+ },
+ },
+ },
+ },
+ [AF_IP6] = {
+ .ipnc = {
+ [VLIB_RX] = {
+ [IP_NEIGHBOR_CTR_REPLY] = {
+ .name = "ip6-nd-rx-replies",
+ .stat_segment_name = "/net/ip6-nd/rx/replies",
+ },
+ [IP_NEIGHBOR_CTR_REQUEST] = {
+ .name = "ip6-nd-rx-requests",
+ .stat_segment_name = "/net/ip6-nd/rx/requests",
+ },
+ [IP_NEIGHBOR_CTR_GRAT] = {
+ .name = "ip6-nd-rx-gratuitous",
+ .stat_segment_name = "/net/ip6-nd/rx/gratuitous",
+ },
+ },
+ [VLIB_TX] = {
+ [IP_NEIGHBOR_CTR_REPLY] = {
+ .name = "ip6-nd-tx-replies",
+ .stat_segment_name = "/net/ip6-nd/tx/replies",
+ },
+ [IP_NEIGHBOR_CTR_REQUEST] = {
+ .name = "ip6-nd-tx-requests",
+ .stat_segment_name = "/net/ip6-nd/tx/requests",
+ },
+ [IP_NEIGHBOR_CTR_GRAT] = {
+ .name = "ip6-nd-tx-gratuitous",
+ .stat_segment_name = "/net/ip6-nd/tx/gratuitous",
+ },
+ },
+ },
+ },
+};
+
/** Pool for All IP neighbors */
static ip_neighbor_t *ip_neighbor_pool;
@@ -62,7 +130,6 @@ typedef struct ip_neighbor_db_t_
static vlib_log_class_t ipn_logger;
/* DBs of neighbours one per AF */
-/* *INDENT-OFF* */
static ip_neighbor_db_t ip_neighbor_db[N_AF] = {
[AF_IP4] = {
.ipndb_limit = 50000,
@@ -77,7 +144,6 @@ static ip_neighbor_db_t ip_neighbor_db[N_AF] = {
.ipndb_recycle = false,
}
};
-/* *INDENT-ON* */
#define IP_NEIGHBOR_DBG(...) \
vlib_log_debug (ipn_logger, __VA_ARGS__);
@@ -394,6 +460,7 @@ ip_neighbor_destroy (ip_neighbor_t * ipn)
af = ip_neighbor_get_af (ipn);
IP_NEIGHBOR_DBG ("free: %U", format_ip_neighbor,
+ vlib_time_now (vlib_get_main ()),
ip_neighbor_get_index (ipn));
ip_neighbor_publish (ip_neighbor_get_index (ipn),
@@ -729,7 +796,7 @@ ip_neighbor_cmd (vlib_main_t * vm,
vnet_main_t *vnm = vnet_get_main ();
ip_neighbor_flags_t flags;
u32 sw_if_index = ~0;
- int is_add = 1;
+ int is_add = 1, is_flush = 0;
int count = 1;
flags = IP_NEIGHBOR_FLAG_DYNAMIC;
@@ -743,6 +810,8 @@ ip_neighbor_cmd (vlib_main_t * vm,
;
else if (unformat (input, "delete") || unformat (input, "del"))
is_add = 0;
+ else if (unformat (input, "flush"))
+ is_flush = 1;
else if (unformat (input, "static"))
{
flags |= IP_NEIGHBOR_FLAG_STATIC;
@@ -756,6 +825,13 @@ ip_neighbor_cmd (vlib_main_t * vm,
break;
}
+ if (is_flush)
+ {
+ ip_neighbor_del_all (AF_IP4, sw_if_index);
+ ip_neighbor_del_all (AF_IP6, sw_if_index);
+ return NULL;
+ }
+
if (sw_if_index == ~0 ||
ip_address_is_zero (&ip) || mac_address_is_zero (&mac))
return clib_error_return (0,
@@ -778,11 +854,10 @@ ip_neighbor_cmd (vlib_main_t * vm,
return NULL;
}
-/* *INDENT-OFF* */
/*?
* Add or delete IPv4 ARP cache entries.
*
- * @note 'set ip neighbor' options (e.g. delete, static, 'fib-id <id>',
+ * @note 'set ip neighbor' options (e.g. delete, static,
* 'count <number>', 'interface ip4_addr mac_addr') can be added in
* any order and combination.
*
@@ -791,35 +866,39 @@ ip_neighbor_cmd (vlib_main_t * vm,
* Add or delete IPv4 ARP cache entries as follows. MAC Address can be in
* either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format.
* @cliexcmd{set ip neighbor GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
- * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3 de:ad:be:ef:ba:be}
+ * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3
+ * de:ad:be:ef:ba:be}
*
- * To add or delete an IPv4 ARP cache entry to or from a specific fib
+ * To add or delete an IPv4 ARP cache entry
* table:
- * @cliexcmd{set ip neighbor fib-id 1 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
- * @cliexcmd{set ip neighbor fib-id 1 delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @cliexcmd{set ip neighbor GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3
+ * dead.beef.babe}
*
* Add or delete IPv4 static ARP cache entries as follows:
- * @cliexcmd{set ip neighbor static GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
- * @cliexcmd{set ip neighbor static delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @cliexcmd{set ip neighbor static GigabitEthernet2/0/0 6.0.0.3
+ * dead.beef.babe}
+ * @cliexcmd{set ip neighbor static delete GigabitEthernet2/0/0 6.0.0.3
+ * dead.beef.babe}
*
* For testing / debugging purposes, the 'set ip neighbor' command can add or
* delete multiple entries. Supply the 'count N' parameter:
- * @cliexcmd{set ip neighbor count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @cliexcmd{set ip neighbor count 10 GigabitEthernet2/0/0 6.0.0.3
+ * dead.beef.babe}
* @endparblock
?*/
VLIB_CLI_COMMAND (ip_neighbor_command, static) = {
.path = "set ip neighbor",
- .short_help =
- "set ip neighbor [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
+ .short_help = "set ip neighbor [del] <intfc> <ip-address> <mac-address> "
+ "[static] [no-fib-entry] [count <count>]",
.function = ip_neighbor_cmd,
};
VLIB_CLI_COMMAND (ip_neighbor_command2, static) = {
.path = "ip neighbor",
- .short_help =
- "ip neighbor [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
+ .short_help = "ip neighbor [del] [flush] <intfc> <ip-address> <mac-address> "
+ "[static] [no-fib-entry] [count <count>]",
.function = ip_neighbor_cmd,
};
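The new flush keyword deletes neighbours for both address families in one go, optionally scoped to an interface (session illustrative; with no interface given, sw_if_index stays ~0):

  vpp# ip neighbor flush
  vpp# ip neighbor flush GigabitEthernet2/0/0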
-/* *INDENT-ON* */
static int
ip_neighbor_sort (void *a1, void *a2)
@@ -845,7 +924,6 @@ ip_neighbor_entries (u32 sw_if_index, ip_address_family_t af)
index_t *ipnis = NULL;
ip_neighbor_t *ipn;
- /* *INDENT-OFF* */
pool_foreach (ipn, ip_neighbor_pool)
{
if ((sw_if_index == ~0 ||
@@ -855,7 +933,6 @@ ip_neighbor_entries (u32 sw_if_index, ip_address_family_t af)
vec_add1 (ipnis, ip_neighbor_get_index(ipn));
}
- /* *INDENT-ON* */
if (ipnis)
vec_sort_with_function (ipnis, ip_neighbor_sort);
@@ -868,22 +945,20 @@ ip_neighbor_show_sorted_i (vlib_main_t * vm,
vlib_cli_command_t * cmd, ip_address_family_t af)
{
ip_neighbor_elt_t *elt, *head;
+ f64 now;
head = pool_elt_at_index (ip_neighbor_elt_pool, ip_neighbor_list_head[af]);
+ now = vlib_time_now (vm);
+ vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Age", "IP", "Flags",
+ "Ethernet", "Interface");
- vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP",
- "Flags", "Ethernet", "Interface");
-
- /* *INDENT-OFF*/
/* the list is time sorted, newest first, so start from the back
* and work forwards. Stop when we get to one that is alive */
- clib_llist_foreach_reverse(ip_neighbor_elt_pool,
- ipne_anchor, head, elt,
- ({
- vlib_cli_output (vm, "%U", format_ip_neighbor, elt->ipne_index);
- }));
- /* *INDENT-ON*/
+ clib_llist_foreach_reverse (ip_neighbor_elt_pool, ipne_anchor, head, elt, ({
+ vlib_cli_output (vm, "%U", format_ip_neighbor,
+ now, elt->ipne_index);
+ }));
return (NULL);
}
@@ -895,6 +970,7 @@ ip_neighbor_show_i (vlib_main_t * vm,
{
index_t *ipni, *ipnis = NULL;
u32 sw_if_index;
+ f64 now;
/* Filter entries by interface if given. */
sw_if_index = ~0;
@@ -902,14 +978,15 @@ ip_neighbor_show_i (vlib_main_t * vm,
&sw_if_index);
ipnis = ip_neighbor_entries (sw_if_index, af);
+ now = vlib_time_now (vm);
if (ipnis)
- vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP",
- "Flags", "Ethernet", "Interface");
+ vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Age", "IP", "Flags",
+ "Ethernet", "Interface");
vec_foreach (ipni, ipnis)
{
- vlib_cli_output (vm, "%U", format_ip_neighbor, *ipni);
+ vlib_cli_output (vm, "%U", format_ip_neighbor, now, *ipni);
}
vec_free (ipnis);
@@ -965,7 +1042,6 @@ ip4_neighbor_show_sorted (vlib_main_t * vm,
* Fib_index 0 6.0.0.1 - 6.0.0.11
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_neighbors_cmd_node, static) = {
.path = "show ip neighbors",
.function = ip_neighbor_show,
@@ -1006,7 +1082,6 @@ VLIB_CLI_COMMAND (show_ip6_neighbor_sorted_cmd_node, static) = {
.function = ip6_neighbor_show_sorted,
.short_help = "show ip6 neighbor-sorted",
};
-/* *INDENT-ON* */
static ip_neighbor_vft_t ip_nbr_vfts[N_AF];
@@ -1017,8 +1092,8 @@ ip_neighbor_register (ip_address_family_t af, const ip_neighbor_vft_t * vft)
}
void
-ip_neighbor_probe_dst (u32 sw_if_index,
- ip_address_family_t af, const ip46_address_t * dst)
+ip_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
+ ip_address_family_t af, const ip46_address_t *dst)
{
if (!vnet_sw_interface_is_admin_up (vnet_get_main (), sw_if_index))
return;
@@ -1026,10 +1101,10 @@ ip_neighbor_probe_dst (u32 sw_if_index,
switch (af)
{
case AF_IP6:
- ip6_neighbor_probe_dst (sw_if_index, &dst->ip6);
+ ip6_neighbor_probe_dst (sw_if_index, thread_index, &dst->ip6);
break;
case AF_IP4:
- ip4_neighbor_probe_dst (sw_if_index, &dst->ip4);
+ ip4_neighbor_probe_dst (sw_if_index, thread_index, &dst->ip4);
break;
}
}
@@ -1038,6 +1113,7 @@ void
ip_neighbor_probe (const ip_adjacency_t * adj)
{
ip_neighbor_probe_dst (adj->rewrite_header.sw_if_index,
+ vlib_get_thread_index (),
ip_address_family_from_fib_proto (adj->ia_nh_proto),
&adj->sub_type.nbr.next_hop);
}
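
The probe API now carries the caller's thread index so probes can be issued directly from a worker thread, as ip_neighbor_probe() does above. A minimal caller sketch under that contract (the wrapper function and the address family are illustrative, not from this patch):

  static void
  probe_neighbor_example (u32 sw_if_index, const ip46_address_t *nh)
  {
    /* pass the current worker's index, as ip_neighbor_probe() now does */
    ip_neighbor_probe_dst (sw_if_index, vlib_get_thread_index (), AF_IP4, nh);
  }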
@@ -1055,13 +1131,11 @@ ip_neighbor_walk (ip_address_family_t af,
vec_foreach (hash, ip_neighbor_db[af].ipndb_hash)
{
- /* *INDENT-OFF* */
hash_foreach (key, ipni, *hash,
({
if (WALK_STOP == cb (ipni, ctx))
break;
}));
- /* *INDENT-ON* */
}
}
else
@@ -1072,13 +1146,11 @@ ip_neighbor_walk (ip_address_family_t af,
return;
hash = ip_neighbor_db[af].ipndb_hash[sw_if_index];
- /* *INDENT-OFF* */
hash_foreach (key, ipni, hash,
({
if (WALK_STOP == cb (ipni, ctx))
break;
}));
- /* *INDENT-ON* */
}
}
@@ -1157,14 +1229,12 @@ ip_neighbor_populate (ip_address_family_t af, u32 sw_if_index)
format_vnet_sw_if_index_name, vnet_get_main (),
sw_if_index, format_ip_address_family, af);
- /* *INDENT-OFF* */
pool_foreach (ipn, ip_neighbor_pool)
{
if (ip_neighbor_get_af(ipn) == af &&
ipn->ipn_key->ipnk_sw_if_index == sw_if_index)
vec_add1 (ipnis, ipn - ip_neighbor_pool);
}
- /* *INDENT-ON* */
vec_foreach (ipni, ipnis)
{
@@ -1190,7 +1260,6 @@ ip_neighbor_flush (ip_address_family_t af, u32 sw_if_index)
format_vnet_sw_if_index_name, vnet_get_main (),
sw_if_index, format_ip_address_family, af);
- /* *INDENT-OFF* */
pool_foreach (ipn, ip_neighbor_pool)
{
if (ip_neighbor_get_af(ipn) == af &&
@@ -1198,13 +1267,12 @@ ip_neighbor_flush (ip_address_family_t af, u32 sw_if_index)
ip_neighbor_is_dynamic (ipn))
vec_add1 (ipnis, ipn - ip_neighbor_pool);
}
- /* *INDENT-ON* */
vec_foreach (ipni, ipnis) ip_neighbor_destroy (ip_neighbor_get (*ipni));
vec_free (ipnis);
}
-static walk_rc_t
+walk_rc_t
ip_neighbor_mark_one (index_t ipni, void *ctx)
{
ip_neighbor_t *ipn;
@@ -1291,8 +1359,8 @@ VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip_neighbor_interface_admin_change);
* Remove any arp entries associated with the specified interface
*/
static clib_error_t *
-ip_neighbor_delete_sw_interface (vnet_main_t * vnm,
- u32 sw_if_index, u32 is_add)
+ip_neighbor_add_del_sw_interface (vnet_main_t *vnm, u32 sw_if_index,
+ u32 is_add)
{
IP_NEIGHBOR_DBG ("interface-change: %U %s",
format_vnet_sw_if_index_name, vnet_get_main (),
@@ -1305,10 +1373,16 @@ ip_neighbor_delete_sw_interface (vnet_main_t * vnm,
FOR_EACH_IP_ADDRESS_FAMILY (af) ip_neighbor_flush (af, sw_if_index);
}
+ if (is_add)
+ {
+ ip_neighbor_alloc_ctr (&ip_neighbor_counters[AF_IP4], sw_if_index);
+ ip_neighbor_alloc_ctr (&ip_neighbor_counters[AF_IP6], sw_if_index);
+ }
+
return (NULL);
}
-VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip_neighbor_delete_sw_interface);
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip_neighbor_add_del_sw_interface);
typedef struct ip_neighbor_walk_covered_ctx_t_
{
@@ -1366,14 +1440,12 @@ ip_neighbor_add_del_interface_address_v4 (ip4_main_t * im,
* Flush the ARP cache of all entries covered by the address
* that is being removed.
*/
- IP_NEIGHBOR_DBG ("addr-%d: %U, %U/%d",
- (is_del ? "del" : "add"),
- format_vnet_sw_if_index_name, vnet_get_main (),
- sw_if_index, format_ip4_address, address, address_length);
+ IP_NEIGHBOR_DBG ("addr-%s: %U, %U/%d", (is_del ? "del" : "add"),
+ format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index,
+ format_ip4_address, address, address_length);
if (is_del)
{
- /* *INDENT-OFF* */
ip_neighbor_walk_covered_ctx_t ctx = {
.addr = {
.ip.ip4 = *address,
@@ -1381,7 +1453,6 @@ ip_neighbor_add_del_interface_address_v4 (ip4_main_t * im,
},
.length = address_length,
};
- /* *INDENT-ON* */
index_t *ipni;
ip_neighbor_walk (AF_IP4, sw_if_index, ip_neighbor_walk_covered, &ctx);
@@ -1415,7 +1486,6 @@ ip_neighbor_add_del_interface_address_v6 (ip6_main_t * im,
if (is_del)
{
- /* *INDENT-OFF* */
ip_neighbor_walk_covered_ctx_t ctx = {
.addr = {
.ip.ip6 = *address,
@@ -1423,7 +1493,6 @@ ip_neighbor_add_del_interface_address_v6 (ip6_main_t * im,
},
.length = address_length,
};
- /* *INDENT-ON* */
index_t *ipni;
ip_neighbor_walk (AF_IP6, sw_if_index, ip_neighbor_walk_covered, &ctx);
@@ -1507,20 +1576,20 @@ ip_neighbour_age_out (index_t ipni, f64 now, f64 * wait)
if (ttl > ipndb_age)
{
- IP_NEIGHBOR_DBG ("aged: %U @%f - %f > %d",
- format_ip_neighbor, ipni, now,
- ipn->ipn_time_last_updated, ipndb_age);
+ IP_NEIGHBOR_DBG ("aged: %U @%f - %f > %d", format_ip_neighbor, now, ipni,
+ now, ipn->ipn_time_last_updated, ipndb_age);
if (ipn->ipn_n_probes > 2)
{
/* three strikes and you're out */
- IP_NEIGHBOR_DBG ("dead: %U", format_ip_neighbor, ipni);
+ IP_NEIGHBOR_DBG ("dead: %U", format_ip_neighbor, now, ipni);
*wait = 1;
return (IP_NEIGHBOR_AGE_DEAD);
}
else
{
ip_neighbor_probe_dst (ip_neighbor_get_sw_if_index (ipn),
- af, &ip_addr_46 (&ipn->ipn_key->ipnk_ip));
+ vlib_get_thread_index (), af,
+ &ip_addr_46 (&ipn->ipn_key->ipnk_ip));
ipn->ipn_n_probes++;
*wait = 1;
@@ -1578,7 +1647,6 @@ ip_neighbor_age_loop (vlib_main_t * vm,
head = pool_elt_at_index (ip_neighbor_elt_pool,
ip_neighbor_list_head[af]);
- /* *INDENT-OFF*/
/* the list is time sorted, newest first, so start from the back
* and work forwards. Stop when we get to one that is alive */
restart:
@@ -1603,7 +1671,6 @@ ip_neighbor_age_loop (vlib_main_t * vm,
timeout = clib_min (wait, timeout);
}));
- /* *INDENT-ON* */
break;
}
case IP_NEIGHBOR_AGE_PROCESS_WAKEUP:
@@ -1650,7 +1717,6 @@ ip6_neighbor_age_process (vlib_main_t * vm,
return (ip_neighbor_age_loop (vm, rt, f, AF_IP6));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_neighbor_age_process_node,static) = {
.function = ip4_neighbor_age_process,
.type = VLIB_NODE_TYPE_PROCESS,
@@ -1661,7 +1727,6 @@ VLIB_REGISTER_NODE (ip6_neighbor_age_process_node,static) = {
.type = VLIB_NODE_TYPE_PROCESS,
.name = "ip6-neighbor-age-process",
};
-/* *INDENT-ON* */
int
ip_neighbor_config (ip_address_family_t af, u32 limit, u32 age, bool recycle)
@@ -1679,13 +1744,23 @@ ip_neighbor_config (ip_address_family_t af, u32 limit, u32 age, bool recycle)
return (0);
}
+int
+ip_neighbor_get_config (ip_address_family_t af, u32 *limit, u32 *age,
+ bool *recycle)
+{
+ *limit = ip_neighbor_db[af].ipndb_limit;
+ *age = ip_neighbor_db[af].ipndb_age;
+ *recycle = ip_neighbor_db[af].ipndb_recycle;
+
+ return (0);
+}
+
static clib_error_t *
ip_neighbor_config_show (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
ip_address_family_t af;
- /* *INDENT-OFF* */
FOR_EACH_IP_ADDRESS_FAMILY(af) {
vlib_cli_output (vm, "%U:", format_ip_address_family, af);
vlib_cli_output (vm, " limit:%d, age:%d, recycle:%d",
@@ -1694,7 +1769,6 @@ ip_neighbor_config_show (vlib_main_t * vm,
ip_neighbor_db[af].ipndb_recycle);
}
- /* *INDENT-ON* */
return (NULL);
}
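
The new ip_neighbor_get_config() getter mirrors ip_neighbor_config(). A minimal usage sketch (the surrounding code is hypothetical; clib_warning is the stock logging helper):

  u32 limit = 0, age = 0;
  bool recycle = false;

  if (0 == ip_neighbor_get_config (AF_IP4, &limit, &age, &recycle))
    clib_warning ("ip4 neighbor db: limit %u age %u recycle %d",
                  limit, age, recycle);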
@@ -1745,7 +1819,47 @@ done:
return error;
}
-/* *INDENT-OFF* */
+static void
+ip_neighbor_stats_show_one (vlib_main_t *vm, vnet_main_t *vnm, u32 sw_if_index)
+{
+ vlib_cli_output (vm, " %U", format_vnet_sw_if_index_name, vnm, sw_if_index);
+ vlib_cli_output (vm, " arp:%U", format_ip_neighbor_counters,
+ &ip_neighbor_counters[AF_IP4], sw_if_index);
+ vlib_cli_output (vm, " nd: %U", format_ip_neighbor_counters,
+ &ip_neighbor_counters[AF_IP6], sw_if_index);
+}
+
+static walk_rc_t
+ip_neighbor_stats_show_cb (vnet_main_t *vnm, vnet_sw_interface_t *si,
+ void *ctx)
+{
+ ip_neighbor_stats_show_one (ctx, vnm, si->sw_if_index);
+
+ return (WALK_CONTINUE);
+}
+
+static clib_error_t *
+ip_neighbor_stats_show (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_main_t *vnm;
+ u32 sw_if_index;
+
+ vnm = vnet_get_main ();
+ sw_if_index = ~0;
+ (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
+
+ if (~0 == sw_if_index)
+ {
+ vnet_sw_interface_walk (vnm, ip_neighbor_stats_show_cb, vm);
+ }
+ else
+ {
+ ip_neighbor_stats_show_one (vm, vnm, sw_if_index);
+ }
+ return (NULL);
+}
+
VLIB_CLI_COMMAND (show_ip_neighbor_cfg_cmd_node, static) = {
.path = "show ip neighbor-config",
.function = ip_neighbor_config_show,
@@ -1757,7 +1871,11 @@ VLIB_CLI_COMMAND (set_ip_neighbor_cfg_cmd_node, static) = {
.short_help = "set ip neighbor-config ip4|ip6 [limit <limit>] [age <age>] "
"[recycle|norecycle]",
};
-/* *INDENT-ON* */
+VLIB_CLI_COMMAND (show_ip_neighbor_stats_cmd_node, static) = {
+ .path = "show ip neighbor-stats",
+ .function = ip_neighbor_stats_show,
+ .short_help = "show ip neighbor-stats [interface]",
+};
static clib_error_t *
ip_neighbor_init (vlib_main_t * vm)
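
Usage sketch for the "show ip neighbor-stats" command registered above (the interface name is hypothetical): with no argument it walks every software interface via vnet_sw_interface_walk(); given an interface, it prints only that interface's arp and nd counters.

  vpp# show ip neighbor-stats
  vpp# show ip neighbor-stats GigabitEthernet0/8/0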
@@ -1797,12 +1915,10 @@ ip_neighbor_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip_neighbor_init) =
{
.runs_after = VLIB_INITS("ip_main_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip-neighbor/ip_neighbor.h b/src/vnet/ip-neighbor/ip_neighbor.h
index 064569b56ce..cc888ba2054 100644
--- a/src/vnet/ip-neighbor/ip_neighbor.h
+++ b/src/vnet/ip-neighbor/ip_neighbor.h
@@ -36,6 +36,8 @@ extern int ip_neighbor_del (const ip_address_t * ip, u32 sw_if_index);
extern int ip_neighbor_config (ip_address_family_t af,
u32 limit, u32 age, bool recycle);
+extern int ip_neighbor_get_config (ip_address_family_t af, u32 *limit,
+ u32 *age, bool *recycle);
extern void ip_neighbor_del_all (ip_address_family_t af, u32 sw_if_index);
@@ -54,12 +56,13 @@ extern void ip_neighbor_learn (const ip_neighbor_learn_t * l);
extern void ip_neighbor_update (vnet_main_t * vnm, adj_index_t ai);
extern void ip_neighbor_probe (const ip_adjacency_t * adj);
-extern void ip_neighbor_probe_dst (u32 sw_if_index,
+extern void ip_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
ip_address_family_t af,
- const ip46_address_t * ip);
+ const ip46_address_t *ip);
extern void ip_neighbor_mark (ip_address_family_t af);
extern void ip_neighbor_sweep (ip_address_family_t af);
+extern walk_rc_t ip_neighbor_mark_one (index_t ipni, void *ctx);
/**
* From the watcher to the API to publish a new neighbor
@@ -111,7 +114,6 @@ typedef struct ip_neighbor_vft_t_
extern void ip_neighbor_register (ip_address_family_t af,
const ip_neighbor_vft_t * vft);
-
#endif /* __INCLUDE_IP_NEIGHBOR_H__ */
/*
diff --git a/src/vnet/ip-neighbor/ip_neighbor_api.c b/src/vnet/ip-neighbor/ip_neighbor_api.c
index 81af86211de..2297546f111 100644
--- a/src/vnet/ip-neighbor/ip_neighbor_api.c
+++ b/src/vnet/ip-neighbor/ip_neighbor_api.c
@@ -234,12 +234,10 @@ vl_api_ip_neighbor_add_del_t_handler (vl_api_ip_neighbor_add_del_t * mp,
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IP_NEIGHBOR_ADD_DEL_REPLY,
({
rmp->stats_index = htonl (stats_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -314,6 +312,32 @@ vl_api_ip_neighbor_config_t_handler (vl_api_ip_neighbor_config_t * mp)
}
static void
+vl_api_ip_neighbor_config_get_t_handler (vl_api_ip_neighbor_config_get_t *mp)
+{
+ vl_api_ip_neighbor_config_get_reply_t *rmp;
+ int rv;
+ ip_address_family_t af = AF_IP4;
+ u32 max_number = ~0;
+ u32 max_age = ~0;
+ bool recycle = false;
+
+ rv = ip_address_family_decode (mp->af, &af);
+
+ if (!rv)
+ rv = ip_neighbor_get_config (af, &max_number, &max_age, &recycle);
+
+ // clang-format off
+ REPLY_MACRO2 (VL_API_IP_NEIGHBOR_CONFIG_GET_REPLY,
+ ({
+ rmp->af = ip_address_family_encode (af);
+ rmp->max_number = htonl (max_number);
+ rmp->max_age = htonl (max_age);
+ rmp->recycle = recycle;
+ }));
+ // clang-format on
+}
+
+static void
vl_api_ip_neighbor_replace_begin_t_handler (vl_api_ip_neighbor_replace_begin_t
* mp)
{
diff --git a/src/vnet/ip-neighbor/ip_neighbor_types.c b/src/vnet/ip-neighbor/ip_neighbor_types.c
index 76fbc5ac8a9..a6f3c26d42f 100644
--- a/src/vnet/ip-neighbor/ip_neighbor_types.c
+++ b/src/vnet/ip-neighbor/ip_neighbor_types.c
@@ -68,19 +68,65 @@ format_ip_neighbor_watcher (u8 * s, va_list * va)
u8 *
format_ip_neighbor (u8 * s, va_list * va)
{
+ f64 now = va_arg (*va, f64);
index_t ipni = va_arg (*va, index_t);
ip_neighbor_t *ipn;
ipn = ip_neighbor_get (ipni);
- return (format (s, "%=12U%=40U%=6U%=20U%U",
- format_vlib_time, vlib_get_main (),
- ipn->ipn_time_last_updated,
- format_ip_address, &ipn->ipn_key->ipnk_ip,
- format_ip_neighbor_flags, ipn->ipn_flags,
- format_mac_address_t, &ipn->ipn_mac,
- format_vnet_sw_if_index_name, vnet_get_main (),
- ipn->ipn_key->ipnk_sw_if_index));
+ return (
+ format (s, "%=12U%=40U%=6U%=20U%U", format_vlib_time, vlib_get_main (),
+ now - ipn->ipn_time_last_updated, format_ip_address,
+ &ipn->ipn_key->ipnk_ip, format_ip_neighbor_flags, ipn->ipn_flags,
+ format_mac_address_t, &ipn->ipn_mac, format_vnet_sw_if_index_name,
+ vnet_get_main (), ipn->ipn_key->ipnk_sw_if_index));
+}
+
+static void
+ip_neighbor_alloc_one_ctr (ip_neighbor_counters_t *ctr, vlib_dir_t dir,
+ ip_neighbor_counter_type_t type, u32 sw_if_index)
+{
+ vlib_validate_simple_counter (&(ctr->ipnc[dir][type]), sw_if_index);
+ vlib_zero_simple_counter (&(ctr->ipnc[dir][type]), sw_if_index);
+}
+
+void
+ip_neighbor_alloc_ctr (ip_neighbor_counters_t *ctr, u32 sw_if_index)
+{
+ ip_neighbor_counter_type_t type;
+ vlib_dir_t dir;
+
+ FOREACH_VLIB_DIR (dir)
+ {
+ FOREACH_IP_NEIGHBOR_CTR (type)
+ {
+ ip_neighbor_alloc_one_ctr (ctr, dir, type, sw_if_index);
+ }
+ }
+}
+
+u8 *
+format_ip_neighbor_counters (u8 *s, va_list *args)
+{
+ ip_neighbor_counters_t *ctr = va_arg (*args, ip_neighbor_counters_t *);
+ u32 sw_if_index = va_arg (*args, u32);
+ vlib_dir_t dir;
+
+ FOREACH_VLIB_DIR (dir)
+ {
+ s = format (s, " %U:[", format_vlib_rx_tx, dir);
+
+#define _(a, b) \
+ s = format (s, "%s:%lld ", b, \
+ vlib_get_simple_counter (&ctr->ipnc[dir][IP_NEIGHBOR_CTR_##a], \
+ sw_if_index));
+ foreach_ip_neighbor_counter_type
+#undef _
+
+ s = format (s, "]");
+ }
+
+ return (s);
}
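
For one direction, the _ definition above unrolls foreach_ip_neighbor_counter_type (defined later in this patch) into one format call per counter type; an illustrative expansion for the REPLY entry:

  s = format (s, "%s:%lld ", "reply",
              vlib_get_simple_counter (
                &ctr->ipnc[dir][IP_NEIGHBOR_CTR_REPLY], sw_if_index));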
/*
diff --git a/src/vnet/ip-neighbor/ip_neighbor_types.h b/src/vnet/ip-neighbor/ip_neighbor_types.h
index 2eb8fd0841f..d7e818ba252 100644
--- a/src/vnet/ip-neighbor/ip_neighbor_types.h
+++ b/src/vnet/ip-neighbor/ip_neighbor_types.h
@@ -120,7 +120,37 @@ extern void ip_neighbor_clone (const ip_neighbor_t * ipn,
extern void ip_neighbor_free (ip_neighbor_t * ipn);
+/**
+ * Keep RX and TX counts per-AF
+ */
+#define foreach_ip_neighbor_counter_type \
+ _ (REPLY, "reply") \
+ _ (REQUEST, "request") \
+ _ (GRAT, "gratuitous")
+
+typedef enum ip_neighbor_counter_type_t_
+{
+#define _(a, b) IP_NEIGHBOR_CTR_##a,
+ foreach_ip_neighbor_counter_type
+#undef _
+} ip_neighbor_counter_type_t;
+
+#define N_IP_NEIGHBOR_CTRS (IP_NEIGHBOR_CTR_GRAT + 1)
+
+#define FOREACH_IP_NEIGHBOR_CTR(_type) \
+ for (_type = 0; _type < N_IP_NEIGHBOR_CTRS; _type++)
+
+typedef struct ip_neighbor_counters_t_
+{
+ vlib_simple_counter_main_t ipnc[VLIB_N_DIR][N_IP_NEIGHBOR_CTRS];
+} ip_neighbor_counters_t;
+
+extern u8 *format_ip_neighbor_counters (u8 *s, va_list *args);
+
+extern void ip_neighbor_alloc_ctr (ip_neighbor_counters_t *ctr,
+ u32 sw_if_index);
+extern ip_neighbor_counters_t ip_neighbor_counters[N_AF];
#endif /* __INCLUDE_IP_NEIGHBOR_H__ */
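
A sketch (not part of this patch) of how a datapath node would bump one of these per-interface counters, e.g. on receipt of an ARP reply; vlib_increment_simple_counter() is the stock vlib API:

  vlib_increment_simple_counter (
    &ip_neighbor_counters[AF_IP4].ipnc[VLIB_RX][IP_NEIGHBOR_CTR_REPLY],
    thread_index, sw_if_index, 1);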
diff --git a/src/vnet/ip-neighbor/ip_neighbor_watch.c b/src/vnet/ip-neighbor/ip_neighbor_watch.c
index 72908f4e613..74f450114e1 100644
--- a/src/vnet/ip-neighbor/ip_neighbor_watch.c
+++ b/src/vnet/ip-neighbor/ip_neighbor_watch.c
@@ -66,13 +66,11 @@ ip_neighbor_event_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip_neighbor_event_process_node) = {
.function = ip_neighbor_event_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "ip-neighbor-event",
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -84,7 +82,6 @@ want_ip_neighbor_events_reaper (u32 client_index)
i32 pos;
/* walk the entire IP neighbour DB and remove the client's registrations */
- /* *INDENT-OFF* */
mhash_foreach(key, v, &ipnw_db.ipnwdb_hash,
({
watchers = (ip_neighbor_watcher_t*) *v;
@@ -97,7 +94,6 @@ want_ip_neighbor_events_reaper (u32 client_index)
if (vec_len(watchers) == 0)
vec_add1 (empty_keys, *key);
}));
- /* *INDENT-OFF* */
vec_foreach (key, empty_keys)
mhash_unset (&ipnw_db.ipnwdb_hash, key, NULL);
@@ -236,7 +232,6 @@ ip_neighbor_watchers_show (vlib_main_t * vm,
ip_neighbor_key_t *key;
uword *v;
- /* *INDENT-OFF* */
mhash_foreach(key, v, &ipnw_db.ipnwdb_hash,
({
watchers = (ip_neighbor_watcher_t*) *v;
@@ -247,17 +242,14 @@ ip_neighbor_watchers_show (vlib_main_t * vm,
vec_foreach (watcher, watchers)
vlib_cli_output (vm, " %U", format_ip_neighbor_watcher, watcher);
}));
- /* *INDENT-ON* */
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_neighbor_watchers_cmd_node, static) = {
.path = "show ip neighbor-watcher",
.function = ip_neighbor_watchers_show,
.short_help = "show ip neighbors-watcher",
};
-/* *INDENT-ON* */
static clib_error_t *
ip_neighbor_watch_init (vlib_main_t * vm)
@@ -267,12 +259,10 @@ ip_neighbor_watch_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip_neighbor_watch_init) =
{
.runs_after = VLIB_INITS("ip_neighbor_init"),
};
-/* *INDENT-ON* */
/*
diff --git a/src/vnet/ip/icmp4.c b/src/vnet/ip/icmp4.c
index 5f9ffa3b2b7..fa4a0e12276 100644
--- a/src/vnet/ip/icmp4.c
+++ b/src/vnet/ip/icmp4.c
@@ -41,12 +41,10 @@
#include <vnet/ip/ip.h>
#include <vnet/pg/pg.h>
#include <vnet/ip/ip_sas.h>
+#include <vnet/util/throttle.h>
-static char *icmp_error_strings[] = {
-#define _(f,s) s,
- foreach_icmp4_error
-#undef _
-};
+/** ICMP throttling */
+static throttle_t icmp_throttle;
static u8 *
format_ip4_icmp_type_and_code (u8 * s, va_list * args)
@@ -206,7 +204,6 @@ ip4_icmp_input (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_icmp_input_node) = {
.function = ip4_icmp_input,
.name = "ip4-icmp-input",
@@ -215,15 +212,14 @@ VLIB_REGISTER_NODE (ip4_icmp_input_node) = {
.format_trace = format_icmp_input_trace,
- .n_errors = ARRAY_LEN (icmp_error_strings),
- .error_strings = icmp_error_strings,
+ .n_errors = ICMP4_N_ERROR,
+ .error_counters = icmp4_error_counters,
.n_next_nodes = 1,
.next_nodes = {
[ICMP_INPUT_NEXT_ERROR] = "ip4-punt",
},
};
-/* *INDENT-ON* */
typedef enum
{
@@ -255,11 +251,14 @@ ip4_icmp_error (vlib_main_t * vm,
u32 *from, *to_next;
uword n_left_from, n_left_to_next;
ip4_icmp_error_next_t next_index;
+ u32 thread_index = vm->thread_index;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
+ u64 seed = throttle_seed (&icmp_throttle, thread_index, vlib_time_now (vm));
+
if (node->flags & VLIB_NODE_FLAG_TRACE)
vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
/* stride */ 1,
@@ -289,6 +288,21 @@ ip4_icmp_error (vlib_main_t * vm,
ip_csum_t sum;
org_p0 = vlib_get_buffer (vm, org_pi0);
+ ip0 = vlib_buffer_get_current (org_p0);
+
+ /* Rate limit based on the src,dst addresses in the original packet
+ */
+ u64 r0 =
+ (u64) ip0->dst_address.as_u32 << 32 | ip0->src_address.as_u32;
+
+ if (throttle_check (&icmp_throttle, thread_index, r0, seed))
+ {
+ vlib_error_count (vm, node->node_index, ICMP4_ERROR_DROP, 1);
+ from += 1;
+ n_left_from -= 1;
+ continue;
+ }
+
p0 = vlib_buffer_copy_no_chain (vm, org_p0, &pi0);
if (!p0 || pi0 == ~0) /* Out of buffers */
continue;
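
The rate limit added above is the standard vnet/util/throttle.h pattern: one throttle_seed() per frame, one throttle_check() per packet keyed on the offending flow. Compressed to its essentials (flow_key stands in for the dst/src packing in the hunk):

  u64 seed = throttle_seed (&icmp_throttle, thread_index, vlib_time_now (vm));
  /* per packet */
  if (throttle_check (&icmp_throttle, thread_index, flow_key, seed))
    ; /* an ICMP error went out for this flow recently: count a drop, skip */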
@@ -300,14 +314,16 @@ ip4_icmp_error (vlib_main_t * vm,
n_left_from -= 1;
n_left_to_next -= 1;
- ip0 = vlib_buffer_get_current (p0);
sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ vlib_buffer_copy_trace_flag (vm, org_p0, pi0);
+
/* Add IP header and ICMPv4 header including a 4 byte data field */
vlib_buffer_advance (p0,
-sizeof (ip4_header_t) -
sizeof (icmp46_header_t) - 4);
+ p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
p0->current_length =
p0->current_length > 576 ? 576 : p0->current_length;
out_ip0 = vlib_buffer_get_current (p0);
@@ -325,7 +341,7 @@ ip4_icmp_error (vlib_main_t * vm,
/* Prefer a source address from "offending interface" */
if (!ip4_sas_by_sw_if_index (sw_if_index0, &out_ip0->dst_address,
&out_ip0->src_address))
- { /* interface has no IP6 address - should not happen */
+ { /* interface has no IP4 address - should not happen */
next0 = IP4_ICMP_ERROR_NEXT_DROP;
error0 = ICMP4_ERROR_DROP;
}
@@ -370,14 +386,13 @@ ip4_icmp_error (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_icmp_error_node) = {
.function = ip4_icmp_error,
.name = "ip4-icmp-error",
.vector_size = sizeof (u32),
- .n_errors = ARRAY_LEN (icmp_error_strings),
- .error_strings = icmp_error_strings,
+ .n_errors = ICMP4_N_ERROR,
+ .error_counters = icmp4_error_counters,
.n_next_nodes = IP4_ICMP_ERROR_N_NEXT,
.next_nodes = {
@@ -387,7 +402,6 @@ VLIB_REGISTER_NODE (ip4_icmp_error_node) = {
.format_trace = format_icmp_input_trace,
};
-/* *INDENT-ON* */
static uword
@@ -570,6 +584,11 @@ icmp4_init (vlib_main_t * vm)
ICMP_INPUT_NEXT_ERROR,
sizeof (cm->ip4_input_next_index_by_type));
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ u32 n_vlib_mains = tm->n_vlib_mains;
+
+ throttle_init (&icmp_throttle, n_vlib_mains, THROTTLE_BITS, 1e-5);
+
return 0;
}
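
A hedged reading of the new arguments: THROTTLE_BITS sizes each thread's throttle bitmap and the final f64 is the seed-refresh interval, so icmp4 (1e-5) forgets suppressed flows far sooner than icmp6 (1e-3, later in this patch). This is an interpretation of vnet/util/throttle.h, not a contract stated by the patch.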
diff --git a/src/vnet/ip/icmp4.h b/src/vnet/ip/icmp4.h
index e2a95673fc7..22a4fc508e5 100644
--- a/src/vnet/ip/icmp4.h
+++ b/src/vnet/ip/icmp4.h
@@ -15,29 +15,6 @@
#ifndef included_vnet_icmp4_h
#define included_vnet_icmp4_h
-#define foreach_icmp4_error \
- _ (NONE, "valid packets") \
- _ (UNKNOWN_TYPE, "unknown type") \
- _ (INVALID_CODE_FOR_TYPE, "invalid code for type") \
- _ (INVALID_HOP_LIMIT_FOR_TYPE, "hop_limit != 255") \
- _ (LENGTH_TOO_SMALL_FOR_TYPE, "payload length too small for type") \
- _ (OPTIONS_WITH_ODD_LENGTH, \
- "total option length not multiple of 8 bytes") \
- _ (OPTION_WITH_ZERO_LENGTH, "option has zero length") \
- _ (ECHO_REPLIES_SENT, "echo replies sent") \
- _ (DST_LOOKUP_MISS, "icmp6 dst address lookup misses") \
- _ (DEST_UNREACH_SENT, "destination unreachable response sent") \
- _ (TTL_EXPIRE_SENT, "hop limit exceeded response sent") \
- _ (PARAM_PROBLEM_SENT, "parameter problem response sent") \
- _ (DROP, "error message dropped")
-
-typedef enum
-{
-#define _(f,s) ICMP4_ERROR_##f,
- foreach_icmp4_error
-#undef _
-} icmp4_error_t;
-
typedef struct
{
u8 packet_data[64];
diff --git a/src/vnet/ip/icmp46_packet.h b/src/vnet/ip/icmp46_packet.h
index 0545046fe60..08e73f6cd7d 100644
--- a/src/vnet/ip/icmp46_packet.h
+++ b/src/vnet/ip/icmp46_packet.h
@@ -187,7 +187,6 @@ typedef enum
#undef _
} icmp6_code_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
u8 type;
@@ -195,7 +194,6 @@ typedef CLIB_PACKED (struct
/* IP checksum of icmp header plus data which follows. */
u16 checksum;
}) icmp46_header_t;
-/* *INDENT-ON* */
/* ip6 neighbor discovery */
#define foreach_icmp6_neighbor_discovery_option \
@@ -238,7 +236,6 @@ typedef enum icmp6_neighbor_discovery_option_type
#undef _
} icmp6_neighbor_discovery_option_type_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
/* Option type. */
@@ -357,6 +354,5 @@ typedef CLIB_PACKED (struct
icmp6_neighbor_discovery_ethernet_link_layer_address_option_t
link_layer_option;
}) icmp6_neighbor_solicitation_header_t;
-/* *INDENT-ON* */
#endif /* included_vnet_icmp46_packet_h */
diff --git a/src/vnet/ip/icmp6.c b/src/vnet/ip/icmp6.c
index b0fdadb2667..f93ebce4bf1 100644
--- a/src/vnet/ip/icmp6.c
+++ b/src/vnet/ip/icmp6.c
@@ -41,6 +41,10 @@
#include <vnet/ip/ip.h>
#include <vnet/pg/pg.h>
#include <vnet/ip/ip_sas.h>
+#include <vnet/util/throttle.h>
+
+/** ICMP throttling */
+static throttle_t icmp_throttle;
static u8 *
format_ip6_icmp_type_and_code (u8 * s, va_list * args)
@@ -123,12 +127,6 @@ format_icmp6_input_trace (u8 * s, va_list * va)
return s;
}
-static char *icmp_error_strings[] = {
-#define _(f,s) s,
- foreach_icmp6_error
-#undef _
-};
-
typedef enum
{
ICMP_INPUT_NEXT_PUNT,
@@ -237,7 +235,6 @@ ip6_icmp_input (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_icmp_input_node) = {
.function = ip6_icmp_input,
.name = "ip6-icmp-input",
@@ -246,199 +243,14 @@ VLIB_REGISTER_NODE (ip6_icmp_input_node) = {
.format_trace = format_icmp6_input_trace,
- .n_errors = ARRAY_LEN (icmp_error_strings),
- .error_strings = icmp_error_strings,
+ .n_errors = ICMP6_N_ERROR,
+ .error_counters = icmp6_error_counters,
.n_next_nodes = 1,
.next_nodes = {
[ICMP_INPUT_NEXT_PUNT] = "ip6-punt",
},
};
-/* *INDENT-ON* */
-
-typedef enum
-{
- ICMP6_ECHO_REQUEST_NEXT_LOOKUP,
- ICMP6_ECHO_REQUEST_NEXT_OUTPUT,
- ICMP6_ECHO_REQUEST_N_NEXT,
-} icmp6_echo_request_next_t;
-
-static uword
-ip6_icmp_echo_request (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
-{
- u32 *from, *to_next;
- u32 n_left_from, n_left_to_next, next_index;
- ip6_main_t *im = &ip6_main;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
-
- if (node->flags & VLIB_NODE_FLAG_TRACE)
- vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
- /* stride */ 1,
- sizeof (icmp6_input_trace_t));
-
- while (n_left_from > 0)
- {
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 2 && n_left_to_next > 2)
- {
- vlib_buffer_t *p0, *p1;
- ip6_header_t *ip0, *ip1;
- icmp46_header_t *icmp0, *icmp1;
- ip6_address_t tmp0, tmp1;
- ip_csum_t sum0, sum1;
- u32 bi0, bi1;
- u32 fib_index0, fib_index1;
- u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
- u32 next1 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
-
- bi0 = to_next[0] = from[0];
- bi1 = to_next[1] = from[1];
-
- from += 2;
- n_left_from -= 2;
- to_next += 2;
- n_left_to_next -= 2;
-
- p0 = vlib_get_buffer (vm, bi0);
- p1 = vlib_get_buffer (vm, bi1);
- ip0 = vlib_buffer_get_current (p0);
- ip1 = vlib_buffer_get_current (p1);
- icmp0 = ip6_next_header (ip0);
- icmp1 = ip6_next_header (ip1);
-
- /* Check icmp type to echo reply and update icmp checksum. */
- sum0 = icmp0->checksum;
- sum1 = icmp1->checksum;
-
- ASSERT (icmp0->type == ICMP6_echo_request);
- ASSERT (icmp1->type == ICMP6_echo_request);
- sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply,
- icmp46_header_t, type);
- sum1 = ip_csum_update (sum1, ICMP6_echo_request, ICMP6_echo_reply,
- icmp46_header_t, type);
-
- icmp0->checksum = ip_csum_fold (sum0);
- icmp1->checksum = ip_csum_fold (sum1);
-
- icmp0->type = ICMP6_echo_reply;
- icmp1->type = ICMP6_echo_reply;
-
- /* Swap source and destination address. */
- tmp0 = ip0->src_address;
- tmp1 = ip1->src_address;
-
- ip0->src_address = ip0->dst_address;
- ip1->src_address = ip1->dst_address;
-
- ip0->dst_address = tmp0;
- ip1->dst_address = tmp1;
-
- /* New hop count. */
- ip0->hop_limit = im->host_config.ttl;
- ip1->hop_limit = im->host_config.ttl;
-
- /* Determine the correct lookup fib indices... */
- fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
- vnet_buffer (p0)->sw_if_index[VLIB_RX]);
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0;
- /* Determine the correct lookup fib indices... */
- fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
- vnet_buffer (p1)->sw_if_index[VLIB_RX]);
- vnet_buffer (p1)->sw_if_index[VLIB_TX] = fib_index1;
-
- p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
- p1->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
-
- /* verify speculative enqueues, maybe switch current next frame */
- /* if next0==next1==next_index then nothing special needs to be done */
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- vlib_buffer_t *p0;
- ip6_header_t *ip0;
- icmp46_header_t *icmp0;
- u32 bi0;
- ip6_address_t tmp0;
- ip_csum_t sum0;
- u32 fib_index0;
- u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
-
- bi0 = to_next[0] = from[0];
-
- from += 1;
- n_left_from -= 1;
- to_next += 1;
- n_left_to_next -= 1;
-
- p0 = vlib_get_buffer (vm, bi0);
- ip0 = vlib_buffer_get_current (p0);
- icmp0 = ip6_next_header (ip0);
-
- /* Check icmp type to echo reply and update icmp checksum. */
- sum0 = icmp0->checksum;
-
- ASSERT (icmp0->type == ICMP6_echo_request);
- sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply,
- icmp46_header_t, type);
-
- icmp0->checksum = ip_csum_fold (sum0);
-
- icmp0->type = ICMP6_echo_reply;
-
- /* Swap source and destination address. */
- tmp0 = ip0->src_address;
- ip0->src_address = ip0->dst_address;
- ip0->dst_address = tmp0;
-
- ip0->hop_limit = im->host_config.ttl;
-
- /* if the packet is link local, we'll bounce through the link-local
- * table with the RX interface correctly set */
- fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
- vnet_buffer (p0)->sw_if_index[VLIB_RX]);
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0;
-
- p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
- /* Verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- vlib_error_count (vm, ip6_icmp_input_node.index,
- ICMP6_ERROR_ECHO_REPLIES_SENT, frame->n_vectors);
-
- return frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip6_icmp_echo_request_node,static) = {
- .function = ip6_icmp_echo_request,
- .name = "ip6-icmp-echo-request",
-
- .vector_size = sizeof (u32),
-
- .format_trace = format_icmp6_input_trace,
-
- .n_next_nodes = ICMP6_ECHO_REQUEST_N_NEXT,
- .next_nodes = {
- [ICMP6_ECHO_REQUEST_NEXT_LOOKUP] = "ip6-lookup",
- [ICMP6_ECHO_REQUEST_NEXT_OUTPUT] = "interface-output",
- },
-};
-/* *INDENT-ON* */
typedef enum
{
@@ -480,11 +292,14 @@ ip6_icmp_error (vlib_main_t * vm,
u32 *from, *to_next;
uword n_left_from, n_left_to_next;
ip6_icmp_error_next_t next_index;
+ u32 thread_index = vm->thread_index;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
+ u64 seed = throttle_seed (&icmp_throttle, thread_index, vlib_time_now (vm));
+
if (node->flags & VLIB_NODE_FLAG_TRACE)
vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
/* stride */ 1,
@@ -514,6 +329,21 @@ ip6_icmp_error (vlib_main_t * vm,
int bogus_length;
org_p0 = vlib_get_buffer (vm, org_pi0);
+ ip0 = vlib_buffer_get_current (org_p0);
+
+ /* Rate limit based on the src,dst addresses in the original packet
+ */
+ u64 r0 = (ip6_address_hash_to_u64 (&ip0->dst_address) ^
+ ip6_address_hash_to_u64 (&ip0->src_address));
+
+ if (throttle_check (&icmp_throttle, thread_index, r0, seed))
+ {
+ vlib_error_count (vm, node->node_index, ICMP6_ERROR_DROP, 1);
+ from += 1;
+ n_left_from -= 1;
+ continue;
+ }
+
p0 = vlib_buffer_copy_no_chain (vm, org_p0, &pi0);
if (!p0 || pi0 == ~0) /* Out of buffers */
continue;
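
For ip6 the two 128-bit addresses are folded into the one u64 throttle key with ip6_address_hash_to_u64(); note the XOR makes the key symmetric in src/dst, unlike the ip4 dst<<32|src packing above (an observation about the code, not a stated design goal).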
@@ -525,15 +355,15 @@ ip6_icmp_error (vlib_main_t * vm,
n_left_from -= 1;
n_left_to_next -= 1;
- ip0 = vlib_buffer_get_current (p0);
sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ vlib_buffer_copy_trace_flag (vm, org_p0, pi0);
+
/* Add IP header and ICMPv6 header including a 4 byte data field */
vlib_buffer_advance (p0,
-(sizeof (ip6_header_t) +
sizeof (icmp46_header_t) + 4));
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0;
p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
p0->current_length =
p0->current_length > 1280 ? 1280 : p0->current_length;
@@ -594,14 +424,13 @@ ip6_icmp_error (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_icmp_error_node) = {
.function = ip6_icmp_error,
.name = "ip6-icmp-error",
.vector_size = sizeof (u32),
- .n_errors = ARRAY_LEN (icmp_error_strings),
- .error_strings = icmp_error_strings,
+ .n_errors = ICMP6_N_ERROR,
+ .error_counters = icmp6_error_counters,
.n_next_nodes = IP6_ICMP_ERROR_N_NEXT,
.next_nodes = {
@@ -611,7 +440,6 @@ VLIB_REGISTER_NODE (ip6_icmp_error_node) = {
.format_trace = format_icmp6_input_trace,
};
-/* *INDENT-ON* */
static uword
@@ -808,8 +636,10 @@ icmp6_init (vlib_main_t * vm)
cm->min_valid_length_by_type[ICMP6_redirect] =
sizeof (icmp6_redirect_header_t);
- icmp6_register_type (vm, ICMP6_echo_request,
- ip6_icmp_echo_request_node.index);
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ u32 n_vlib_mains = tm->n_vlib_mains;
+
+ throttle_init (&icmp_throttle, n_vlib_mains, THROTTLE_BITS, 1e-3);
return (NULL);
}
diff --git a/src/vnet/ip/icmp6.h b/src/vnet/ip/icmp6.h
index 7a5eef5df18..119aaf0bae9 100644
--- a/src/vnet/ip/icmp6.h
+++ b/src/vnet/ip/icmp6.h
@@ -17,48 +17,6 @@
#include <vnet/ip/icmp46_packet.h>
-#define foreach_icmp6_error \
- _ (NONE, "valid packets") \
- _ (UNKNOWN_TYPE, "unknown type") \
- _ (INVALID_CODE_FOR_TYPE, "invalid code for type") \
- _ (INVALID_HOP_LIMIT_FOR_TYPE, "hop_limit != 255") \
- _ (LENGTH_TOO_SMALL_FOR_TYPE, "payload length too small for type") \
- _ (OPTIONS_WITH_ODD_LENGTH, \
- "total option length not multiple of 8 bytes") \
- _ (OPTION_WITH_ZERO_LENGTH, "option has zero length") \
- _ (ECHO_REPLIES_SENT, "echo replies sent") \
- _ (NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK, \
- "neighbor solicitations from source not on link") \
- _ (NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN, \
- "neighbor solicitations for unknown targets") \
- _ (NEIGHBOR_ADVERTISEMENTS_TX, "neighbor advertisements sent") \
- _ (NEIGHBOR_ADVERTISEMENTS_RX, "neighbor advertisements received") \
- _ (ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK, \
- "router solicitations from source not on link") \
- _ (ROUTER_SOLICITATION_UNSUPPORTED_INTF, \
- "neighbor discovery unsupported interface") \
- _ (ROUTER_SOLICITATION_RADV_NOT_CONFIG, \
- "neighbor discovery not configured") \
- _ (ROUTER_ADVERTISEMENT_SOURCE_NOT_LINK_LOCAL, \
- "router advertisement source not link local") \
- _ (ROUTER_ADVERTISEMENTS_TX, "router advertisements sent") \
- _ (ROUTER_ADVERTISEMENTS_RX, "router advertisements received") \
- _ (DST_LOOKUP_MISS, "icmp6 dst address lookup misses") \
- _ (DEST_UNREACH_SENT, "destination unreachable response sent") \
- _ (PACKET_TOO_BIG_SENT, "packet too big response sent") \
- _ (TTL_EXPIRE_SENT, "hop limit exceeded response sent") \
- _ (PARAM_PROBLEM_SENT, "parameter problem response sent") \
- _ (DROP, "error message dropped") \
- _ (ALLOC_FAILURE, "buffer allocation failure")
-
-
-typedef enum
-{
-#define _(f,s) ICMP6_ERROR_##f,
- foreach_icmp6_error
-#undef _
-} icmp6_error_t;
-
typedef struct
{
u8 packet_data[64];
diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api
index ca1e2008e4f..967f56cf917 100644
--- a/src/vnet/ip/ip.api
+++ b/src/vnet/ip/ip.api
@@ -366,6 +366,41 @@ autoreply define set_ip_flow_hash_v2
vl_api_ip_flow_hash_config_t flow_hash_config;
};
+/**
+ @brief flow hash settings for an IP table
+ @param src - include src in flow hash
+ @param dst - include dst in flow hash
+ @param sport - include sport in flow hash
+ @param dport - include dport in flow hash
+ @param proto - include proto in flow hash
+ @param reverse - include reverse in flow hash
+ @param symmetric - include symmetry in flow hash
+ @param flowlabel - include flowlabel in flow hash
+ @param gtpv1teid - include gtpv1teid in flow hash
+*/
+enumflag ip_flow_hash_config_v2
+{
+ IP_API_V2_FLOW_HASH_SRC_IP = 0x01,
+ IP_API_V2_FLOW_HASH_DST_IP = 0x02,
+ IP_API_V2_FLOW_HASH_SRC_PORT = 0x04,
+ IP_API_V2_FLOW_HASH_DST_PORT = 0x08,
+ IP_API_V2_FLOW_HASH_PROTO = 0x10,
+ IP_API_V2_FLOW_HASH_REVERSE = 0x20,
+ IP_API_V2_FLOW_HASH_SYMETRIC = 0x40,
+ IP_API_V2_FLOW_HASH_FLOW_LABEL = 0x80,
+ IP_API_V2_FLOW_HASH_GTPV1_TEID = 0x100,
+};
+
+autoreply define set_ip_flow_hash_v3
+{
+ u32 client_index;
+ u32 context;
+ u32 table_id;
+ vl_api_address_family_t af;
+ vl_api_ip_flow_hash_config_v2_t flow_hash_config;
+ option status="in_progress";
+};
+
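+/* A client-side sketch composing the v2 flags for a symmetric 5-tuple hash
+ * (assuming the usual vppapigen C constant naming; message plumbing omitted,
+ * and SYMETRIC is spelled as defined above):
+ *
+ *   u32 cfg = IP_API_V2_FLOW_HASH_SRC_IP | IP_API_V2_FLOW_HASH_DST_IP
+ *           | IP_API_V2_FLOW_HASH_SRC_PORT | IP_API_V2_FLOW_HASH_DST_PORT
+ *           | IP_API_V2_FLOW_HASH_PROTO | IP_API_V2_FLOW_HASH_SYMETRIC;
+ *   // cfg becomes the flow_hash_config field of set_ip_flow_hash_v3
+ */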
/** \brief Set the ip flow hash router ID
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -587,6 +622,7 @@ typedef punt_redirect
autoreply define ip_punt_redirect
{
option deprecated;
+
u32 client_index;
u32 context;
vl_api_punt_redirect_t punt;
@@ -595,6 +631,8 @@ autoreply define ip_punt_redirect
define ip_punt_redirect_dump
{
+ option deprecated;
+
u32 client_index;
u32 context;
vl_api_interface_index_t sw_if_index;
@@ -603,6 +641,8 @@ define ip_punt_redirect_dump
define ip_punt_redirect_details
{
+ option deprecated;
+
u32 context;
vl_api_punt_redirect_t punt;
};
@@ -836,6 +876,30 @@ autoreply define ip_reassembly_enable_disable
vl_api_ip_reass_type_t type;
};
+/** enable/disable full reassembly of packets aimed at our addresses */
+autoreply define ip_local_reass_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ bool enable_ip4;
+ bool enable_ip6;
+};
+
+/** get status of local reassembly */
+define ip_local_reass_get
+{
+ u32 client_index;
+ u32 context;
+};
+
+define ip_local_reass_get_reply
+{
+ u32 context;
+ i32 retval;
+ bool ip4_is_enabled;
+ bool ip6_is_enabled;
+};
+
/**
@brief Set a Path MTU value, i.e. an MTU value for a given neighbour.
The neighbour can be described as attached (w/ interface and next-hop)
@@ -893,6 +957,816 @@ autoreply define ip_path_mtu_replace_end
u32 context;
};
+counters ip_frag {
+ none {
+ severity info;
+ type counter64;
+ units "packets";
+ description "packet fragmented";
+ };
+ small_packet {
+ severity error;
+ type counter64;
+ units "packets";
+ description "packet smaller than MTU";
+ };
+ fragment_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "number of sent fragments";
+ };
+ cant_fragment_header {
+ severity error;
+ type counter64;
+ units "packets";
+ description "can't fragment header";
+ };
+ dont_fragment_set {
+ severity error;
+ type counter64;
+ units "packets";
+ description "can't fragment this packet";
+ };
+ malformed {
+ severity error;
+ type counter64;
+ units "packets";
+ description "malformed packet";
+ };
+ memory {
+ severity error;
+ type counter64;
+ units "packets";
+ description "could not allocate buffer";
+ };
+ unknown {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unknown error";
+ };
+};
+
+counters ip4 {
+ /* Must be first. */
+ none {
+ severity info;
+ type counter64;
+ units "packets";
+ description "valid ip4 packets";
+ };
+
+ /* Errors signalled by ip4-input */
+ too_short {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 length < 20 bytes";
+ };
+ bad_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 length > l2 length";
+ };
+ bad_checksum {
+ severity error;
+ type counter64;
+ units "packets";
+ description "bad ip4 checksum";
+ };
+ version {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 version != 4";
+ };
+ options {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ip4 options present";
+ };
+ fragment_offset_one {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 fragment offset == 1";
+ };
+ time_expired {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 ttl <= 1";
+ };
+ hdr_too_short {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 IHL < 5";
+ };
+
+ /* Errors signalled by ip4-rewrite. */
+ mtu_exceeded {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 MTU exceeded and DF set";
+ };
+ dst_lookup_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 destination lookup miss";
+ };
+ src_lookup_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 source lookup miss";
+ };
+ drop {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 drop";
+ };
+ punt {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 punt";
+ };
+ same_interface {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 egress interface same as ingress";
+ };
+
+ /* errors signalled by ip4-local. */
+ unknown_protocol {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unknown ip protocol";
+ };
+ tcp_checksum {
+ severity error;
+ type counter64;
+ units "packets";
+ description "bad tcp checksum";
+ };
+ udp_checksum {
+ severity error;
+ type counter64;
+ units "packets";
+ description "bad udp checksum";
+ };
+ udp_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "inconsistent udp/ip lengths";
+ };
+
+ /* spoofed packets in ip4-rewrite-local */
+ spoofed_local_packets {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 spoofed local-address packet drops";
+ };
+
+ /* Errors signalled by ip4-inacl */
+ inacl_table_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "input ACL table-miss drops";
+ };
+ inacl_session_deny {
+ severity error;
+ type counter64;
+ units "packets";
+ description "input ACL session deny drops";
+ };
+
+ /* Errors signalled by ip4-outacl */
+ outacl_table_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "output ACL table-miss drops";
+ };
+ outacl_session_deny {
+ severity error;
+ type counter64;
+ units "packets";
+ description "output ACL session deny drops";
+ };
+
+ /* Errors from mfib-forward */
+ rpf_failure {
+ severity error;
+ type counter64;
+ units "packets";
+ description "Multicast RPF check failed";
+ };
+
+ /* Errors signalled by ip4-reassembly */
+ reass_duplicate_fragment {
+ severity error;
+ type counter64;
+ units "packets";
+ description "duplicate/overlapping fragments";
+ };
+ reass_limit_reached {
+ severity error;
+ type counter64;
+ units "packets";
+ description "drops due to concurrent reassemblies limit";
+ };
+ reass_fragment_chain_too_long {
+ severity error;
+ type counter64;
+ units "packets";
+ description "fragment chain too long (drop)";
+ };
+ reass_no_buf {
+ severity error;
+ type counter64;
+ units "packets";
+ description "out of buffers (drop)";
+ };
+ reass_malformed_packet {
+ severity error;
+ type counter64;
+ units "packets";
+ description "malformed packets";
+ };
+ reass_internal_error {
+ severity error;
+ type counter64;
+ units "packets";
+ description "drops due to internal reassembly error";
+ };
+ reass_timeout {
+ severity error;
+ type counter64;
+ units "packets";
+ description "fragments dropped due to reassembly timeout";
+ };
+ reass_to_custom_app {
+ severity error;
+ type counter64;
+ units "packets";
+ description "send to custom drop app";
+ };
+ reass_success {
+ severity info;
+ type counter64;
+ units "packets";
+ description "successful reassemblies";
+ };
+ reass_fragments_reassembled {
+ severity info;
+ type counter64;
+ units "packets";
+ description "fragments reassembled";
+ };
+ reass_fragments_rcvd {
+ severity info;
+ type counter64;
+ units "packets";
+ description "fragments received";
+ };
+ reass_unsupp_ip_prot {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unsupported ip protocol";
+ };
+};
+
+/**
+ * IPv6 Error/info counters
+ */
+counters ip6 {
+ /* Must be first. */
+ none {
+ severity info;
+ type counter64;
+ units "packets";
+ description "valid ip6 packets";
+ };
+
+ /* Errors signalled by ip6-input */
+ too_short {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 length < 40 bytes";
+ };
+ bad_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 length > l2 length";
+ };
+ version {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 version != 6";
+ };
+ time_expired {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 ttl <= 1";
+ };
+
+ /* Errors signalled by ip6-rewrite. */
+ mtu_exceeded {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 MTU exceeded";
+ };
+ dst_lookup_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 destination lookup miss";
+ };
+ src_lookup_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 source lookup miss";
+ };
+ drop {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 drop";
+ };
+ punt {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 punt";
+ };
+
+ /* errors signalled by ip6-local. */
+ unknown_protocol {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unknown ip protocol";
+ };
+ udp_checksum {
+ severity error;
+ type counter64;
+ units "packets";
+ description "bad udp checksum";
+ };
+ icmp_checksum {
+ severity error;
+ type counter64;
+ units "packets";
+ description "bad icmp checksum";
+ };
+ udp_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "inconsistent udp/ip lengths";
+ };
+ /* Errors signalled by udp6-lookup. */
+ unknown_udp_port {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no listener for udp port";
+ };
+
+ /* spoofed packets in ip6-rewrite-local */
+ spoofed_local_packets {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 spoofed local-address packet drops";
+ };
+
+ /* Errors signalled by ip6-inacl */
+ inacl_table_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "input ACL table-miss drops";
+ };
+ inacl_session_deny {
+ severity error;
+ type counter64;
+ units "packets";
+ description "input ACL session deny drops";
+ };
+
+ /* Errors signalled by ip6-outacl */
+ outacl_table_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "output ACL table-miss drops";
+ };
+ outacl_session_deny {
+ severity error;
+ type counter64;
+ units "packets";
+ description "output ACL session deny drops";
+ };
+
+ /* Errors from mfib-forward */
+ rpf_failure {
+ severity error;
+ type counter64;
+ units "packets";
+ description "Multicast RPF check failed";
+ };
+
+ /* Errors signalled by ip6-reassembly */
+ reass_missing_upper {
+ severity error;
+ type counter64;
+ units "packets";
+ description "missing-upper layer drops";
+ };
+ reass_duplicate_fragment {
+ severity error;
+ type counter64;
+ units "packets";
+ description "duplicate fragments";
+ };
+ reass_overlapping_fragment {
+ severity error;
+ type counter64;
+ units "packets";
+ description "overlapping fragments";
+ };
+ reass_limit_reached {
+ severity error;
+ type counter64;
+ units "packets";
+ description "drops due to concurrent reassemblies limit";
+ };
+ reass_fragment_chain_too_long {
+ severity error;
+ type counter64;
+ units "packets";
+ description "fragment chain too long (drop)";
+ };
+ reass_no_buf {
+ severity error;
+ type counter64;
+ units "packets";
+ description "out of buffers (drop)";
+ };
+ reass_timeout {
+ severity error;
+ type counter64;
+ units "packets";
+ description "fragments dropped due to reassembly timeout";
+ };
+ reass_internal_error {
+ severity error;
+ type counter64;
+ units "packets";
+ description "drops due to internal reassembly error";
+ };
+ reass_invalid_frag_len {
+ severity error;
+ type counter64;
+ units "packets";
+ description "invalid fragment length";
+ };
+ reass_to_custom_app {
+ severity error;
+ type counter64;
+ units "packets";
+ description "send to custom drop app";
+ };
+ reass_no_frag_hdr {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no fragmentation header";
+ };
+ reass_invalid_frag_size {
+ severity error;
+ type counter64;
+ units "packets";
+ description "drop due to invalid fragment size";
+ };
+ reass_success {
+ severity info;
+ type counter64;
+ units "packets";
+ description "successful reassemblies";
+ };
+ reass_fragments_reassembled {
+ severity info;
+ type counter64;
+ units "packets";
+ description "fragments reassembled";
+ };
+ reass_fragments_rcvd {
+ severity info;
+ type counter64;
+ units "packets";
+ description "fragments received";
+ };
+ reass_unsupp_ip_proto {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unsupported ip protocol";
+ };
+};
+
+counters icmp4 {
+ none {
+ severity info;
+ type counter64;
+ units "packets";
+ description "valid packets";
+ };
+ unknown_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unknown type";
+ };
+ invalid_code_for_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "invalid code for type";
+ };
+ invalid_hop_limit_for_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "hop_limit != 255";
+ };
+ length_too_small_for_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "payload length too small for type";
+ };
+ options_with_odd_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "total option length not multiple of 8 bytes";
+ };
+ option_with_zero_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "option has zero length";
+ };
+ echo_replies_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "echo replies sent";
+ };
+ dst_lookup_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "icmp6 dst address lookup misses";
+ };
+ dest_unreach_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "destination unreachable response sent";
+ };
+ ttl_expire_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "hop limit exceeded response sent";
+ };
+ param_problem_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "parameter problem response sent";
+ };
+ drop {
+ severity error;
+ type counter64;
+ units "packets";
+ description "error message dropped";
+ };
+};
+
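+/* Each counters block is turned by vppapigen into an error enum and a
+ * descriptor table; the block above evidently yields the ICMP4_ERROR_*
+ * values, ICMP4_N_ERROR and icmp4_error_counters consumed by the node
+ * registrations earlier in this patch:
+ *
+ *   .n_errors = ICMP4_N_ERROR,
+ *   .error_counters = icmp4_error_counters,
+ */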
+counters icmp6 {
+ none {
+ severity info;
+ type counter64;
+ units "packets";
+ description "valid packets";
+ };
+ unknown_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unknown type";
+ };
+ invalid_code_for_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "invalid code for type";
+ };
+ invalid_hop_limit_for_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "hop_limit != 255";
+ };
+ length_too_small_for_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "payload length too small for type";
+ };
+ options_with_odd_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "total option length not multiple of 8 bytes";
+ };
+ option_with_zero_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "option has zero length";
+ };
+ echo_replies_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "echo replies sent";
+ };
+ neighbor_solicitation_source_not_on_link {
+ severity error;
+ type counter64;
+ units "packets";
+ description "neighbor solicitations from source not on link";
+ };
+ neighbor_solicitation_source_unknown {
+ severity error;
+ type counter64;
+ units "packets";
+ description "neighbor solicitations for unknown targets";
+ };
+ neighbor_advertisements_tx {
+ severity info;
+ type counter64;
+ units "packets";
+ description "neighbor advertisements sent";
+ };
+ neighbor_advertisements_rx {
+ severity info;
+ type counter64;
+ units "packets";
+ description "neighbor advertisements received";
+ };
+ router_solicitation_source_not_on_link {
+ severity error;
+ type counter64;
+ units "packets";
+ description "router solicitations from source not on link";
+ };
+ router_solicitation_unsupported_intf {
+ severity error;
+ type counter64;
+ units "packets";
+ description "neighbor discovery unsupported interface";
+ };
+ router_solicitation_radv_not_config {
+ severity error;
+ type counter64;
+ units "packets";
+ description "neighbor discovery not configured";
+ };
+ router_advertisement_source_not_link_local {
+ severity error;
+ type counter64;
+ units "packets";
+ description "router advertisement source not link local";
+ };
+ router_advertisements_tx {
+ severity info;
+ type counter64;
+ units "packets";
+ description "router advertisements sent";
+ };
+ router_advertisements_rx {
+ severity info;
+ type counter64;
+ units "packets";
+ description "router advertisements received";
+ };
+ dst_lookup_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "icmp6 dst address lookup misses";
+ };
+ dest_unreach_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "destination unreachable response sent";
+ };
+ packet_too_big_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "packet too big response sent";
+ };
+ ttl_expire_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "hop limit exceeded response sent";
+ };
+ param_problem_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "parameter problem response sent";
+ };
+ drop {
+ severity error;
+ type counter64;
+ units "packets";
+ description "error message dropped";
+ };
+ alloc_failure {
+ severity error;
+ type counter64;
+ units "packets";
+ description "buffer allocation failure";
+ };
+};
+
+paths {
+ "/err/ip-frag" "ip_frag";
+ "/err/mpls-frag" "ip_frag";
+ "/err/ip4-mpls-label-disposition-pipe" "ip4";
+ "/err/ip4-mpls-label-disposition-uniform" "ip4";
+ "/err/ip4-local" "ip4";
+ "/err/ip4-input" "ip4";
+ "/err/ip4-full-reassembly" "ip4";
+ "/err/ip4-local-full-reassembly" "ip4";
+ "/err/ip4-full-reassembly-feature" "ip4";
+ "/err/ip4-full-reassembly-custom" "ip4";
+ "/err/ip4-full-reassembly-expire-walk" "ip4";
+ "/err/ip4-sv-reassembly" "ip4";
+ "/err/ip4-sv-reassembly-feature" "ip4";
+ "/err/ip4-sv-reassembly-output-feature" "ip4";
+ "/err/ip4-sv-reassembly-custom-next" "ip4";
+ "/err/ip4-sv-reassembly-expire-walk" "ip4";
+ "/err/ip6-mpls-label-disposition-pipe" "ip6";
+ "/err/ip6-mpls-label-disposition-uniform" "ip6";
+ "/err/ip6-local" "ip6";
+ "/err/ip6-input" "ip6";
+ "/err/ip6-full-reassembly" "ip6";
+ "/err/ip6-local-full-reassembly" "ip6";
+ "/err/ip6-full-reassembly-feature" "ip6";
+ "/err/ip6-full-reassembly-custom" "ip6";
+ "/err/ip6-full-reassembly-expire-walk" "ip6";
+ "/err/ip6-sv-reassembly" "ip6";
+ "/err/ip6-sv-reassembly-feature" "ip6";
+ "/err/ip6-sv-reassembly-output-feature" "ip6";
+ "/err/ip6-sv-reassembly-custom-next" "ip6";
+ "/err/ip6-sv-reassembly-expire-walk" "ip6";
+ "/err/ip4-icmp-input" "icmp4";
+ "/err/ip4-icmp-error" "icmp4";
+ "/err/ip6-icmp-input" "icmp6";
+ "/err/ip6-icmp-error" "icmp6";
+};
+
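+/* The paths block binds each node's error arc to one of the counter sets
+ * above. The practical effect (a hedged example; the tool invocation is an
+ * assumption) is that, say, the new ICMP throttle drops become named
+ * stats-segment counters under the node's error path:
+ *
+ *   $ vpp_get_stats dump /err/ip4-icmp-error
+ */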
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/vnet/ip/ip.c b/src/vnet/ip/ip.c
index 5d0c7707dd3..586f7dfbc85 100644
--- a/src/vnet/ip/ip.c
+++ b/src/vnet/ip/ip.c
@@ -18,6 +18,20 @@
u32 ip_flow_hash_router_id;
+ethernet_type_t
+ip_address_family_to_ether_type (ip_address_family_t af)
+{
+ switch (af)
+ {
+ case AF_IP4:
+ return (ETHERNET_TYPE_IP4);
+ case AF_IP6:
+ return (ETHERNET_TYPE_IP6);
+ }
+ ASSERT (0);
+ return (ETHERNET_TYPE_IP4);
+}
+
u8
ip_is_zero (ip46_address_t * ip46_address, u8 is_ip4)
{
@@ -104,7 +118,6 @@ ip_set (ip46_address_t * dst, void *src, u8 is_ip4)
sizeof (ip6_address_t));
}
-/* *INDENT-OFF* */
static const char *ip_arc_names[N_IP_FEATURE_LOCATIONS][N_AF][N_SAFI] = {
[IP_FEATURE_INPUT] = {
[AF_IP4] = {
@@ -157,7 +170,6 @@ static const char *ip_arc_names[N_IP_FEATURE_LOCATIONS][N_AF][N_SAFI] = {
},
},
};
-/* *INDENT-ON* */
void
ip_feature_enable_disable (ip_address_family_t af,
@@ -189,7 +201,8 @@ ip_feature_enable_disable (ip_address_family_t af,
}
int
-ip_flow_hash_set (ip_address_family_t af, u32 table_id, u32 flow_hash_config)
+ip_flow_hash_set (ip_address_family_t af, u32 table_id,
+ flow_hash_config_t flow_hash_config)
{
fib_protocol_t fproto;
u32 fib_index;
diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h
index 87689076697..9ebefa0cf5d 100644
--- a/src/vnet/ip/ip.h
+++ b/src/vnet/ip/ip.h
@@ -51,19 +51,18 @@
#include <vnet/ip/ip_packet.h>
#include <vnet/ip/lookup.h>
#include <vnet/ip/ip_interface.h>
+#include <vnet/ip/ip.api_enum.h>
#include <vnet/tcp/tcp_packet.h>
#include <vnet/udp/udp_packet.h>
#include <vnet/ip/icmp46_packet.h>
#include <vnet/ip/ip4.h>
-#include <vnet/ip/ip4_error.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/icmp4.h>
#include <vnet/ip/ip6.h>
#include <vnet/ip/ip6_packet.h>
-#include <vnet/ip/ip6_error.h>
#include <vnet/ip/icmp6.h>
/* Per protocol info. */
@@ -267,6 +266,8 @@ void ip_table_create (fib_protocol_t fproto, u32 table_id, u8 is_api,
void ip_table_delete (fib_protocol_t fproto, u32 table_id, u8 is_api);
+void fib_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 fib_index);
+void mfib_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 mfib_index);
int ip_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 table_id);
u32 ip_table_get_unused_id (fib_protocol_t fproto);
@@ -287,6 +288,8 @@ void ip_feature_enable_disable (ip_address_family_t af,
void *feature_config,
u32 n_feature_config_bytes);
+ethernet_type_t ip_address_family_to_ether_type (ip_address_family_t af);
+
always_inline u32 vlib_buffer_get_ip4_fib_index (vlib_buffer_t * b);
always_inline u32 vlib_buffer_get_ip6_fib_index (vlib_buffer_t * b);
always_inline u32
diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h
index dde7b7b9de9..45d07c2e0f6 100644
--- a/src/vnet/ip/ip4.h
+++ b/src/vnet/ip/ip4.h
@@ -169,7 +169,6 @@ typedef struct ip4_main_t
/** Global ip4 main structure. */
extern ip4_main_t ip4_main;
-extern char *ip4_error_strings[];
/** Global ip4 input node. Errors get attached to ip4 input node. */
extern vlib_node_registration_t ip4_input_node;
@@ -212,7 +211,6 @@ ip4_interface_address_matching_destination (ip4_main_t * im,
ip_interface_address_t *ia;
ip4_address_t *result = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm, ia, sw_if_index,
1 /* honor unnumbered */,
({
@@ -223,7 +221,6 @@ ip4_interface_address_matching_destination (ip4_main_t * im,
break;
}
}));
- /* *INDENT-ON* */
if (result_ia)
*result_ia = result ? ia : 0;
return result;
diff --git a/src/vnet/ip/ip46_address.h b/src/vnet/ip/ip46_address.h
index f726178ee63..90f766464f6 100644
--- a/src/vnet/ip/ip46_address.h
+++ b/src/vnet/ip/ip46_address.h
@@ -34,7 +34,6 @@ typedef enum
extern u8 *format_ip46_type (u8 * s, va_list * args);
-/* *INDENT-OFF* */
typedef CLIB_PACKED (union ip46_address_t_ {
struct {
u32 pad[3];
@@ -44,7 +43,6 @@ typedef CLIB_PACKED (union ip46_address_t_ {
u8 as_u8[16];
u64 as_u64[2];
}) ip46_address_t;
-/* *INDENT-ON* */
format_function_t format_ip46_address;
diff --git a/src/vnet/ip/ip46_cli.c b/src/vnet/ip/ip46_cli.c
index f58be898d9b..e3da27914bd 100644
--- a/src/vnet/ip/ip46_cli.c
+++ b/src/vnet/ip/ip46_cli.c
@@ -71,12 +71,10 @@ ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2)
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_command, static) = {
.path = "set interface ip",
.short_help = "IP4/IP6 commands",
};
-/* *INDENT-ON* */
void
ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index)
@@ -90,7 +88,6 @@ ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index)
ip_interface_address_t *ia;
int i;
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im4->lookup_main, ia, sw_if_index,
0 /* honor unnumbered */,
({
@@ -99,9 +96,7 @@ ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index)
vec_add1 (ip4_addrs, x[0]);
vec_add1 (ip4_masks, ia->address_length);
}));
- /* *INDENT-ON* */
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im6->lookup_main, ia, sw_if_index,
0 /* honor unnumbered */,
({
@@ -110,7 +105,6 @@ ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index)
vec_add1 (ip6_addrs, x[0]);
vec_add1 (ip6_masks, ia->address_length);
}));
- /* *INDENT-ON* */
for (i = 0; i < vec_len (ip4_addrs); i++)
ip4_add_del_interface_address (vm, sw_if_index, &ip4_addrs[i],
@@ -212,13 +206,11 @@ done:
* @cliexcmd{set interface ip address del GigabitEthernet2/0/0 all}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_address_command, static) = {
.path = "set interface ip address",
.function = add_del_ip_address,
.short_help = "set interface ip address [del] <interface> <ip-addr>/<mask> | [all]",
};
-/* *INDENT-ON* */
static clib_error_t *
set_reassembly_command_fn (vlib_main_t * vm,
@@ -294,13 +286,11 @@ set_reassembly_command_fn (vlib_main_t * vm,
return NULL;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_reassembly_command, static) = {
.path = "set interface reassembly",
.short_help = "set interface reassembly <interface-name> [on|off|ip4|ip6]",
.function = set_reassembly_command_fn,
};
-/* *INDENT-ON* */
/* Dummy init function to get us linked in. */
static clib_error_t *
diff --git a/src/vnet/ip/ip4_error.h b/src/vnet/ip/ip4_error.h
deleted file mode 100644
index dce3dd4c1ab..00000000000
--- a/src/vnet/ip/ip4_error.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * ip/ip4_error.h: ip4 fast path errors
- *
- * Copyright (c) 2008 Eliot Dresselhaus
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef included_ip_ip4_error_h
-#define included_ip_ip4_error_h
-
-#define foreach_ip4_error \
- /* Must be first. */ \
- _ (NONE, "valid ip4 packets") \
- \
- /* Errors signalled by ip4-input */ \
- _ (TOO_SHORT, "ip4 length < 20 bytes") \
- _ (BAD_LENGTH, "ip4 length > l2 length") \
- _ (BAD_CHECKSUM, "bad ip4 checksum") \
- _ (VERSION, "ip4 version != 4") \
- _ (OPTIONS, "ip4 options present") \
- _ (FRAGMENT_OFFSET_ONE, "ip4 fragment offset == 1") \
- _ (TIME_EXPIRED, "ip4 ttl <= 1") \
- \
- /* Errors signalled by ip4-rewrite. */ \
- _ (MTU_EXCEEDED, "ip4 MTU exceeded and DF set") \
- _ (DST_LOOKUP_MISS, "ip4 destination lookup miss") \
- _ (SRC_LOOKUP_MISS, "ip4 source lookup miss") \
- _ (DROP, "ip4 drop") \
- _ (PUNT, "ip4 punt") \
- _ (SAME_INTERFACE, "ip4 egress interface same as ingress") \
- \
- /* Errors signalled by ip4-local. */ \
- _ (UNKNOWN_PROTOCOL, "unknown ip protocol") \
- _ (TCP_CHECKSUM, "bad tcp checksum") \
- _ (UDP_CHECKSUM, "bad udp checksum") \
- _ (UDP_LENGTH, "inconsistent udp/ip lengths") \
- \
- /* Spoofed packets in ip4-rewrite-local */ \
- _ (SPOOFED_LOCAL_PACKETS, "ip4 spoofed local-address packet drops") \
- \
- /* Errors signalled by ip4-inacl */ \
- _ (INACL_TABLE_MISS, "input ACL table-miss drops") \
- _ (INACL_SESSION_DENY, "input ACL session deny drops") \
- /* Errors signalled by ip4-outacl */ \
- _ (OUTACL_TABLE_MISS, "output ACL table-miss drops") \
- _ (OUTACL_SESSION_DENY, "output ACL session deny drops") \
- \
- /* Errors from mfib-forward */ \
- _ (RPF_FAILURE, "Multicast RPF check failed") \
- \
- /* Errors signalled by ip4-reassembly */ \
- _ (REASS_DUPLICATE_FRAGMENT, "duplicate/overlapping fragments") \
- _ (REASS_LIMIT_REACHED, "drops due to concurrent reassemblies limit") \
- _ (REASS_FRAGMENT_CHAIN_TOO_LONG, "fragment chain too long (drop)") \
- _ (REASS_NO_BUF, "out of buffers (drop)") \
- _ (REASS_MALFORMED_PACKET, "malformed packets") \
- _ (REASS_INTERNAL_ERROR, "drops due to internal reassembly error") \
- _ (REASS_UNSUPP_IP_PROT, "unsupported ip protocol")
-
-typedef enum
-{
-#define _(sym,str) IP4_ERROR_##sym,
- foreach_ip4_error
-#undef _
- IP4_N_ERROR,
-} ip4_error_t;
-
-#endif /* included_ip_ip4_error_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index 900a1b4ce8e..ff74b52eb18 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -61,6 +61,7 @@
#include <vnet/ip/ip4_forward.h>
#include <vnet/interface_output.h>
#include <vnet/classify/vnet_classify.h>
+#include <vnet/ip/reass/ip4_full_reass.h>
/** @brief IPv4 lookup node.
@node ip4-lookup
@@ -102,7 +103,6 @@ VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_lookup_node) =
{
.name = "ip4-lookup",
@@ -111,7 +111,6 @@ VLIB_REGISTER_NODE (ip4_lookup_node) =
.n_next_nodes = IP_LOOKUP_N_NEXT,
.next_nodes = IP4_LOOKUP_NEXT_NODES,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -267,7 +266,6 @@ VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_load_balance_node) =
{
.name = "ip4-load-balance",
@@ -275,7 +273,6 @@ VLIB_REGISTER_NODE (ip4_load_balance_node) =
.sibling_of = "ip4-lookup",
.format_trace = format_ip4_lookup_trace,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
/* get first interface address */
@@ -287,7 +284,6 @@ ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
ip_interface_address_t *ia = 0;
ip4_address_t *result = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address
(lm, ia, sw_if_index,
1 /* honor unnumbered */ ,
@@ -297,7 +293,6 @@ ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
result = a;
break;
}));
- /* *INDENT-OFF* */
if (result_ia)
*result_ia = result ? ia : 0;
return result;
@@ -670,7 +665,6 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm,
* subnets on interfaces. Easy fix - disallow overlapping subnets, like
* most routers do.
*/
- /* *INDENT-OFF* */
if (!is_del)
{
/* When adding an address check that it does not conflict
@@ -731,7 +725,6 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm,
}
}
}
- /* *INDENT-ON* */
if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
@@ -852,7 +845,6 @@ ip4_directed_broadcast (u32 sw_if_index, u8 enable)
* when directed broadcast is enabled, the subnet braodcast route will forward
* packets using an adjacency with a broadcast MAC. otherwise it drops
*/
- /* *INDENT-OFF* */
foreach_ip_interface_address(&im->lookup_main, ia,
sw_if_index, 0,
({
@@ -876,7 +868,6 @@ ip4_directed_broadcast (u32 sw_if_index, u8 enable)
&pfx, sw_if_index);
}
}));
- /* *INDENT-ON* */
}
#endif
@@ -896,7 +887,6 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
0 /* honor unnumbered */,
({
@@ -910,7 +900,6 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
im, fib_index,
a, ia->address_length);
}));
- /* *INDENT-ON* */
return 0;
}
@@ -918,7 +907,6 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
/* Built-in ip4 unicast rx feature path definition */
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
.arc_name = "ip4-unicast",
@@ -1057,7 +1045,6 @@ VNET_FEATURE_INIT (ip4_interface_output, static) =
.node_name = "interface-output",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
static clib_error_t *
ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
@@ -1082,14 +1069,21 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
vlib_main_t *vm = vlib_get_main ();
vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
({
address = ip_interface_address_get_address (lm4, ia);
ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
}));
- /* *INDENT-ON* */
ip4_mfib_interface_enable_disable (sw_if_index, 0);
+
+ if (0 != im4->fib_index_by_sw_if_index[sw_if_index])
+ fib_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 0);
+ if (0 != im4->mfib_index_by_sw_if_index[sw_if_index])
+ mfib_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 0);
+
+ /* Erase the lookup tables just in case */
+ im4->fib_index_by_sw_if_index[sw_if_index] = ~0;
+ im4->mfib_index_by_sw_if_index[sw_if_index] = ~0;
}
vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
@@ -1196,9 +1190,11 @@ format_ip4_forward_next_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
u32 indent = format_get_indent (s);
- s = format (s, "%U%U",
- format_white_space, indent,
- format_ip4_header, t->packet_data, sizeof (t->packet_data));
+
+ s = format (s, "%Ufib:%d adj:%d flow:0x%08x", format_white_space, indent,
+ t->fib_index, t->dpo_index, t->flow_hash);
+ s = format (s, "\n%U%U", format_white_space, indent, format_ip4_header,
+ t->packet_data, sizeof (t->packet_data));
return s;
}
#endif
@@ -1387,13 +1383,11 @@ ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
}
#endif
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_local) = {
.arc_name = "ip4-local",
.start_nodes = VNET_FEATURES ("ip4-local", "ip4-receive"),
.last_in_arc = "ip4-local-end-of-arc",
};
-/* *INDENT-ON* */
static inline void
ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
@@ -1469,10 +1463,10 @@ ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
|| ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
{
- if (is_tcp_udp[0])
+ if (is_tcp_udp[0] && !ip4_local_csum_is_offloaded (b[0]))
ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
&good_tcp_udp[0]);
- if (is_tcp_udp[1])
+ if (is_tcp_udp[1] && !ip4_local_csum_is_offloaded (b[1]))
ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
&good_tcp_udp[1]);
}
@@ -1498,9 +1492,8 @@ ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
next_index = *next;
if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
{
- vnet_feature_arc_start (arc_index,
- vnet_buffer (b)->sw_if_index[VLIB_RX],
- &next_index, b);
+ vnet_feature_arc_start (
+ arc_index, vnet_buffer (b)->ip.rx_sw_if_index, &next_index, b);
*next = next_index;
}
}
@@ -1508,7 +1501,9 @@ ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
typedef struct
{
+ /* The src and fib-index together determine if packet n is the same as n-1 */
ip4_address_t src;
+ u32 fib_index;
u32 lbi;
u8 error;
u8 first;
@@ -1527,14 +1522,14 @@ ip4_local_check_src (vlib_buffer_t *b, ip4_header_t *ip0,
vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
+ vnet_buffer (b)->ip.rx_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
if (is_receive_dpo)
{
receive_dpo_t *rd;
rd = receive_dpo_get (vnet_buffer (b)->ip.adj_index[VLIB_TX]);
- vnet_buffer (b)->ip.rx_sw_if_index = rd->rd_sw_if_index;
+ if (rd->rd_sw_if_index != ~0)
+ vnet_buffer (b)->ip.rx_sw_if_index = rd->rd_sw_if_index;
}
- else
- vnet_buffer (b)->ip.rx_sw_if_index = ~0;
/*
* vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
@@ -1542,7 +1537,8 @@ ip4_local_check_src (vlib_buffer_t *b, ip4_header_t *ip0,
* vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
* adjacency for the source address (the remote sender's address)
*/
- if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
+ if (PREDICT_TRUE ((last_check->src.as_u32 != ip0->src_address.as_u32)) ||
+ (last_check->fib_index != vnet_buffer (b)->ip.fib_index) ||
last_check->first)
{
lbi0 = ip4_fib_forwarding_lookup (vnet_buffer (b)->ip.fib_index,
@@ -1578,6 +1574,7 @@ ip4_local_check_src (vlib_buffer_t *b, ip4_header_t *ip0,
last_check->lbi = lbi0;
last_check->error = *error0;
last_check->first = 0;
+ last_check->fib_index = vnet_buffer (b)->ip.fib_index;
}
else
{
@@ -1612,18 +1609,22 @@ ip4_local_check_src_x2 (vlib_buffer_t **b, ip4_header_t **ip,
vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
vnet_buffer (b[1])->ip.fib_index;
+ not_last_hit |= vnet_buffer (b[0])->ip.fib_index ^ last_check->fib_index;
+ not_last_hit |= vnet_buffer (b[1])->ip.fib_index ^ last_check->fib_index;
+
+ vnet_buffer (b[0])->ip.rx_sw_if_index =
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX];
+ vnet_buffer (b[1])->ip.rx_sw_if_index =
+ vnet_buffer (b[1])->sw_if_index[VLIB_RX];
if (is_receive_dpo)
{
const receive_dpo_t *rd0, *rd1;
rd0 = receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
rd1 = receive_dpo_get (vnet_buffer (b[1])->ip.adj_index[VLIB_TX]);
- vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index;
- vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index;
- }
- else
- {
- vnet_buffer (b[0])->ip.rx_sw_if_index = ~0;
- vnet_buffer (b[1])->ip.rx_sw_if_index = ~0;
+ if (rd0->rd_sw_if_index != ~0)
+ vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index;
+ if (rd1->rd_sw_if_index != ~0)
+ vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index;
}
/*
@@ -1672,6 +1673,7 @@ ip4_local_check_src_x2 (vlib_buffer_t **b, ip4_header_t **ip,
last_check->lbi = lbi[1];
last_check->error = error[1];
last_check->first = 0;
+ last_check->fib_index = vnet_buffer (b[1])->ip.fib_index;
}
else
{
@@ -1741,10 +1743,11 @@ ip4_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
* member to make sure the .lbi is initialised for the first
* packet.
*/
- .src = {.as_u32 = 0},
+ .src = { .as_u32 = 0 },
.lbi = ~0,
.error = IP4_ERROR_UNKNOWN_PROTOCOL,
.first = 1,
+ .fib_index = 0,
};
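The per-vector dedup cache used by ip4-local is now keyed on the (source
address, FIB index) pair rather than the source alone, so a cached verdict
from one VRF can no longer be replayed in another when consecutive packets
happen to share a source address. A sketch of the cache contract (the struct
and helper names here are hypothetical; lbi is the load-balance index from
the source lookup):

  /* hit only when both keys match and this is not the first packet
   * of the vector */
  static int
  last_check_hit (const ip4_local_last_check_t *lc, u32 src_u32, u32 fib)
  {
    return !lc->first && lc->src.as_u32 == src_u32 && lc->fib_index == fib;
  }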
from = vlib_frame_vector_args (frame);
@@ -1861,7 +1864,7 @@ VLIB_REGISTER_NODE (ip4_local_node) =
.vector_size = sizeof (u32),
.format_trace = format_ip4_forward_next_trace,
.n_errors = IP4_N_ERROR,
- .error_strings = ip4_error_strings,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP_LOCAL_N_NEXT,
.next_nodes =
{
@@ -1869,7 +1872,7 @@ VLIB_REGISTER_NODE (ip4_local_node) =
[IP_LOCAL_NEXT_PUNT] = "ip4-punt",
[IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
[IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
- [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
+ [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-local-full-reassembly",
},
};
@@ -1970,14 +1973,12 @@ show_ip_local_command_fn (vlib_main_t * vm,
* 47
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_local, static) =
{
.path = "show ip local",
.function = show_ip_local_command_fn,
.short_help = "show ip local",
};
-/* *INDENT-ON* */
typedef enum
{
@@ -2044,7 +2045,9 @@ ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
ttl += 1;
ip->ttl = ttl;
- ASSERT (ip4_header_checksum_is_valid (ip));
+ ASSERT (ip4_header_checksum_is_valid (ip) ||
+ (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) ||
+ (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM));
}
/* Decrement TTL & update checksum.
@@ -2222,9 +2225,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
adj0->ia_cfg_index);
next[0] = next_index;
- if (is_midchain)
- vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
- 0 /* is_ip6 */ );
}
else
{
@@ -2247,9 +2247,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
&next_index, b[1],
adj1->ia_cfg_index);
next[1] = next_index;
- if (is_midchain)
- vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
- 0 /* is_ip6 */ );
}
else
{
@@ -2399,9 +2396,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (is_midchain)
{
- vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
- 0 /* is_ip6 */ );
-
/* Guess we are only writing on ipv4 header. */
vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
}
@@ -2505,10 +2499,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (is_midchain)
{
- /* this acts on the packet that is about to be encapped */
- vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
- 0 /* is_ip6 */ );
-
/* Guess we are only writing on ipv4 header. */
vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
}
@@ -2635,7 +2625,6 @@ VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_rewrite_node) = {
.name = "ip4-rewrite",
.vector_size = sizeof (u32),
@@ -2680,7 +2669,6 @@ VLIB_REGISTER_NODE (ip4_midchain_node) = {
.format_trace = format_ip4_rewrite_trace,
.sibling_of = "ip4-rewrite",
};
-/* *INDENT-ON */
static clib_error_t *
set_ip_flow_hash_command_fn (vlib_main_t * vm,
@@ -2812,15 +2800,12 @@ set_ip_flow_hash_command_fn (vlib_main_t * vm,
* [0] [@0]: dpo-drop ip6
* @cliexend
?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
-{
+VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
.path = "set ip flow-hash",
- .short_help =
- "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
+ .short_help = "set ip flow-hash table <table-id> [src] [dst] [sport] "
+ "[dport] [proto] [reverse] [gtpv1teid]",
.function = set_ip_flow_hash_command_fn,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
int
@@ -2937,7 +2922,6 @@ set_ip_classify_command_fn (vlib_main_t * vm,
* Example of how to assign a classification table to an interface:
* @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip_classify_command, static) =
{
.path = "set ip classify",
@@ -2945,7 +2929,6 @@ VLIB_CLI_COMMAND (set_ip_classify_command, static) =
"set ip classify intfc <interface> table-index <classify-idx>",
.function = set_ip_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip4_inlines.h b/src/vnet/ip/ip4_inlines.h
index 00a47125b8a..b4fcebc9896 100644
--- a/src/vnet/ip/ip4_inlines.h
+++ b/src/vnet/ip/ip4_inlines.h
@@ -42,6 +42,8 @@
#include <vnet/ip/ip_flow_hash.h>
#include <vnet/ip/ip4_packet.h>
+#include <vnet/tcp/tcp_packet.h>
+#include <vnet/udp/udp_packet.h>
#define IP_DF 0x4000 /* don't fragment */
@@ -52,9 +54,11 @@ ip4_compute_flow_hash (const ip4_header_t * ip,
flow_hash_config_t flow_hash_config)
{
tcp_header_t *tcp = (void *) (ip + 1);
+ udp_header_t *udp = (void *) (ip + 1);
+ gtpv1u_header_t *gtpu = (void *) (udp + 1);
u32 a, b, c, t1, t2;
- uword is_tcp_udp = (ip->protocol == IP_PROTOCOL_TCP
- || ip->protocol == IP_PROTOCOL_UDP);
+ uword is_udp = ip->protocol == IP_PROTOCOL_UDP;
+ uword is_tcp_udp = (ip->protocol == IP_PROTOCOL_TCP || is_udp);
t1 = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR)
? ip->src_address.data_u32 : 0;
@@ -89,6 +93,13 @@ ip4_compute_flow_hash (const ip4_header_t * ip,
b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0;
c = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ?
(t1 << 16) | t2 : (t2 << 16) | t1;
+ if (PREDICT_TRUE (is_udp) &&
+ PREDICT_FALSE ((flow_hash_config & IP_FLOW_HASH_GTPV1_TEID) &&
+ udp->dst_port == GTPV1_PORT_BE))
+ {
+ t1 = gtpu->teid;
+ c ^= t1;
+ }
a ^= ip_flow_hash_router_id;
hash_v3_mix32 (a, b, c);
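Every GTP-U bearer between the same two tunnel endpoints shares one outer
5-tuple, so hashing on the tuple alone collapses all bearers onto a single
ECMP or bond member; folding the TEID into the hash restores the spread. The
extra entropy is taken only when all three guards hold (UDP, the new
IP_FLOW_HASH_GTPV1_TEID config bit, and destination port GTPV1_PORT_BE, i.e.
the GTP-U port 2152 as a network-order constant), sketched:

  if (ip->protocol == IP_PROTOCOL_UDP &&
      (flow_hash_config & IP_FLOW_HASH_GTPV1_TEID) &&
      udp->dst_port == GTPV1_PORT_BE)
    c ^= gtpu->teid; /* TEID is network order; XOR needs no byte swap */

The bit is exposed per-table via the gtpv1teid keyword added to the
"set ip flow-hash" short help earlier in this patch.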
@@ -98,9 +109,9 @@ ip4_compute_flow_hash (const ip4_header_t * ip,
}
always_inline void *
-vlib_buffer_push_ip4_custom (vlib_main_t * vm, vlib_buffer_t * b,
- ip4_address_t * src, ip4_address_t * dst,
- int proto, u8 csum_offload, u8 is_df)
+vlib_buffer_push_ip4_custom (vlib_main_t *vm, vlib_buffer_t *b,
+ ip4_address_t *src, ip4_address_t *dst, int proto,
+ u8 csum_offload, u8 is_df, u8 dscp)
{
ip4_header_t *ih;
@@ -108,7 +119,8 @@ vlib_buffer_push_ip4_custom (vlib_main_t * vm, vlib_buffer_t * b,
ih = vlib_buffer_push_uninit (b, sizeof (ip4_header_t));
ih->ip_version_and_header_length = 0x45;
- ih->tos = 0;
+ ip4_header_set_dscp (ih, dscp);
+ ip4_header_set_ecn (ih, 0);
ih->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
/* No fragments */
@@ -152,7 +164,7 @@ vlib_buffer_push_ip4 (vlib_main_t * vm, vlib_buffer_t * b,
u8 csum_offload)
{
return vlib_buffer_push_ip4_custom (vm, b, src, dst, proto, csum_offload,
- 1 /* is_df */ );
+ 1 /* is_df */, 0);
}
#endif /* included_ip_ip4_inlines_h */
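vlib_buffer_push_ip4_custom() gains a dscp argument, and the DSCP/ECN bits
are written through the field accessors instead of the old raw "tos = 0";
the vlib_buffer_push_ip4() wrapper keeps its previous behaviour by passing 0
(CS0). A usage sketch for a hypothetical caller that wants a marked,
fragmentable header (IP_DSCP_CS5 is assumed to be the generated enum value
for DSCP 40, and src/dst are assumed locals):

  ip4_header_t *ih;
  ih = vlib_buffer_push_ip4_custom (vm, b, &src, &dst, IP_PROTOCOL_UDP,
                                    1 /* csum_offload */, 0 /* is_df */,
                                    IP_DSCP_CS5);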
diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c
index 3b3edf9fca7..106d17da3cb 100644
--- a/src/vnet/ip/ip4_input.c
+++ b/src/vnet/ip/ip4_input.c
@@ -374,22 +374,13 @@ VLIB_NODE_FN (ip4_input_no_checksum_node) (vlib_main_t * vm,
return ip4_input_inline (vm, node, frame, /* verify_checksum */ 0);
}
-#ifndef CLIB_MARCH_VARIANT
-char *ip4_error_strings[] = {
-#define _(sym,string) string,
- foreach_ip4_error
-#undef _
-};
-#endif
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_input_node) = {
.name = "ip4-input",
.vector_size = sizeof (u32),
.protocol_hint = VLIB_NODE_PROTO_HINT_IP4,
.n_errors = IP4_N_ERROR,
- .error_strings = ip4_error_strings,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_INPUT_N_NEXT,
.next_nodes = {
@@ -399,7 +390,6 @@ VLIB_REGISTER_NODE (ip4_input_node) = {
[IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup",
[IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-mfib-forward-lookup",
[IP4_INPUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [IP4_INPUT_NEXT_REASSEMBLY] = "ip4-full-reassembly",
},
.format_buffer = format_ip4_header,
@@ -414,7 +404,6 @@ VLIB_REGISTER_NODE (ip4_input_no_checksum_node) = {
.format_buffer = format_ip4_header,
.format_trace = format_ip4_input_trace,
};
-/* *INDENT-ON* */
static clib_error_t *
ip4_init (vlib_main_t * vm)
diff --git a/src/vnet/ip/ip4_input.h b/src/vnet/ip/ip4_input.h
index 383ef31758c..d2ed13fa35f 100644
--- a/src/vnet/ip/ip4_input.h
+++ b/src/vnet/ip/ip4_input.h
@@ -42,6 +42,7 @@
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/vector/ip_csum.h>
typedef enum
{
@@ -51,7 +52,6 @@ typedef enum
IP4_INPUT_NEXT_LOOKUP,
IP4_INPUT_NEXT_LOOKUP_MULTICAST,
IP4_INPUT_NEXT_ICMP_ERROR,
- IP4_INPUT_NEXT_REASSEMBLY,
IP4_INPUT_N_NEXT,
} ip4_input_next_t;
@@ -60,18 +60,21 @@ check_ver_opt_csum (ip4_header_t * ip, u8 * error, int verify_checksum)
{
if (PREDICT_FALSE (ip->ip_version_and_header_length != 0x45))
{
- if ((ip->ip_version_and_header_length & 0xf) != 5)
+ if ((ip->ip_version_and_header_length & 0xf0) != 0x40)
+ *error = IP4_ERROR_VERSION;
+ else if ((ip->ip_version_and_header_length & 0x0f) < 5)
+ *error = IP4_ERROR_HDR_TOO_SHORT;
+ else
{
*error = IP4_ERROR_OPTIONS;
- if (verify_checksum && ip_csum (ip, ip4_header_bytes (ip)) != 0)
+ if (verify_checksum &&
+ clib_ip_csum ((u8 *) ip, ip4_header_bytes (ip)) != 0)
*error = IP4_ERROR_BAD_CHECKSUM;
}
- else
- *error = IP4_ERROR_VERSION;
}
- else
- if (PREDICT_FALSE (verify_checksum &&
- ip_csum (ip, sizeof (ip4_header_t)) != 0))
+ else if (PREDICT_FALSE (verify_checksum &&
+ clib_ip_csum ((u8 *) ip, sizeof (ip4_header_t)) !=
+ 0))
*error = IP4_ERROR_BAD_CHECKSUM;
}
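The rewritten check distinguishes a wrong version nibble from a too-short
IHL instead of reporting both as "options", and routes checksum verification
through clib_ip_csum(). The decision tree for any first byte other than
0x45, sketched (ver_ihl stands for ip->ip_version_and_header_length):

  if ((ver_ihl & 0xf0) != 0x40)
    error = IP4_ERROR_VERSION;          /* not IPv4 at all */
  else if ((ver_ihl & 0x0f) < 5)
    error = IP4_ERROR_HDR_TOO_SHORT;    /* IHL below the 20-byte minimum */
  else
    error = IP4_ERROR_OPTIONS;          /* options present; upgraded to
                                           BAD_CHECKSUM if verification fails */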
diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c
index 0f4c47fe11a..00855f7db43 100644
--- a/src/vnet/ip/ip4_mtrie.c
+++ b/src/vnet/ip/ip4_mtrie.c
@@ -91,94 +91,48 @@ ip4_mtrie_leaf_set_next_ply_index (u32 i)
return l;
}
-#ifndef __ALTIVEC__
-#define PLY_X4_SPLAT_INIT(init_x4, init) \
- init_x4 = u32x4_splat (init);
-#else
-#define PLY_X4_SPLAT_INIT(init_x4, init) \
-{ \
- u32x4_union_t y; \
- y.as_u32[0] = init; \
- y.as_u32[1] = init; \
- y.as_u32[2] = init; \
- y.as_u32[3] = init; \
- init_x4 = y.as_u32x4; \
-}
-#endif
-
-#ifdef CLIB_HAVE_VEC128
-#define PLY_INIT_LEAVES(p) \
-{ \
- u32x4 *l, init_x4; \
- \
- PLY_X4_SPLAT_INIT(init_x4, init); \
- for (l = p->leaves_as_u32x4; \
- l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); \
- l += 4) \
- { \
- l[0] = init_x4; \
- l[1] = init_x4; \
- l[2] = init_x4; \
- l[3] = init_x4; \
- } \
-}
-#else
-#define PLY_INIT_LEAVES(p) \
-{ \
- u32 *l; \
- \
- for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4) \
- { \
- l[0] = init; \
- l[1] = init; \
- l[2] = init; \
- l[3] = init; \
- } \
-}
-#endif
-
-#define PLY_INIT(p, init, prefix_len, ply_base_len) \
-{ \
- /* \
- * A leaf is 'empty' if it represents a leaf from the covering PLY \
- * i.e. if the prefix length of the leaf is less than or equal to \
- * the prefix length of the PLY \
- */ \
- p->n_non_empty_leafs = (prefix_len > ply_base_len ? \
- ARRAY_LEN (p->leaves) : 0); \
- clib_memset (p->dst_address_bits_of_leaves, prefix_len, \
- sizeof (p->dst_address_bits_of_leaves)); \
- p->dst_address_bits_base = ply_base_len; \
- \
- /* Initialize leaves. */ \
- PLY_INIT_LEAVES(p); \
-}
-
static void
ply_8_init (ip4_mtrie_8_ply_t *p, ip4_mtrie_leaf_t init, uword prefix_len,
u32 ply_base_len)
{
- PLY_INIT (p, init, prefix_len, ply_base_len);
+ p->n_non_empty_leafs = prefix_len > ply_base_len ? ARRAY_LEN (p->leaves) : 0;
+ clib_memset_u8 (p->dst_address_bits_of_leaves, prefix_len,
+ sizeof (p->dst_address_bits_of_leaves));
+ p->dst_address_bits_base = ply_base_len;
+
+ clib_memset_u32 (p->leaves, init, ARRAY_LEN (p->leaves));
}
static void
ply_16_init (ip4_mtrie_16_ply_t *p, ip4_mtrie_leaf_t init, uword prefix_len)
{
- clib_memset (p->dst_address_bits_of_leaves, prefix_len,
- sizeof (p->dst_address_bits_of_leaves));
- PLY_INIT_LEAVES (p);
+ clib_memset_u8 (p->dst_address_bits_of_leaves, prefix_len,
+ sizeof (p->dst_address_bits_of_leaves));
+ clib_memset_u32 (p->leaves, init, ARRAY_LEN (p->leaves));
}
static ip4_mtrie_leaf_t
ply_create (ip4_mtrie_leaf_t init_leaf, u32 leaf_prefix_len, u32 ply_base_len)
{
ip4_mtrie_8_ply_t *p;
- /* Get cache aligned ply. */
+ ip4_mtrie_leaf_t l;
+ u8 need_barrier_sync = pool_get_will_expand (ip4_ply_pool);
+ vlib_main_t *vm = vlib_get_main ();
+ ASSERT (vm->thread_index == 0);
+
+ if (need_barrier_sync)
+ vlib_worker_thread_barrier_sync (vm);
+ /* Get cache aligned ply. */
pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES);
ply_8_init (p, init_leaf, leaf_prefix_len, ply_base_len);
- return ip4_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool);
+ l = ip4_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool);
+
+ if (need_barrier_sync)
+ vlib_worker_thread_barrier_release (vm);
+
+ return l;
}
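ply_create() can now run while worker threads are forwarding: if the pool
allocation is about to expand (and therefore possibly move ip4_ply_pool in
memory), the workers are parked behind the barrier first, so no in-flight
lookup dereferences a stale ply pointer. The pattern, sketched with the same
calls the patch uses:

  u8 will_expand = pool_get_will_expand (ip4_ply_pool);
  if (will_expand)
    vlib_worker_thread_barrier_sync (vm);    /* park the workers */
  pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES);
  /* ... initialise the new ply ... */
  if (will_expand)
    vlib_worker_thread_barrier_release (vm); /* let them run again */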
always_inline ip4_mtrie_8_ply_t *
diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h
index ec417c9a9f7..16c524745be 100644
--- a/src/vnet/ip/ip4_mtrie.h
+++ b/src/vnet/ip/ip4_mtrie.h
@@ -65,14 +65,7 @@ typedef struct ip4_mtrie_16_ply_t_
/**
* The leaves/slots/buckets to be filled with leaves
*/
- union
- {
- ip4_mtrie_leaf_t leaves[PLY_16_SIZE];
-
-#ifdef CLIB_HAVE_VEC128
- u32x4 leaves_as_u32x4[PLY_16_SIZE / 4];
-#endif
- };
+ ip4_mtrie_leaf_t leaves[PLY_16_SIZE];
/**
* Prefix length for terminal leaves.
@@ -85,17 +78,11 @@ typedef struct ip4_mtrie_16_ply_t_
*/
typedef struct ip4_mtrie_8_ply_t_
{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
/**
* The leaves/slots/buckets to be filled with leaves
*/
- union
- {
- ip4_mtrie_leaf_t leaves[256];
-
-#ifdef CLIB_HAVE_VEC128
- u32x4 leaves_as_u32x4[256 / 4];
-#endif
- };
+ ip4_mtrie_leaf_t leaves[256];
/**
* Prefix length for leaves/ply.
@@ -113,9 +100,6 @@ typedef struct ip4_mtrie_8_ply_t_
* 'non-empty'. Otherwise it is the value of the cover.
*/
i32 dst_address_bits_base;
-
- /* Pad to cache line boundary. */
- u8 pad[CLIB_CACHE_LINE_BYTES - 2 * sizeof (i32)];
} ip4_mtrie_8_ply_t;
STATIC_ASSERT (0 == sizeof (ip4_mtrie_8_ply_t) % CLIB_CACHE_LINE_BYTES,
diff --git a/src/vnet/ip/ip4_options.c b/src/vnet/ip/ip4_options.c
index 1b5a7878512..bbe311ffb20 100644
--- a/src/vnet/ip/ip4_options.c
+++ b/src/vnet/ip/ip4_options.c
@@ -78,10 +78,17 @@ VLIB_NODE_FN (ip4_options_node) (vlib_main_t * vm,
{
case IP4_ROUTER_ALERT_OPTION:
/*
+ * check the option length
+ */
+ if (options[1] != 4)
+ break;
+ /*
* if it's an IGMP packet, pass up the local stack
*/
if (IP_PROTOCOL_IGMP == ip4->protocol)
{
+ ip_lookup_set_buffer_fib_index (
+ ip4_main.fib_index_by_sw_if_index, b);
next = IP4_OPTIONS_NEXT_LOCAL;
}
break;
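The router-alert option is now length-checked before it is acted on, and
IGMP packets punted to the local stack get their FIB index stamped first so
the subsequent source lookup happens in the right table. Per RFC 2113 the
option is exactly four octets, with options[1] as the length field:

  /* RFC 2113 router alert option layout:
   *  +--------+--------+--------+--------+
   *  |10010100|00000100|  2-octet value  |   type 0x94, length 4
   *  +--------+--------+--------+--------+
   * any length other than 4 is malformed and the option is skipped */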
@@ -120,7 +127,6 @@ format_ip4_options_trace (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_options_node) = {
.name = "ip4-options",
.vector_size = sizeof (u32),
@@ -133,7 +139,6 @@ VLIB_REGISTER_NODE (ip4_options_node) = {
.format_buffer = format_ip4_header,
.format_trace = format_ip4_options_trace,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h
index 513a7449b54..269049194e6 100644
--- a/src/vnet/ip/ip4_packet.h
+++ b/src/vnet/ip/ip4_packet.h
@@ -41,7 +41,6 @@
#define included_ip4_packet_h
#include <vnet/ip/ip_packet.h> /* for ip_csum_t */
-#include <vnet/tcp/tcp_packet.h> /* for tcp_header_t */
#include <vppinfra/byte_order.h> /* for clib_net_to_host_u16 */
#include <vppinfra/warnings.h> /* for WARN_OFF/WARN_ON macro */
@@ -130,19 +129,15 @@ typedef union
/* For checksumming we'll want to access IP header in word sized chunks. */
/* For 64 bit machines. */
- /* *INDENT-OFF* */
CLIB_PACKED (struct {
u64 checksum_data_64[2];
u32 checksum_data_64_32[1];
});
- /* *INDENT-ON* */
/* For 32 bit machines. */
- /* *INDENT-OFF* */
CLIB_PACKED (struct {
u32 checksum_data_32[5];
});
- /* *INDENT-ON* */
} ip4_header_t;
/* Value of ip_version_and_header_length for packets w/o options. */
@@ -201,9 +196,7 @@ ip4_next_header (ip4_header_t * i)
/* Turn off array bounds check due to ip4_header_t
option field operations. */
-/* *INDENT-OFF* */
WARN_OFF(array-bounds)
-/* *INDENT-ON* */
static_always_inline u16
ip4_header_checksum_inline (ip4_header_t * i, int with_checksum)
@@ -306,9 +299,7 @@ ip4_header_checksum_inline (ip4_header_t * i, int with_checksum)
return ~((u16) sum);
}
-/* *INDENT-OFF* */
WARN_ON(array-bounds)
-/* *INDENT-ON* */
always_inline u16
ip4_header_checksum (ip4_header_t * i)
@@ -476,47 +467,6 @@ ip4_multicast_ethernet_address (u8 * ethernet_address,
ethernet_address[5] = d[3];
}
-always_inline void
-ip4_tcp_reply_x1 (ip4_header_t * ip0, tcp_header_t * tcp0)
-{
- u32 src0, dst0;
-
- src0 = ip0->src_address.data_u32;
- dst0 = ip0->dst_address.data_u32;
- ip0->src_address.data_u32 = dst0;
- ip0->dst_address.data_u32 = src0;
-
- src0 = tcp0->src;
- dst0 = tcp0->dst;
- tcp0->src = dst0;
- tcp0->dst = src0;
-}
-
-always_inline void
-ip4_tcp_reply_x2 (ip4_header_t * ip0, ip4_header_t * ip1,
- tcp_header_t * tcp0, tcp_header_t * tcp1)
-{
- u32 src0, dst0, src1, dst1;
-
- src0 = ip0->src_address.data_u32;
- src1 = ip1->src_address.data_u32;
- dst0 = ip0->dst_address.data_u32;
- dst1 = ip1->dst_address.data_u32;
- ip0->src_address.data_u32 = dst0;
- ip1->src_address.data_u32 = dst1;
- ip0->dst_address.data_u32 = src0;
- ip1->dst_address.data_u32 = src1;
-
- src0 = tcp0->src;
- src1 = tcp1->src;
- dst0 = tcp0->dst;
- dst1 = tcp1->dst;
- tcp0->src = dst0;
- tcp1->src = dst1;
- tcp0->dst = src0;
- tcp1->dst = src1;
-}
-
#endif /* included_ip4_packet_h */
/*
diff --git a/src/vnet/ip/ip4_punt_drop.c b/src/vnet/ip/ip4_punt_drop.c
index 89803afb9dd..b8cc3304437 100644
--- a/src/vnet/ip/ip4_punt_drop.c
+++ b/src/vnet/ip/ip4_punt_drop.c
@@ -18,7 +18,6 @@
#include <vnet/policer/policer.h>
#include <vnet/policer/police_inlines.h>
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_punt) =
{
.arc_name = "ip4-punt",
@@ -30,7 +29,6 @@ VNET_FEATURE_ARC_INIT (ip4_drop) =
.arc_name = "ip4-drop",
.start_nodes = VNET_FEATURES ("ip4-drop", "ip4-not-enabled"),
};
-/* *INDENT-ON* */
extern ip_punt_policer_t ip4_punt_policer_cfg;
@@ -89,7 +87,6 @@ VLIB_NODE_FN (ip4_punt_policer_node) (vlib_main_t * vm,
ip4_punt_policer_cfg.policer_index));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_punt_policer_node) = {
.name = "ip4-punt-policer",
.vector_size = sizeof (u32),
@@ -109,7 +106,6 @@ VNET_FEATURE_INIT (ip4_punt_policer_node) = {
.node_name = "ip4-punt-policer",
.runs_before = VNET_FEATURES("ip4-punt-redirect"),
};
-/* *INDENT-ON* */
#define foreach_ip4_punt_redirect_error \
@@ -138,7 +134,6 @@ VLIB_NODE_FN (ip4_punt_redirect_node) (vlib_main_t * vm,
FIB_PROTOCOL_IP4));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_punt_redirect_node) = {
.name = "ip4-punt-redirect",
.vector_size = sizeof (u32),
@@ -160,7 +155,6 @@ VNET_FEATURE_INIT (ip4_punt_redirect_node, static) = {
.node_name = "ip4-punt-redirect",
.runs_before = VNET_FEATURES("error-punt"),
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_drop_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
@@ -194,7 +188,6 @@ ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vnet_feat_arc_ip4_punt.feature_arc_index);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_drop_node) =
{
.name = "ip4-drop",
@@ -237,7 +230,6 @@ VNET_FEATURE_INIT (ip4_drop_end_of_arc, static) = {
.node_name = "error-drop",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON */
#ifndef CLIB_MARCH_VARIANT
void
@@ -301,17 +293,17 @@ done:
* @cliexpar
* @cliexcmd{set ip punt policer <INDEX>}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip4_punt_policer_command, static) =
{
.path = "ip punt policer",
.function = ip4_punt_police_cmd,
.short_help = "ip punt policer [add|del] <index>",
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
+static u32 ip4_punt_redirect_enable_counts;
+
void
ip4_punt_redirect_add_paths (u32 rx_sw_if_index,
const fib_route_path_t *rpaths)
@@ -320,13 +312,16 @@ ip4_punt_redirect_add_paths (u32 rx_sw_if_index,
rx_sw_if_index,
FIB_FORW_CHAIN_TYPE_UNICAST_IP4, rpaths);
- vnet_feature_enable_disable ("ip4-punt", "ip4-punt-redirect", 0, 1, 0, 0);
+ if (1 == ++ip4_punt_redirect_enable_counts)
+ vnet_feature_enable_disable ("ip4-punt", "ip4-punt-redirect", 0, 1, 0, 0);
}
void
ip4_punt_redirect_del (u32 rx_sw_if_index)
{
- vnet_feature_enable_disable ("ip4-punt", "ip4-punt-redirect", 0, 0, 0, 0);
+ ASSERT (ip4_punt_redirect_enable_counts);
+ if (0 == --ip4_punt_redirect_enable_counts)
+ vnet_feature_enable_disable ("ip4-punt", "ip4-punt-redirect", 0, 0, 0, 0);
ip_punt_redirect_del (FIB_PROTOCOL_IP4, rx_sw_if_index);
}
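Punt-redirect previously toggled the "ip4-punt-redirect" arc feature on
every add/del, so deleting one redirect disabled the feature for all
remaining ones. It is now reference counted: the first add enables the
feature, the last delete disables it. The shape of the pattern (the function
names here are illustrative only):

  static u32 enable_counts;

  static void
  redirect_added (void)
  {
    if (1 == ++enable_counts)   /* first user: feature on */
      vnet_feature_enable_disable ("ip4-punt", "ip4-punt-redirect",
                                   0, 1, 0, 0);
  }

  static void
  redirect_deleted (void)
  {
    ASSERT (enable_counts);
    if (0 == --enable_counts)   /* last user gone: feature off */
      vnet_feature_enable_disable ("ip4-punt", "ip4-punt-redirect",
                                   0, 0, 0, 0);
  }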
@@ -399,14 +394,12 @@ done:
* @cliexpar
* @cliexcmd{set ip punt redirect}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip4_punt_redirect_command, static) =
{
.path = "ip punt redirect",
.function = ip4_punt_redirect_cmd,
.short_help = "ip punt redirect [add|del] rx [<interface>|all] via [<nh>] <tx_interface>",
};
-/* *INDENT-ON* */
static clib_error_t *
ip4_punt_redirect_show_cmd (vlib_main_t * vm,
@@ -423,7 +416,6 @@ ip4_punt_redirect_show_cmd (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{set ip punt redirect}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip4_punt_redirect_command, static) =
{
.path = "show ip punt redirect",
@@ -431,7 +423,6 @@ VLIB_CLI_COMMAND (show_ip4_punt_redirect_command, static) =
.short_help = "show ip punt redirect",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip4_source_and_port_range_check.c b/src/vnet/ip/ip4_source_and_port_range_check.c
index 4c311eb8335..27b2d549ea7 100644
--- a/src/vnet/ip/ip4_source_and_port_range_check.c
+++ b/src/vnet/ip/ip4_source_and_port_range_check.c
@@ -99,7 +99,9 @@ static inline u32
check_adj_port_range_x1 (const protocol_port_range_dpo_t * ppr_dpo,
u16 dst_port, u32 next)
{
+#ifdef CLIB_HAVE_VEC128
u16x8 key = u16x8_splat (dst_port);
+#endif
int i;
if (NULL == ppr_dpo || dst_port == 0)
@@ -107,9 +109,20 @@ check_adj_port_range_x1 (const protocol_port_range_dpo_t * ppr_dpo,
for (i = 0; i < ppr_dpo->n_used_blocks; i++)
+#ifdef CLIB_HAVE_VEC128
if (!u16x8_is_all_zero ((ppr_dpo->blocks[i].low.as_u16x8 <= key) &
(ppr_dpo->blocks[i].hi.as_u16x8 >= key)))
return next;
+#else
+ {
+ for (int j = 0; j < 8; j++)
+ {
+ if ((ppr_dpo->blocks[i].low.as_u16[j] <= dst_port) &&
+ (ppr_dpo->blocks[i].hi.as_u16[j] >= dst_port))
+ return next;
+ }
+ };
+#endif
return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP;
}
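The range compare now has a scalar fallback so the node builds on targets
without 128-bit SIMD support; the semantics are unchanged: a hit in any of
the eight (low, hi) lanes of a block admits the packet. Scalar equivalent of
the u16x8 test, sketched (the block type name is an assumption):

  /* scalar form of: !u16x8_is_all_zero ((low <= key) & (hi >= key)) */
  static int
  block_matches (const protocol_port_range_t *blk, u16 dst_port)
  {
    int j;
    for (j = 0; j < 8; j++)
      if (blk->low.as_u16[j] <= dst_port && blk->hi.as_u16[j] >= dst_port)
        return 1;
    return 0;
  }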
@@ -550,7 +563,6 @@ ip4_source_and_port_range_check_tx (vlib_main_t * vm,
if this changes can easily make new function
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_source_port_and_range_check_rx) = {
.function = ip4_source_and_port_range_check_rx,
.name = "ip4-source-and-port-range-check-rx",
@@ -567,9 +579,7 @@ VLIB_REGISTER_NODE (ip4_source_port_and_range_check_rx) = {
.format_buffer = format_ip4_header,
.format_trace = format_ip4_source_and_port_range_check_trace,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_source_port_and_range_check_tx) = {
.function = ip4_source_and_port_range_check_tx,
.name = "ip4-source-and-port-range-check-tx",
@@ -586,7 +596,6 @@ VLIB_REGISTER_NODE (ip4_source_port_and_range_check_tx) = {
.format_buffer = format_ip4_header,
.format_trace = format_ip4_source_and_port_range_check_trace,
};
-/* *INDENT-ON* */
int
set_ip_source_and_port_range_check (vlib_main_t * vm,
@@ -784,13 +793,11 @@ set_ip_source_and_port_range_check_fn (vlib_main_t * vm,
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_source_and_port_range_check_command, static) = {
.path = "set interface ip source-and-port-range-check",
.function = set_ip_source_and_port_range_check_fn,
.short_help = "set interface ip source-and-port-range-check <interface> [tcp-out-vrf <table-id>] [udp-out-vrf <table-id>] [tcp-in-vrf <table-id>] [udp-in-vrf <table-id>] [del]",
};
-/* *INDENT-ON* */
static u8 *
format_ppr_dpo (u8 * s, va_list * args)
@@ -1251,14 +1258,12 @@ ip_source_and_port_range_check_command_fn (vlib_main_t * vm,
* Example of how to delete an IPv4 subnet and range of ports from an IPv4 FIB table:
* @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 range 23 - 100 del}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip_source_and_port_range_check_command, static) = {
.path = "set ip source-and-port-range-check",
.function = ip_source_and_port_range_check_command_fn,
.short_help =
"set ip source-and-port-range-check vrf <table-id> <ip-addr>/<mask> {port nn | range <nn> - <nn>} [del]",
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -1377,14 +1382,12 @@ show_source_and_port_range_check_fn (vlib_main_t * vm,
* 172.16.2.2 port 250 FAIL
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_source_and_port_range_check, static) = {
.path = "show ip source-and-port-range-check",
.function = show_source_and_port_range_check_fn,
.short_help =
"show ip source-and-port-range-check vrf <table-id> <ip-addr> [port <n>]",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip4_to_ip6.h b/src/vnet/ip/ip4_to_ip6.h
index a6d87f1f962..57c2b6ff78b 100644
--- a/src/vnet/ip/ip4_to_ip6.h
+++ b/src/vnet/ip/ip4_to_ip6.h
@@ -28,14 +28,12 @@
typedef int (*ip4_to_ip6_set_fn_t) (vlib_buffer_t * b, ip4_header_t * ip4,
ip6_header_t * ip6, void *ctx);
-/* *INDENT-OFF* */
static u8 icmp_to_icmp6_updater_pointer_table[] =
{ 0, 1, 4, 4, ~0,
~0, ~0, ~0, 7, 6,
~0, ~0, 8, 8, 8,
8, 24, 24, 24, 24
};
-/* *INDENT-ON* */
#define frag_id_4to6(id) (id)
diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h
index f33780f1a98..56eec523d5b 100644
--- a/src/vnet/ip/ip6.h
+++ b/src/vnet/ip/ip6.h
@@ -238,7 +238,6 @@ ip6_interface_address_matching_destination (ip6_main_t * im,
ip_interface_address_t *ia;
ip6_address_t *result = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm, ia, sw_if_index,
1 /* honor unnumbered */,
({
@@ -249,7 +248,6 @@ ip6_interface_address_matching_destination (ip6_main_t * im,
break;
}
}));
- /* *INDENT-ON* */
if (result_ia)
*result_ia = result ? ia : 0;
return result;
diff --git a/src/vnet/ip/ip6_error.h b/src/vnet/ip/ip6_error.h
deleted file mode 100644
index 8546b4af8d3..00000000000
--- a/src/vnet/ip/ip6_error.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * ip/ip6_error.h: ip6 fast path errors
- *
- * Copyright (c) 2008 Eliot Dresselhaus
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef included_ip_ip6_error_h
-#define included_ip_ip6_error_h
-
-// clang-format off
-#define foreach_ip6_error \
- /* Must be first. */ \
- _ (NONE, "valid ip6 packets") \
- \
- /* Errors signalled by ip6-input */ \
- _ (TOO_SHORT, "ip6 length < 40 bytes") \
- _ (BAD_LENGTH, "ip6 length > l2 length") \
- _ (VERSION, "ip6 version != 6") \
- _ (TIME_EXPIRED, "ip6 ttl <= 1") \
- \
- /* Errors signalled by ip6-rewrite. */ \
- _ (MTU_EXCEEDED, "ip6 MTU exceeded") \
- _ (DST_LOOKUP_MISS, "ip6 destination lookup miss") \
- _ (SRC_LOOKUP_MISS, "ip6 source lookup miss") \
- _ (DROP, "ip6 drop") \
- _ (PUNT, "ip6 punt") \
- \
- /* Errors signalled by ip6-local. */ \
- _ (UNKNOWN_PROTOCOL, "unknown ip protocol") \
- _ (UDP_CHECKSUM, "bad udp checksum") \
- _ (ICMP_CHECKSUM, "bad icmp checksum") \
- _ (UDP_LENGTH, "inconsistent udp/ip lengths") \
- \
- /* Errors signalled by udp6-lookup. */ \
- _ (UNKNOWN_UDP_PORT, "no listener for udp port") \
- \
- /* Spoofed packets in ip6-rewrite-local */ \
-_(SPOOFED_LOCAL_PACKETS, "ip6 spoofed local-address packet drops") \
- \
- /* Errors signalled by ip6-inacl */ \
- _ (INACL_TABLE_MISS, "input ACL table-miss drops") \
- _ (INACL_SESSION_DENY, "input ACL session deny drops") \
- /* Errors signalled by ip6-outacl */ \
- _ (OUTACL_TABLE_MISS, "output ACL table-miss drops") \
- _ (OUTACL_SESSION_DENY, "output ACL session deny drops") \
- \
- /* Errors from mfib-forward */ \
- _ (RPF_FAILURE, "Multicast RPF check failed") \
- \
- /* Errors signalled by ip6-reassembly */ \
- _ (REASS_MISSING_UPPER, "missing-upper layer drops") \
- _ (REASS_DUPLICATE_FRAGMENT, "duplicate fragments") \
- _ (REASS_OVERLAPPING_FRAGMENT, "overlapping fragments") \
- _ (REASS_LIMIT_REACHED, "drops due to concurrent reassemblies limit") \
- _ (REASS_FRAGMENT_CHAIN_TOO_LONG, "fragment chain too long (drop)") \
- _ (REASS_NO_BUF, "out of buffers (drop)") \
- _ (REASS_TIMEOUT, "fragments dropped due to reassembly timeout") \
- _ (REASS_INTERNAL_ERROR, "drops due to internal reassembly error") \
- _ (REASS_UNSUPP_IP_PROTO, "unsupported ip protocol")
-
-// clang-format on
-
-typedef enum
-{
-#define _(sym,str) IP6_ERROR_##sym,
- foreach_ip6_error
-#undef _
- IP6_N_ERROR,
-} ip6_error_t;
-
-#endif /* included_ip_ip6_error_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/ip/ip6_format.c b/src/vnet/ip/ip6_format.c
index 1b8ff1e0ab0..1a1bef26aa6 100644
--- a/src/vnet/ip/ip6_format.c
+++ b/src/vnet/ip/ip6_format.c
@@ -288,7 +288,7 @@ format_ip6_header (u8 * s, va_list * args)
"\n%Utos 0x%02x, flow label 0x%x, hop limit %d, payload length %d",
format_white_space, indent, traffic_class, flow_label,
ip->hop_limit, clib_net_to_host_u16 (ip->payload_length));
-
+#if 0
/* Recurse into next protocol layer. */
if (max_header_bytes != 0 && sizeof (ip[0]) < max_header_bytes)
{
@@ -301,7 +301,7 @@ format_ip6_header (u8 * s, va_list * args)
/* next protocol header */ (void *) (ip + 1),
max_header_bytes - sizeof (ip[0]));
}
-
+#endif
return s;
}
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index b9f9892f929..48fb633fd32 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -71,7 +71,6 @@ ip6_add_interface_prefix_routes (ip6_main_t * im,
ip_lookup_main_t *lm = &im->lookup_main;
ip_interface_prefix_t *if_prefix;
- /* *INDENT-OFF* */
ip_interface_prefix_key_t key = {
.prefix = {
.fp_len = address_length,
@@ -85,7 +84,6 @@ ip6_add_interface_prefix_routes (ip6_main_t * im,
},
.sw_if_index = sw_if_index,
};
- /* *INDENT-ON* */
/* If prefix already set on interface, just increment ref count & return */
if_prefix = ip_get_interface_prefix (lm, &key);
@@ -178,7 +176,6 @@ ip6_del_interface_prefix_routes (ip6_main_t * im,
ip_lookup_main_t *lm = &im->lookup_main;
ip_interface_prefix_t *if_prefix;
- /* *INDENT-OFF* */
ip_interface_prefix_key_t key = {
.prefix = {
.fp_len = address_length,
@@ -192,13 +189,12 @@ ip6_del_interface_prefix_routes (ip6_main_t * im,
},
.sw_if_index = sw_if_index,
};
- /* *INDENT-ON* */
if_prefix = ip_get_interface_prefix (lm, &key);
if (!if_prefix)
{
clib_warning ("Prefix not found while deleting %U",
- format_ip4_address_and_length, address, address_length);
+ format_ip6_address_and_length, address, address_length);
return;
}
@@ -283,7 +279,6 @@ ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
ip_interface_address_t *ia = 0;
ip6_address_t *result = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm, ia, sw_if_index,
1 /* honor unnumbered */,
({
@@ -291,7 +286,6 @@ ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
result = a;
break;
}));
- /* *INDENT-ON* */
return result;
}
@@ -359,7 +353,6 @@ ip6_add_del_interface_address (vlib_main_t * vm,
vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
vec_add1 (addr_fib, ip6_af);
- /* *INDENT-OFF* */
if (!is_del)
{
/* When adding an address check that it does not conflict
@@ -417,7 +410,6 @@ ip6_add_del_interface_address (vlib_main_t * vm,
}
}
}
- /* *INDENT-ON* */
if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
@@ -537,7 +529,6 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
0 /* honor unnumbered */,
({
@@ -550,7 +541,6 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
ip6_del_interface_routes (sw_if_index, im, fib_index,
a, ia->address_length);
}));
- /* *INDENT-ON* */
return 0;
}
@@ -558,7 +548,6 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
/* Built-in ip6 unicast rx feature path definition */
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip6_unicast, static) =
{
.arc_name = "ip6-unicast",
@@ -683,7 +672,6 @@ VNET_FEATURE_INIT (ip6_interface_output, static) = {
.node_name = "interface-output",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
@@ -709,14 +697,21 @@ ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
vlib_main_t *vm = vlib_get_main ();
vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
({
address = ip_interface_address_get_address (lm6, ia);
ip6_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
}));
- /* *INDENT-ON* */
ip6_mfib_interface_enable_disable (sw_if_index, 0);
+
+ if (0 != im6->fib_index_by_sw_if_index[sw_if_index])
+ fib_table_bind (FIB_PROTOCOL_IP6, sw_if_index, 0);
+ if (0 != im6->mfib_index_by_sw_if_index[sw_if_index])
+ mfib_table_bind (FIB_PROTOCOL_IP6, sw_if_index, 0);
+
+ /* Erase the lookup tables just in case */
+ im6->fib_index_by_sw_if_index[sw_if_index] = ~0;
+ im6->mfib_index_by_sw_if_index[sw_if_index] = ~0;
}
vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
@@ -739,7 +734,6 @@ VLIB_NODE_FN (ip6_lookup_node) (vlib_main_t * vm,
static u8 *format_ip6_lookup_trace (u8 * s, va_list * args);
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_lookup_node) =
{
.name = "ip6-lookup",
@@ -748,7 +742,6 @@ VLIB_REGISTER_NODE (ip6_lookup_node) =
.n_next_nodes = IP6_LOOKUP_N_NEXT,
.next_nodes = IP6_LOOKUP_NEXT_NODES,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -926,7 +919,6 @@ VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_load_balance_node) =
{
.name = "ip6-load-balance",
@@ -934,7 +926,6 @@ VLIB_REGISTER_NODE (ip6_load_balance_node) =
.sibling_of = "ip6-lookup",
.format_trace = format_ip6_lookup_trace,
};
-/* *INDENT-ON* */
typedef struct
{
@@ -957,8 +948,7 @@ format_ip6_forward_next_trace (u8 * s, va_list * args)
ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
u32 indent = format_get_indent (s);
- s = format (s, "%Ufib:%d adj:%d flow:%d",
- format_white_space, indent,
+ s = format (s, "%Ufib:%d adj:%d flow:0x%08x", format_white_space, indent,
t->fib_index, t->adj_index, t->flow_hash);
s = format (s, "\n%U%U",
format_white_space, indent,
@@ -1218,22 +1208,17 @@ always_inline u8
ip6_next_proto_is_tcp_udp (vlib_buffer_t * p0, ip6_header_t * ip0,
u32 * udp_offset0)
{
- u32 proto0;
- proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_UDP, udp_offset0);
- if (proto0 != IP_PROTOCOL_UDP)
- {
- proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, udp_offset0);
- proto0 = (proto0 == IP_PROTOCOL_TCP) ? proto0 : 0;
- }
- return proto0;
+ int nh = ip6_locate_header (p0, ip0, -1, udp_offset0);
+ if (nh > 0)
+ if (nh == IP_PROTOCOL_UDP || nh == IP_PROTOCOL_TCP)
+ return nh;
+ return 0;
}
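The TCP/UDP locator now walks the ip6 extension-header chain once instead of
twice. The contract assumed here: ip6_locate_header() called with a negative
protocol argument walks to the terminal header, returns its protocol number
(negative on a malformed chain) and writes that header's offset to the out
parameter; the caller then just filters the result:

  u32 offset;
  int nh = ip6_locate_header (p0, ip0, -1 /* walk to last header */, &offset);
  if (nh == IP_PROTOCOL_TCP || nh == IP_PROTOCOL_UDP)
    return nh;  /* offset now locates the L4 header */
  return 0;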
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip6_local) = {
.arc_name = "ip6-local",
.start_nodes = VNET_FEATURES ("ip6-local", "ip6-receive"),
};
-/* *INDENT-ON* */
static_always_inline u8
ip6_tcp_udp_icmp_bad_length (vlib_main_t * vm, vlib_buffer_t * p0)
@@ -1270,7 +1255,7 @@ ip6_tcp_udp_icmp_bad_length (vlib_main_t * vm, vlib_buffer_t * p0)
}
n_bytes_left -= n_this_buffer;
- n_bytes_left -= p0->total_length_not_including_first_buffer;
+ n_bytes_left -= vlib_buffer_length_in_chain (vm, p0) - p0->current_length;
if (n_bytes_left == 0)
return 0;
@@ -1313,7 +1298,7 @@ ip6_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_prefetch_buffer_data (b[3], LOAD);
}
- ip6_error_t error[2];
+ vl_counter_ip6_enum_t error[2];
error[0] = IP6_ERROR_UNKNOWN_PROTOCOL;
error[1] = IP6_ERROR_UNKNOWN_PROTOCOL;
@@ -1469,6 +1454,11 @@ ip6_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
vnet_buffer (b[1])->ip.fib_index;
+
+ vnet_buffer (b[0])->ip.rx_sw_if_index =
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX];
+ vnet_buffer (b[1])->ip.rx_sw_if_index =
+ vnet_buffer (b[1])->sw_if_index[VLIB_RX];
if (is_receive_dpo)
{
const receive_dpo_t *rd0, *rd1;
@@ -1476,13 +1466,10 @@ ip6_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
rd1 =
receive_dpo_get (vnet_buffer (b[1])->ip.adj_index[VLIB_TX]);
- vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index;
- vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index;
- }
- else
- {
- vnet_buffer (b[0])->ip.rx_sw_if_index = ~0;
- vnet_buffer (b[1])->ip.rx_sw_if_index = ~0;
+ if (rd0->rd_sw_if_index != ~0)
+ vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index;
+ if (rd1->rd_sw_if_index != ~0)
+ vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index;
}
} /* head_of_feature_arc */
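ip.rx_sw_if_index now defaults to the RX interface and is overridden only by
a receive DPO that carries a real interface; previously a receive DPO with
rd_sw_if_index == ~0 left the feature arc starting on an invalid interface.
Precedence, sketched (ip4_local_check_src earlier in this patch gets the
identical treatment):

  vnet_buffer (b)->ip.rx_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
  if (is_receive_dpo && rd->rd_sw_if_index != ~0)
    vnet_buffer (b)->ip.rx_sw_if_index = rd->rd_sw_if_index; /* DPO override */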
@@ -1505,16 +1492,16 @@ ip6_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
u32 next32 = next[0];
vnet_feature_arc_start (arc_index,
- vnet_buffer (b[0])->sw_if_index
- [VLIB_RX], &next32, b[0]);
+ vnet_buffer (b[0])->ip.rx_sw_if_index,
+ &next32, b[0]);
next[0] = next32;
}
if (PREDICT_TRUE (ip6_unknown[1]))
{
u32 next32 = next[1];
vnet_feature_arc_start (arc_index,
- vnet_buffer (b[1])->sw_if_index
- [VLIB_RX], &next32, b[1]);
+ vnet_buffer (b[1])->ip.rx_sw_if_index,
+ &next32, b[1]);
next[1] = next32;
}
}
@@ -1611,14 +1598,16 @@ ip6_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
vnet_buffer (b[0])->ip.fib_index;
+
+ vnet_buffer (b[0])->ip.rx_sw_if_index =
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX];
if (is_receive_dpo)
{
receive_dpo_t *rd;
rd = receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
- vnet_buffer (b[0])->ip.rx_sw_if_index = rd->rd_sw_if_index;
+ if (rd->rd_sw_if_index != ~0)
+ vnet_buffer (b[0])->ip.rx_sw_if_index = rd->rd_sw_if_index;
}
- else
- vnet_buffer (b[0])->ip.rx_sw_if_index = ~0;
} /* head_of_feature_arc */
next[0] = lm->local_next_by_ip_protocol[ip->protocol];
@@ -1633,8 +1622,8 @@ ip6_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
u32 next32 = next[0];
vnet_feature_arc_start (arc_index,
- vnet_buffer (b[0])->sw_if_index
- [VLIB_RX], &next32, b[0]);
+ vnet_buffer (b[0])->ip.rx_sw_if_index,
+ &next32, b[0]);
next[0] = next32;
}
}
@@ -1661,6 +1650,8 @@ VLIB_REGISTER_NODE (ip6_local_node) =
.name = "ip6-local",
.vector_size = sizeof (u32),
.format_trace = format_ip6_forward_next_trace,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP_LOCAL_N_NEXT,
.next_nodes =
{
@@ -1668,7 +1659,7 @@ VLIB_REGISTER_NODE (ip6_local_node) =
[IP_LOCAL_NEXT_PUNT] = "ip6-punt",
[IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
[IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
- [IP_LOCAL_NEXT_REASSEMBLY] = "ip6-full-reassembly",
+ [IP_LOCAL_NEXT_REASSEMBLY] = "ip6-local-full-reassembly",
},
};
@@ -1979,13 +1970,6 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
if (is_midchain)
{
- /* before we paint on the next header, update the L4
- * checksums if required, since there's no offload on a tunnel */
- vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ ,
- 1 /* is_ip6 */ );
- vnet_calc_checksums_inline (vm, p1, 0 /* is_ip4 */ ,
- 1 /* is_ip6 */ );
-
/* Assume we are only rewriting the ipv6 header. */
vnet_rewrite_two_headers (adj0[0], adj1[0],
ip0, ip1, sizeof (ip6_header_t));
@@ -2079,9 +2063,6 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
if (is_midchain)
{
- vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ ,
- 1 /* is_ip6 */ );
-
/* Assume we are only rewriting the ip6 header. */
vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip6_header_t));
}
@@ -2231,14 +2212,12 @@ VLIB_NODE_FN (ip6_mcast_midchain_node) (vlib_main_t * vm,
return ip6_rewrite_inline (vm, node, frame, 0, 1, 1);
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip6_midchain_node) =
-{
+VLIB_REGISTER_NODE (ip6_midchain_node) = {
.name = "ip6-midchain",
.vector_size = sizeof (u32),
.format_trace = format_ip6_forward_next_trace,
.sibling_of = "ip6-rewrite",
- };
+};
VLIB_REGISTER_NODE (ip6_rewrite_node) =
{
@@ -2279,7 +2258,6 @@ VLIB_REGISTER_NODE (ip6_mcast_midchain_node) =
.sibling_of = "ip6-rewrite",
};
-/* *INDENT-ON* */
/*
* Hop-by-Hop handling
@@ -2293,7 +2271,6 @@ _(PROCESSED, "pkts with ip6 hop-by-hop options") \
_(FORMAT, "incorrectly formatted hop-by-hop options") \
_(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")
-/* *INDENT-OFF* */
typedef enum
{
#define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
@@ -2301,7 +2278,6 @@ typedef enum
#undef _
IP6_HOP_BY_HOP_N_ERROR,
} ip6_hop_by_hop_error_t;
-/* *INDENT-ON* */
/*
* Primary h-b-h handler trace support
@@ -2728,7 +2704,6 @@ VLIB_NODE_FN (ip6_hop_by_hop_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
{
.name = "ip6-hop-by-hop",
@@ -2740,7 +2715,6 @@ VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
.error_strings = ip6_hop_by_hop_error_strings,
.n_next_nodes = 0,
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_hop_by_hop_init (vlib_main_t * vm)
@@ -2992,14 +2966,12 @@ set_ip6_flow_hash_command_fn (vlib_main_t * vm,
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) = {
.path = "set ip6 flow-hash",
.short_help = "set ip6 flow-hash table <table-id> [src] [dst] [sport] "
"[dport] [proto] [reverse] [flowlabel]",
.function = set_ip6_flow_hash_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_ip6_local_command_fn (vlib_main_t * vm,
@@ -3040,14 +3012,12 @@ show_ip6_local_command_fn (vlib_main_t * vm,
* 115
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip6_local, static) =
{
.path = "show ip6 local",
.function = show_ip6_local_command_fn,
.short_help = "show ip6 local",
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
int
@@ -3159,7 +3129,6 @@ set_ip6_classify_command_fn (vlib_main_t * vm,
* Example of how to assign a classification table to an interface:
* @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
{
.path = "set ip6 classify",
@@ -3167,7 +3136,6 @@ VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
"set ip6 classify intfc <interface> table-index <classify-idx>",
.function = set_ip6_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip6_hop_by_hop.c b/src/vnet/ip/ip6_hop_by_hop.c
index e66084c2c4d..412741abcf8 100644
--- a/src/vnet/ip/ip6_hop_by_hop.c
+++ b/src/vnet/ip/ip6_hop_by_hop.c
@@ -438,8 +438,7 @@ VLIB_NODE_FN (ip6_add_hop_by_hop_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = /* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) =
{
.name = "ip6-add-hop-by-hop",
.vector_size = sizeof (u32),
@@ -455,7 +454,6 @@ VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = /* *INDENT-OFF* */
#undef _
},
};
-/* *INDENT-ON* */
/* The main h-b-h tracer was already invoked, no need to do much here */
typedef struct
@@ -778,7 +776,6 @@ VLIB_NODE_FN (ip6_pop_hop_by_hop_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) =
{
.name = "ip6-pop-hop-by-hop",
@@ -791,7 +788,6 @@ VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) =
/* See ip/lookup.h */
.n_next_nodes = 0,
};
-/* *INDENT-ON* */
typedef struct
{
@@ -1006,7 +1002,6 @@ VLIB_NODE_FN (ip6_local_hop_by_hop_node) (vlib_main_t * vm,
}
#ifndef CLIB_MARCH_VARIANT
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_local_hop_by_hop_node) =
{
.name = "ip6-local-hop-by-hop",
@@ -1025,7 +1020,6 @@ VLIB_REGISTER_NODE (ip6_local_hop_by_hop_node) =
[IP6_LOCAL_HOP_BY_HOP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
clib_error_t *
show_ip6_hbh_command_fn (vlib_main_t * vm,
@@ -1059,13 +1053,11 @@ show_ip6_hbh_command_fn (vlib_main_t * vm,
* Display ip6 local hop-by-hop next protocol handler nodes
* @cliexcmd{show ip6 hbh}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip6_hbh, static) = {
.path = "show ip6 hbh",
.short_help = "show ip6 hbh",
.function = show_ip6_hbh_command_fn,
};
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
@@ -1105,12 +1097,10 @@ ip6_hop_by_hop_ioam_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_init) =
{
.runs_after = VLIB_INITS("ip_main_init", "ip6_lookup_init"),
};
-/* *INDENT-ON* */
void
ip6_local_hop_by_hop_register_protocol (u32 protocol, u32 node_index)
@@ -1264,13 +1254,11 @@ clear_ioam_rewrite_command_fn (vlib_main_t * vm,
* Example of how to clear iOAM features:
* @cliexcmd{clear ioam rewrite}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_clear_ioam_rewrite_cmd, static) = {
.path = "clear ioam rewrite",
.short_help = "clear ioam rewrite",
.function = clear_ioam_rewrite_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
ip6_ioam_enable (int has_trace_option, int has_pot_option,
@@ -1371,13 +1359,11 @@ ip6_set_ioam_rewrite_command_fn (vlib_main_t * vm,
* Example of how to enable trace and pot with ppc set to encap:
* @cliexcmd{set ioam rewrite trace pot ppc encap}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_set_ioam_rewrite_cmd, static) = {
.path = "set ioam rewrite",
.short_help = "set ioam [trace] [pot] [seqno] [analyse]",
.function = ip6_set_ioam_rewrite_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm,
@@ -1455,13 +1441,11 @@ ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm,
* EDGE TO EDGE - PPC OPTION - 1 (Encap)
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_show_ioam_run_cmd, static) = {
.path = "show ioam summary",
.short_help = "show ioam summary",
.function = ip6_show_ioam_summary_cmd_fn,
};
-/* *INDENT-ON* */
void
vnet_register_ioam_end_of_path_callback (void *cb)
diff --git a/src/vnet/ip/ip6_inlines.h b/src/vnet/ip/ip6_inlines.h
index 2a4bb70573b..9bd475224eb 100644
--- a/src/vnet/ip/ip6_inlines.h
+++ b/src/vnet/ip/ip6_inlines.h
@@ -49,29 +49,40 @@ always_inline u32
ip6_compute_flow_hash (const ip6_header_t * ip,
flow_hash_config_t flow_hash_config)
{
- tcp_header_t *tcp;
+ const tcp_header_t *tcp;
+ const udp_header_t *udp = (void *) (ip + 1);
+ const gtpv1u_header_t *gtpu = (void *) (udp + 1);
u64 a, b, c;
u64 t1, t2;
+ u32 t3;
uword is_tcp_udp = 0;
u8 protocol = ip->protocol;
+ uword is_udp = protocol == IP_PROTOCOL_UDP;
- if (PREDICT_TRUE
- ((ip->protocol == IP_PROTOCOL_TCP)
- || (ip->protocol == IP_PROTOCOL_UDP)))
+ if (PREDICT_TRUE ((protocol == IP_PROTOCOL_TCP) || is_udp))
{
is_tcp_udp = 1;
tcp = (void *) (ip + 1);
}
- else if (ip->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ else
{
- ip6_hop_by_hop_header_t *hbh = (ip6_hop_by_hop_header_t *) (ip + 1);
- if ((hbh->protocol == IP_PROTOCOL_TCP) ||
- (hbh->protocol == IP_PROTOCOL_UDP))
+ const void *cur = ip + 1;
+ if (protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ {
+ const ip6_hop_by_hop_header_t *hbh = cur;
+ protocol = hbh->protocol;
+ cur += (hbh->length + 1) * 8;
+ }
+ if (protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ {
+ const ip6_fragment_ext_header_t *frag = cur;
+ protocol = frag->protocol;
+ }
+ else if (protocol == IP_PROTOCOL_TCP || protocol == IP_PROTOCOL_UDP)
{
is_tcp_udp = 1;
- tcp = (tcp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3));
+ tcp = cur;
}
- protocol = hbh->protocol;
}
t1 = (ip->src_address.as_u64[0] ^ ip->src_address.as_u64[1]);
@@ -113,7 +124,13 @@ ip6_compute_flow_hash (const ip6_header_t * ip,
((flow_hash_config & IP_FLOW_HASH_FL) ? ip6_flow_label_network_order (ip) :
0);
c ^= t1;
-
+ if (PREDICT_TRUE (is_udp) &&
+ PREDICT_FALSE ((flow_hash_config & IP_FLOW_HASH_GTPV1_TEID) &&
+ udp->dst_port == GTPV1_PORT_BE))
+ {
+ t3 = gtpu->teid;
+ a ^= t3;
+ }
hash_mix64 (a, b, c);
return (u32) c;
}
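Note: the TEID is folded in as carried on the wire (no byte swap), which is
fine for a flow hash that only needs consistency, not canonical byte order. A
hedged sketch of opting in, assuming IP_FLOW_HASH_DEFAULT is the stock
five-tuple mask:

    flow_hash_config_t cfg = IP_FLOW_HASH_DEFAULT | IP_FLOW_HASH_GTPV1_TEID;
    u32 h = ip6_compute_flow_hash (ip6, cfg);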
@@ -134,65 +151,17 @@ ip6_compute_flow_hash (const ip6_header_t * ip,
* it is a non-first fragment -1 is returned.
*/
always_inline int
-ip6_locate_header (vlib_buffer_t * p0,
- ip6_header_t * ip0, int find_hdr_type, u32 * offset)
+ip6_locate_header (vlib_buffer_t *b, ip6_header_t *ip, int find_hdr_type,
+ u32 *offset)
{
- u8 next_proto = ip0->protocol;
- u8 *next_header;
- u8 done = 0;
- u32 cur_offset;
- u8 *temp_nxthdr = 0;
- u32 exthdr_len = 0;
-
- next_header = ip6_next_header (ip0);
- cur_offset = sizeof (ip6_header_t);
- while (1)
+ ip6_ext_hdr_chain_t hdr_chain;
+ int res = ip6_ext_header_walk (b, ip, find_hdr_type, &hdr_chain);
+ if (res >= 0)
{
- done = (next_proto == find_hdr_type);
- if (PREDICT_FALSE
- (next_header >=
- (u8 *) vlib_buffer_get_current (p0) + p0->current_length))
- {
- //A malicious packet could set an extension header with a too big size
- return (-1);
- }
- if (done)
- break;
- if ((!ip6_ext_hdr (next_proto)) || next_proto == IP_PROTOCOL_IP6_NONXT)
- {
- if (find_hdr_type < 0)
- break;
- return -1;
- }
- if (next_proto == IP_PROTOCOL_IPV6_FRAGMENTATION)
- {
- ip6_frag_hdr_t *frag_hdr = (ip6_frag_hdr_t *) next_header;
- u16 frag_off = ip6_frag_hdr_offset (frag_hdr);
- /* Non first fragment return -1 */
- if (frag_off)
- return (-1);
- exthdr_len = sizeof (ip6_frag_hdr_t);
- temp_nxthdr = next_header + exthdr_len;
- }
- else if (next_proto == IP_PROTOCOL_IPSEC_AH)
- {
- exthdr_len =
- ip6_ext_authhdr_len (((ip6_ext_header_t *) next_header));
- temp_nxthdr = next_header + exthdr_len;
- }
- else
- {
- exthdr_len =
- ip6_ext_header_len (((ip6_ext_header_t *) next_header));
- temp_nxthdr = next_header + exthdr_len;
- }
- next_proto = ((ip6_ext_header_t *) next_header)->next_hdr;
- next_header = temp_nxthdr;
- cur_offset += exthdr_len;
+ *offset = hdr_chain.eh[res].offset;
+ return hdr_chain.eh[res].protocol;
}
-
- *offset = cur_offset;
- return (next_proto);
+ return -1;
}
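Note: a usage sketch of the simplified locator, assuming the whole extension
chain sits in the first buffer segment; the returned offset is measured from
the start of the ipv6 header:

    u32 offset = 0;
    int proto = ip6_locate_header (b, ip6, IP_PROTOCOL_TCP, &offset);
    if (proto == IP_PROTOCOL_TCP)
      {
        tcp_header_t *tcp = (tcp_header_t *) ((u8 *) ip6 + offset);
        /* inspect ports, flags, ... */
      }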
diff --git a/src/vnet/ip/ip6_input.c b/src/vnet/ip/ip6_input.c
index 01b8f46b4d8..64c9d76ebaa 100644
--- a/src/vnet/ip/ip6_input.c
+++ b/src/vnet/ip/ip6_input.c
@@ -219,21 +219,12 @@ VLIB_NODE_FN (ip6_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return frame->n_vectors;
}
-#ifndef CLIB_MARCH_VARIANT
-char *ip6_error_strings[] = {
-#define _(sym,string) string,
- foreach_ip6_error
-#undef _
-};
-#endif /* CLIB_MARCH_VARIANT */
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_input_node) = {
.name = "ip6-input",
.vector_size = sizeof (u32),
.n_errors = IP6_N_ERROR,
- .error_strings = ip6_error_strings,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP6_INPUT_N_NEXT,
.next_nodes = {
@@ -246,7 +237,6 @@ VLIB_REGISTER_NODE (ip6_input_node) = {
.format_buffer = format_ip6_header,
.format_trace = format_ip6_input_trace,
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_init (vlib_main_t * vm)
diff --git a/src/vnet/ip/ip6_input.h b/src/vnet/ip/ip6_input.h
index fe993caa889..49e37ec1808 100644
--- a/src/vnet/ip/ip6_input.h
+++ b/src/vnet/ip/ip6_input.h
@@ -43,8 +43,6 @@
#include <vnet/ip/ip.h>
#include <vnet/ip/icmp6.h>
-extern char *ip6_error_strings[];
-
typedef enum
{
IP6_INPUT_NEXT_DROP,
diff --git a/src/vnet/ip/ip6_link.c b/src/vnet/ip/ip6_link.c
index afa9d8e3ea9..c2a7ccacbc1 100644
--- a/src/vnet/ip/ip6_link.c
+++ b/src/vnet/ip/ip6_link.c
@@ -242,12 +242,10 @@ ip6_link_delegate_flush (ip6_link_t * il)
{
ip6_link_delegate_t *ild;
- /* *INDENT-OFF* */
FOREACH_IP6_LINK_DELEGATE (ild, il,
({
il_delegate_vfts[ild->ild_type].ildv_disable(ild->ild_index);
}));
- /* *INDENT-ON* */
vec_free (il->il_delegates);
il->il_delegates = NULL;
@@ -357,14 +355,12 @@ ip6_link_set_local_address (u32 sw_if_index, const ip6_address_t * address)
ip6_address_copy (&ilp.ilp_addr, address);
ip6_ll_table_entry_update (&ilp, FIB_ROUTE_PATH_LOCAL);
- /* *INDENT-OFF* */
FOREACH_IP6_LINK_DELEGATE (ild, il,
({
if (NULL != il_delegate_vfts[ild->ild_type].ildv_ll_change)
il_delegate_vfts[ild->ild_type].ildv_ll_change(ild->ild_index,
&il->il_ll_addr);
}));
- /* *INDENT-ON* */
return (0);
}
@@ -465,7 +461,6 @@ ip6_link_add_del_address (ip6_main_t * im,
if (NULL == il)
return;
- /* *INDENT-OFF* */
FOREACH_IP6_LINK_DELEGATE (ild, il,
({
if (is_delete)
@@ -481,7 +476,6 @@ ip6_link_add_del_address (ip6_main_t * im,
address, address_length);
}
}));
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -555,14 +549,12 @@ test_ip6_link_command_fn (vlib_main_t * vm,
* Original MAC address: 16:d9:e0:91:79:86
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_link_command, static) =
{
.path = "test ip6 link",
.function = test_ip6_link_command_fn,
.short_help = "test ip6 link <mac-address>",
};
-/* *INDENT-ON* */
static u8 *
ip6_print_addrs (u8 * s, u32 * addrs)
@@ -594,11 +586,10 @@ format_ip6_link (u8 * s, va_list * arg)
if (!ip6_link_is_enabled_i (il))
return (s);
- s = format (s, "%U is admin %s\n",
- format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface (vnm, il->il_sw_if_index),
- (vnet_sw_interface_is_admin_up (vnm, il->il_sw_if_index) ?
- "up" : "down"));
+ s = format (
+ s, "%U is admin %s\n", format_vnet_sw_if_index_name, vnm,
+ il->il_sw_if_index,
+ (vnet_sw_interface_is_admin_up (vnm, il->il_sw_if_index) ? "up" : "down"));
u32 ai;
u32 *link_scope = 0, *global_scope = 0;
@@ -660,13 +651,11 @@ format_ip6_link (u8 * s, va_list * arg)
s = format (s, "%U%U\n",
format_white_space, 4, format_ip6_address, &il->il_ll_addr);
- /* *INDENT-OFF* */
FOREACH_IP6_LINK_DELEGATE(ild, il,
({
s = format (s, "%U", il_delegate_vfts[ild->ild_type].ildv_format,
ild->ild_index, 2);
}));
- /* *INDENT-ON* */
return (s);
}
@@ -739,14 +728,12 @@ ip6_link_show (vlib_main_t * vm,
* show ip6 interface: IPv6 not enabled on interface
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_link_show_command, static) =
{
.path = "show ip6 interface",
.function = ip6_link_show,
.short_help = "show ip6 interface <interface>",
};
-/* *INDENT-ON* */
static clib_error_t *
enable_ip6_interface_cmd (vlib_main_t * vm,
@@ -779,14 +766,12 @@ enable_ip6_interface_cmd (vlib_main_t * vm,
* Example of how enable IPv6 on a given interface:
* @cliexcmd{enable ip6 interface GigabitEthernet2/0/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (enable_ip6_interface_command, static) =
{
.path = "enable ip6 interface",
.function = enable_ip6_interface_cmd,
.short_help = "enable ip6 interface <interface>",
};
-/* *INDENT-ON* */
static clib_error_t *
disable_ip6_interface_cmd (vlib_main_t * vm,
@@ -819,14 +804,12 @@ disable_ip6_interface_cmd (vlib_main_t * vm,
* Example of how disable IPv6 on a given interface:
* @cliexcmd{disable ip6 interface GigabitEthernet2/0/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (disable_ip6_interface_command, static) =
{
.path = "disable ip6 interface",
.function = disable_ip6_interface_cmd,
.short_help = "disable ip6 interface <interface>",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip6_ll_table.c b/src/vnet/ip/ip6_ll_table.c
index e4010bc43c4..f9172f6c50c 100644
--- a/src/vnet/ip/ip6_ll_table.c
+++ b/src/vnet/ip/ip6_ll_table.c
@@ -52,9 +52,8 @@ ip6_ll_fib_create (u32 sw_if_index)
vnet_main_t *vnm = vnet_get_main ();
u8 *desc;
- desc = format (NULL, "IP6-link-local:%U",
- format_vnet_sw_interface_name,
- vnm, vnet_get_sw_interface (vnm, sw_if_index));
+ desc = format (NULL, "IP6-link-local:%U", format_vnet_sw_if_index_name, vnm,
+ sw_if_index);
ip6_ll_table.ilt_fibs[sw_if_index] =
ip6_fib_table_create_and_lock (FIB_SOURCE_IP6_ND,
@@ -64,7 +63,6 @@ ip6_ll_fib_create (u32 sw_if_index)
* leave the default route as a drop, but fix fe::/10 to be a glean
* via the interface.
*/
- /* *INDENT-OFF* */
fib_prefix_t pfx = {
.fp_proto = FIB_PROTOCOL_IP6,
.fp_len = 10,
@@ -90,7 +88,6 @@ ip6_ll_fib_create (u32 sw_if_index)
1,
NULL,
FIB_ROUTE_PATH_FLAG_NONE);
- /* *INDENT-ON* */
}
static void
@@ -111,12 +108,17 @@ ip6_ll_table_entry_update (const ip6_ll_prefix_t * ilp,
.frp_flags = flags,
.frp_sw_if_index = ilp->ilp_sw_if_index,
.frp_proto = DPO_PROTO_IP6,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
};
- fib_prefix_t fp;
+ fib_prefix_t fp = { 0 };
- vec_validate (ip6_ll_table.ilt_fibs, ilp->ilp_sw_if_index);
+ if (flags & FIB_ROUTE_PATH_LOCAL)
+ rpath.frp_addr.ip6 = ilp->ilp_addr;
- if (0 == ip6_ll_fib_get (ilp->ilp_sw_if_index))
+ vec_validate_init_empty (ip6_ll_table.ilt_fibs, ilp->ilp_sw_if_index, ~0);
+
+ if (~0 == ip6_ll_fib_get (ilp->ilp_sw_if_index))
{
ip6_ll_fib_create (ilp->ilp_sw_if_index);
}
@@ -151,11 +153,12 @@ ip6_ll_table_entry_delete (const ip6_ll_prefix_t * ilp)
* if there are no ND sourced prefixes left, then we can clean up this FIB
*/
fib_index = ip6_ll_fib_get (ilp->ilp_sw_if_index);
- if (0 == fib_table_get_num_entries (fib_index,
- FIB_PROTOCOL_IP6, FIB_SOURCE_IP6_ND))
+ if (~0 != fib_index &&
+ 0 == fib_table_get_num_entries (fib_index, FIB_PROTOCOL_IP6,
+ FIB_SOURCE_IP6_ND))
{
fib_table_unlock (fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_IP6_ND);
- ip6_ll_table.ilt_fibs[ilp->ilp_sw_if_index] = 0;
+ ip6_ll_table.ilt_fibs[ilp->ilp_sw_if_index] = ~0;
}
}
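Note: 0 is a perfectly valid FIB index, so the per-interface vector now uses
~0 as the "no link-local FIB" sentinel, with vec_validate_init_empty() keeping
fresh slots at ~0. The lookup contract assumed throughout this file:

    if (~0 == ip6_ll_fib_get (sw_if_index))
      ; /* no ip6-ll FIB exists for this interface yet */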
@@ -273,8 +276,7 @@ ip6_ll_show_fib (vlib_main_t * vm,
u8 *s = NULL;
fib_index = ip6_ll_table.ilt_fibs[sw_if_index];
-
- if (0 == fib_index)
+ if (~0 == fib_index)
continue;
fib_table = fib_table_get (fib_index, FIB_PROTOCOL_IP6);
@@ -345,13 +347,21 @@ ip6_ll_show_fib (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_show_fib_command, static) = {
.path = "show ip6-ll",
.short_help = "show ip6-ll [summary] [interface] [<ip6-addr>[/<width>]] [detail]",
.function = ip6_ll_show_fib,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+ip6_ll_sw_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add)
+{
+ vec_validate_init_empty (ip6_ll_table.ilt_fibs, sw_if_index, ~0);
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_ll_sw_interface_add_del);
static clib_error_t *
ip6_ll_module_init (vlib_main_t * vm)
diff --git a/src/vnet/ip/ip6_ll_types.c b/src/vnet/ip/ip6_ll_types.c
index a7ac164b05a..b074b6e991c 100644
--- a/src/vnet/ip/ip6_ll_types.c
+++ b/src/vnet/ip/ip6_ll_types.c
@@ -23,10 +23,8 @@ format_ip6_ll_prefix (u8 * s, va_list * args)
ip6_ll_prefix_t *ilp = va_arg (*args, ip6_ll_prefix_t *);
vnet_main_t *vnm = vnet_get_main ();
- s = format (s, "(%U, %U)",
- format_ip6_address, &ilp->ilp_addr,
- format_vnet_sw_interface_name,
- vnm, vnet_get_sw_interface (vnm, ilp->ilp_sw_if_index));
+ s = format (s, "(%U, %U)", format_ip6_address, &ilp->ilp_addr,
+ format_vnet_sw_if_index_name, vnm, ilp->ilp_sw_if_index);
return (s);
}
diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h
index 7a8c31cee48..c506792ddcf 100644
--- a/src/vnet/ip/ip6_packet.h
+++ b/src/vnet/ip/ip6_packet.h
@@ -40,8 +40,9 @@
#ifndef included_ip6_packet_h
#define included_ip6_packet_h
-#include <vnet/tcp/tcp_packet.h>
+#include <vlib/vlib.h>
#include <vnet/ip/ip4_packet.h>
+#include <stdbool.h>
typedef union
{
@@ -62,13 +63,11 @@ typedef struct
} ip6_address_and_mask_t;
/* Packed so that the mhash key doesn't include uninitialized pad bytes */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
/* IP address must be first for ip_interface_address_get_address() to work */
ip6_address_t ip6_addr;
u32 fib_index;
}) ip6_address_fib_t;
-/* *INDENT-ON* */
always_inline void
ip6_addr_fib_init (ip6_address_fib_t * addr_fib,
@@ -424,97 +423,39 @@ ip6_copy_header (ip6_header_t * dst, const ip6_header_t * src)
dst->dst_address.as_uword[1] = src->dst_address.as_uword[1];
}
-always_inline void
-ip6_tcp_reply_x1 (ip6_header_t * ip0, tcp_header_t * tcp0)
-{
- {
- ip6_address_t src0, dst0;
-
- src0 = ip0->src_address;
- dst0 = ip0->dst_address;
- ip0->src_address = dst0;
- ip0->dst_address = src0;
- }
-
- {
- u16 src0, dst0;
-
- src0 = tcp0->src;
- dst0 = tcp0->dst;
- tcp0->src = dst0;
- tcp0->dst = src0;
- }
-}
-
-always_inline void
-ip6_tcp_reply_x2 (ip6_header_t * ip0, ip6_header_t * ip1,
- tcp_header_t * tcp0, tcp_header_t * tcp1)
-{
- {
- ip6_address_t src0, dst0, src1, dst1;
-
- src0 = ip0->src_address;
- src1 = ip1->src_address;
- dst0 = ip0->dst_address;
- dst1 = ip1->dst_address;
- ip0->src_address = dst0;
- ip1->src_address = dst1;
- ip0->dst_address = src0;
- ip1->dst_address = src1;
- }
-
- {
- u16 src0, dst0, src1, dst1;
-
- src0 = tcp0->src;
- src1 = tcp1->src;
- dst0 = tcp0->dst;
- dst1 = tcp1->dst;
- tcp0->src = dst0;
- tcp1->src = dst1;
- tcp0->dst = src0;
- tcp1->dst = src1;
- }
-}
-
-
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 data;
}) ip6_pad1_option_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 type;
u8 len;
u8 data[0];
}) ip6_padN_option_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
#define IP6_MLDP_ALERT_TYPE 0x5
u8 type;
u8 len;
u16 value;
}) ip6_router_alert_option_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 protocol;
+ u8 reserved;
+ u16 fragoff;
+ u32 id;
+}) ip6_fragment_ext_header_t;
+
typedef CLIB_PACKED (struct {
u8 next_hdr;
/* Length of this header plus option data in 8 byte units. */
u8 n_data_u64s;
}) ip6_ext_header_t;
-/* *INDENT-ON* */
#define foreach_ext_hdr_type \
_(IP6_HOP_BY_HOP_OPTIONS) \
_(IPV6_ROUTE) \
- _(IPV6_FRAGMENTATION) \
- _(IPSEC_ESP) \
- _(IPSEC_AH) \
_(IP6_DESTINATION_OPTIONS) \
_(MOBILITY) \
_(HIP) \
@@ -542,15 +483,70 @@ ip6_ext_hdr (u8 nexthdr)
#endif
}
+typedef CLIB_PACKED (struct {
+ u8 next_hdr;
+ /* Length of this header plus option data in 8 byte units. */
+ u8 n_data_u64s;
+ u8 data[0];
+}) ip6_hop_by_hop_ext_t;
+
+typedef CLIB_PACKED (struct {
+ u8 next_hdr;
+ u8 rsv;
+ u16 fragment_offset_and_more;
+ u32 identification;
+}) ip6_frag_hdr_t;
+
+#define ip6_frag_hdr_offset(hdr) \
+ (clib_net_to_host_u16 ((hdr)->fragment_offset_and_more) >> 3)
+
+#define ip6_frag_hdr_offset_bytes(hdr) (8 * ip6_frag_hdr_offset (hdr))
+
+#define ip6_frag_hdr_more(hdr) \
+ (clib_net_to_host_u16 ((hdr)->fragment_offset_and_more) & 0x1)
+
+#define ip6_frag_hdr_offset_and_more(offset, more) \
+ clib_host_to_net_u16 (((offset) << 3) + !!(more))
+
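Note: a worked instance of the relocated macros, for a fragment whose payload
starts at byte 1448 with more fragments to follow (the offset travels in
8-octet units in the upper 13 bits, the M bit in bit 0):

    ip6_frag_hdr_t h = {
      .fragment_offset_and_more = ip6_frag_hdr_offset_and_more (1448 / 8, 1),
    };
    /* ip6_frag_hdr_offset (&h) == 181, ip6_frag_hdr_offset_bytes (&h) == 1448,
       ip6_frag_hdr_more (&h) == 1 */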
#define ip6_ext_header_len(p) ((((ip6_ext_header_t *)(p))->n_data_u64s+1) << 3)
#define ip6_ext_authhdr_len(p) ((((ip6_ext_header_t *)(p))->n_data_u64s+2) << 2)
+static inline int
+ip6_ext_header_len_s (ip_protocol_t nh, void *p)
+{
+ if (ip6_ext_hdr (nh))
+ return ip6_ext_header_len (p);
+ switch (nh)
+ {
+ case IP_PROTOCOL_IPSEC_AH:
+ return ip6_ext_authhdr_len (p);
+ case IP_PROTOCOL_IPV6_FRAGMENTATION:
+ return sizeof (ip6_frag_hdr_t);
+ case IP_PROTOCOL_ICMP6:
+ return 4;
+ case IP_PROTOCOL_UDP:
+ return 8;
+ case IP_PROTOCOL_TCP:
+ return 20;
+ default: /* Caller is responsible for validating the length of terminating
+ protocols */
+ ;
+ }
+ return 0;
+}
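Note: the AH arm is why IPSEC_AH was dropped from foreach_ext_hdr_type above:
per RFC 4302 its length byte counts 32-bit words minus 2, so the generic
ip6_ext_header_len() (64-bit units) would misparse it. A worked example,
assuming an AH whose length byte is 4:

    /* (4 + 2) << 2 == 24 bytes via the IP_PROTOCOL_IPSEC_AH arm */
    ip6_ext_header_t ah = { .next_hdr = IP_PROTOCOL_TCP, .n_data_u64s = 4 };
    int len = ip6_ext_header_len_s (IP_PROTOCOL_IPSEC_AH, &ah);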
+
always_inline void *
ip6_ext_next_header (ip6_ext_header_t * ext_hdr)
{
return (void *) ((u8 *) ext_hdr + ip6_ext_header_len (ext_hdr));
}
+always_inline void *
+ip6_ext_next_header_offset (void *hdr, u16 offset)
+{
+ return (hdr + offset);
+}
+
always_inline int
vlib_object_within_buffer_data (vlib_main_t * vm, vlib_buffer_t * b,
void *obj, size_t len)
@@ -562,153 +558,144 @@ vlib_object_within_buffer_data (vlib_main_t * vm, vlib_buffer_t * b,
return 1;
}
-/*
- * find ipv6 extension header within ipv6 header within buffer b
- *
- * @param vm
- * @param b buffer to limit search to
- * @param ip6_header ipv6 header
- * @param header_type extension header type to search for
- * @param[out] prev_ext_header address of header preceding found header
- */
+/* Returns the number of bytes of buffer data left in b, counting from obj. */
+static inline u32
+vlib_bytes_left_in_buffer (vlib_buffer_t *b, void *obj)
+{
+ return b->current_length - (((u8 *) obj - b->data) - b->current_data);
+}
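Note: the arithmetic above, unpacked: b->data + b->current_data is the first
valid byte (what vlib_buffer_get_current() returns), so:

    u8 *start = b->data + b->current_data;
    u32 left = b->current_length - (u32) ((u8 *) obj - start);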
+
always_inline void *
-ip6_ext_header_find (vlib_main_t * vm, vlib_buffer_t * b,
- ip6_header_t * ip6_header, u8 header_type,
- ip6_ext_header_t ** prev_ext_header)
+ip6_ext_next_header_s (ip_protocol_t cur_nh, void *hdr, u32 max_offset,
+ u32 *offset, int *res_nh, bool *last)
{
- ip6_ext_header_t *prev = NULL;
- ip6_ext_header_t *result = NULL;
- if ((ip6_header)->protocol == header_type)
+ u16 hdrlen = 0;
+ int new_nh = -1;
+ void *res = 0;
+ if (ip6_ext_hdr (cur_nh))
{
- result = (void *) (ip6_header + 1);
- if (!vlib_object_within_buffer_data (vm, b, result,
- ip6_ext_header_len (result)))
- {
- result = NULL;
- }
+ hdrlen = ip6_ext_header_len (hdr);
+ new_nh = ((ip6_ext_header_t *) hdr)->next_hdr;
+ res = hdr + hdrlen;
+ }
+ else if (cur_nh == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ {
+ ip6_frag_hdr_t *frag_hdr = (ip6_frag_hdr_t *) hdr;
+ if (ip6_frag_hdr_offset (frag_hdr) > 0)
+ *last = true;
+ new_nh = frag_hdr->next_hdr;
+ hdrlen = sizeof (ip6_frag_hdr_t);
+ res = hdr + hdrlen;
+ }
+ else if (cur_nh == IP_PROTOCOL_IPSEC_AH)
+ {
+ new_nh = ((ip6_ext_header_t *) hdr)->next_hdr;
+ hdrlen = ip6_ext_authhdr_len (hdr);
+ res = hdr + hdrlen;
}
else
{
- result = NULL;
- prev = (void *) (ip6_header + 1);
- while (ip6_ext_hdr (prev->next_hdr) && prev->next_hdr != header_type)
- {
- prev = ip6_ext_next_header (prev);
- if (!vlib_object_within_buffer_data (vm, b, prev,
- ip6_ext_header_len (prev)))
- {
- prev = NULL;
- break;
- }
- }
- if (prev && (prev->next_hdr == header_type))
- {
- result = ip6_ext_next_header (prev);
- if (!vlib_object_within_buffer_data (vm, b, result,
- ip6_ext_header_len (result)))
- {
- result = NULL;
- }
- }
+ ;
}
- if (prev_ext_header)
+
+ if (res && (*offset + hdrlen) >= max_offset)
{
- *prev_ext_header = prev;
+ return 0;
}
- return result;
+ *res_nh = new_nh;
+ *offset += hdrlen;
+ return res;
}
+#define IP6_EXT_HDR_MAX (4) /* Maximum number of headers */
+#define IP6_EXT_HDR_MAX_DEPTH (256) /* Maximum header depth */
+typedef struct
+{
+ int length;
+ struct
+ {
+ u16 protocol;
+ u16 offset;
+ } eh[IP6_EXT_HDR_MAX];
+} ip6_ext_hdr_chain_t;
+
/*
- * walk extension headers, looking for a specific extension header and last
- * extension header, calculating length of all extension headers
+ * Walk the ipv6 extension header chain within the ipv6 header, bounded
+ * by whichever is smaller: the data available in the buffer or
+ * IP6_EXT_HDR_MAX_DEPTH. The complete header chain must sit in the
+ * first buffer.
*
- * @param vm
- * @param b buffer to limit search to
- * @param ip6_header ipv6 header
- * @param find_hdr extension header to look for (ignored if ext_hdr is NULL)
- * @param length[out] length of all extension headers
- * @param ext_hdr[out] extension header of type find_hdr (may be NULL)
- * @param last_ext_hdr[out] last extension header (may be NULL)
- *
- * @return 0 on success, -1 on failure (ext headers crossing buffer boundary)
+ * The complete header chain (up to the terminating header) is
+ * returned in res.
+ * Returns the index of find_hdr_type if it is found; otherwise the
+ * index of the last header, or -1 if the chain cannot be walked at
+ * all.
*/
always_inline int
-ip6_walk_ext_hdr (vlib_main_t * vm, vlib_buffer_t * b,
- const ip6_header_t * ip6_header, u8 find_hdr, u32 * length,
- ip6_ext_header_t ** ext_hdr,
- ip6_ext_header_t ** last_ext_hdr)
-{
- if (!ip6_ext_hdr (ip6_header->protocol))
- {
- *length = 0;
- *ext_hdr = NULL;
- *last_ext_hdr = NULL;
- return 0;
- }
- *length = 0;
- ip6_ext_header_t *h = (void *) (ip6_header + 1);
- if (!vlib_object_within_buffer_data (vm, b, h, ip6_ext_header_len (h)))
+ip6_ext_header_walk (vlib_buffer_t *b, ip6_header_t *ip, int find_hdr_type,
+ ip6_ext_hdr_chain_t *res)
+{
+ int i = 0;
+ int found = -1;
+ void *next_header = ip6_next_header (ip);
+ int next_proto = ip->protocol;
+ res->length = 0;
+ u32 n_bytes_this_buffer =
+ clib_min (vlib_bytes_left_in_buffer (b, ip), IP6_EXT_HDR_MAX_DEPTH);
+ u32 max_offset = clib_min (n_bytes_this_buffer,
+ sizeof (ip6_header_t) +
+ clib_net_to_host_u16 (ip->payload_length));
+ u32 offset = sizeof (ip6_header_t);
+ if ((ip6_ext_header_len_s (ip->protocol, next_header) + offset) > max_offset)
{
return -1;
}
- *length += ip6_ext_header_len (h);
- *last_ext_hdr = h;
- *ext_hdr = NULL;
- if (ip6_header->protocol == find_hdr)
+ bool last = false;
+ while (next_header)
{
- *ext_hdr = h;
+ /* Move on to next header */
+ res->eh[i].offset = offset;
+ res->eh[i].protocol = next_proto;
+ if (next_proto == find_hdr_type)
+ found = i;
+ i++;
+ if (last)
+ break;
+ if (i >= IP6_EXT_HDR_MAX)
+ break;
+ next_header = ip6_ext_next_header_s (next_proto, next_header, max_offset,
+ &offset, &next_proto, &last);
}
- while (ip6_ext_hdr (h->next_hdr))
+ res->length = i;
+ if (find_hdr_type < 0)
+ return i - 1;
+ return found != -1 ? found : i - 1;
+}
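Note: a usage sketch of the walker. Since it returns the index of the last
header when the target is absent, callers must re-check the protocol at the
returned index, exactly as the rewritten ip6_parse() does further down:

    ip6_ext_hdr_chain_t chain;
    int i = ip6_ext_header_walk (b, ip6, IP_PROTOCOL_IPV6_FRAGMENTATION, &chain);
    if (i >= 0 && chain.eh[i].protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
      {
        ip6_frag_hdr_t *fh =
          (ip6_frag_hdr_t *) ((u8 *) ip6 + chain.eh[i].offset);
        /* first-fragment check, reassembly, ... */
      }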
+
+always_inline void *
+ip6_ext_header_find (vlib_main_t *vm, vlib_buffer_t *b, ip6_header_t *ip,
+ int find_hdr_type, ip6_ext_header_t **prev_ext_header)
+{
+ ip6_ext_hdr_chain_t hdr_chain;
+ int res = ip6_ext_header_walk (b, ip, find_hdr_type, &hdr_chain);
+ if (res < 0)
+ return 0;
+
+ if (prev_ext_header)
{
- if (h->next_hdr == find_hdr)
+ if (res > 0)
{
- h = ip6_ext_next_header (h);
- *ext_hdr = h;
+ *prev_ext_header =
+ ip6_ext_next_header_offset (ip, hdr_chain.eh[res - 1].offset);
}
else
{
- h = ip6_ext_next_header (h);
+ *prev_ext_header = 0;
}
- if (!vlib_object_within_buffer_data (vm, b, h, ip6_ext_header_len (h)))
- {
- return -1;
- }
- *length += ip6_ext_header_len (h);
- *last_ext_hdr = h;
}
+ if (find_hdr_type == hdr_chain.eh[res].protocol)
+ return ip6_ext_next_header_offset (ip, hdr_chain.eh[res].offset);
return 0;
}
-/* *INDENT-OFF* */
-typedef CLIB_PACKED (struct {
- u8 next_hdr;
- /* Length of this header plus option data in 8 byte units. */
- u8 n_data_u64s;
- u8 data[0];
-}) ip6_hop_by_hop_ext_t;
-/* *INDENT-ON* */
-
-/* *INDENT-OFF* */
-typedef CLIB_PACKED (struct {
- u8 next_hdr;
- u8 rsv;
- u16 fragment_offset_and_more;
- u32 identification;
-}) ip6_frag_hdr_t;
-/* *INDENT-ON* */
-
-#define ip6_frag_hdr_offset(hdr) \
- (clib_net_to_host_u16((hdr)->fragment_offset_and_more) >> 3)
-
-#define ip6_frag_hdr_offset_bytes(hdr) \
- (8 * ip6_frag_hdr_offset(hdr))
-
-#define ip6_frag_hdr_more(hdr) \
- (clib_net_to_host_u16((hdr)->fragment_offset_and_more) & 0x1)
-
-#define ip6_frag_hdr_offset_and_more(offset, more) \
- clib_host_to_net_u16(((offset) << 3) + !!(more))
-
#endif /* included_ip6_packet_h */
/*
diff --git a/src/vnet/ip/ip6_punt_drop.c b/src/vnet/ip/ip6_punt_drop.c
index 4edb673c3fa..78ca9521f53 100644
--- a/src/vnet/ip/ip6_punt_drop.c
+++ b/src/vnet/ip/ip6_punt_drop.c
@@ -18,7 +18,6 @@
#include <vnet/policer/policer.h>
#include <vnet/policer/police_inlines.h>
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip6_punt) =
{
.arc_name = "ip6-punt",
@@ -30,7 +29,6 @@ VNET_FEATURE_ARC_INIT (ip6_drop) =
.arc_name = "ip6-drop",
.start_nodes = VNET_FEATURES ("ip6-drop", "ip6-not-enabled"),
};
-/* *INDENT-ON* */
extern ip_punt_policer_t ip6_punt_policer_cfg;
@@ -77,7 +75,6 @@ VLIB_NODE_FN (ip6_punt_policer_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_punt_policer_node) = {
.name = "ip6-punt-policer",
@@ -99,7 +96,6 @@ VNET_FEATURE_INIT (ip6_punt_policer_node, static) = {
.node_name = "ip6-punt-policer",
.runs_before = VNET_FEATURES("ip6-punt-redirect")
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_drop_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
@@ -134,7 +130,6 @@ VLIB_NODE_FN (ip6_punt_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_feat_arc_ip6_punt.feature_arc_index);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_drop_node) =
{
.name = "ip6-drop",
@@ -146,15 +141,11 @@ VLIB_REGISTER_NODE (ip6_drop_node) =
},
};
-VLIB_REGISTER_NODE (ip6_not_enabled_node) =
-{
+VLIB_REGISTER_NODE (ip6_not_enabled_node) = {
.name = "ip6-not-enabled",
.vector_size = sizeof (u32),
.format_trace = format_ip6_forward_next_trace,
- .n_next_nodes = 1,
- .next_nodes = {
- [0] = "error-drop",
- },
+ .sibling_of = "ip6-drop",
};
VLIB_REGISTER_NODE (ip6_punt_node) =
@@ -179,7 +170,6 @@ VNET_FEATURE_INIT (ip6_drop_end_of_arc, static) = {
.node_name = "error-drop",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON */
#ifndef CLIB_MARCH_VARIANT
void
@@ -243,7 +233,6 @@ done:
* @cliexpar
* @cliexcmd{set ip punt policer <INDEX>}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_punt_policer_command, static) =
{
.path = "ip6 punt policer",
@@ -251,7 +240,6 @@ VLIB_CLI_COMMAND (ip6_punt_policer_command, static) =
.short_help = "ip6 punt policer [add|del] <index>",
};
-/* *INDENT-ON* */
#define foreach_ip6_punt_redirect_error \
_(DROP, "ip6 punt redirect drop")
@@ -279,7 +267,6 @@ VLIB_NODE_FN (ip6_punt_redirect_node) (vlib_main_t * vm,
FIB_PROTOCOL_IP6));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_punt_redirect_node) = {
.name = "ip6-punt-redirect",
.vector_size = sizeof (u32),
@@ -301,10 +288,11 @@ VNET_FEATURE_INIT (ip6_punt_redirect_node, static) = {
.node_name = "ip6-punt-redirect",
.runs_before = VNET_FEATURES("error-punt")
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
+static u32 ip6_punt_redirect_enable_counts;
+
void
ip6_punt_redirect_add_paths (u32 rx_sw_if_index,
const fib_route_path_t *rpaths)
@@ -313,13 +301,16 @@ ip6_punt_redirect_add_paths (u32 rx_sw_if_index,
rx_sw_if_index,
FIB_FORW_CHAIN_TYPE_UNICAST_IP6, rpaths);
- vnet_feature_enable_disable ("ip6-punt", "ip6-punt-redirect", 0, 1, 0, 0);
+ if (1 == ++ip6_punt_redirect_enable_counts)
+ vnet_feature_enable_disable ("ip6-punt", "ip6-punt-redirect", 0, 1, 0, 0);
}
void
ip6_punt_redirect_del (u32 rx_sw_if_index)
{
- vnet_feature_enable_disable ("ip6-punt", "ip6-punt-redirect", 0, 0, 0, 0);
+ ASSERT (ip6_punt_redirect_enable_counts);
+ if (0 == --ip6_punt_redirect_enable_counts)
+ vnet_feature_enable_disable ("ip6-punt", "ip6-punt-redirect", 0, 0, 0, 0);
ip_punt_redirect_del (FIB_PROTOCOL_IP6, rx_sw_if_index);
}
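Note: the counter exists because the redirect feature is armed once on the
whole ip6-punt arc while redirects come and go per interface; reference
counting keeps it armed until the last redirect is removed.

    /* invariant: "ip6-punt-redirect" is enabled on the arc
       exactly when ip6_punt_redirect_enable_counts > 0 */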
@@ -392,14 +383,12 @@ done:
* @cliexpar
* @cliexcmd{set ip punt policer <INDEX>}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_punt_redirect_command, static) =
{
.path = "ip6 punt redirect",
.function = ip6_punt_redirect_cmd,
.short_help = "ip6 punt redirect [add|del] rx [<interface>|all] via [<nh>] <tx_interface>",
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
@@ -420,7 +409,6 @@ ip6_punt_redirect_show_cmd (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{set ip punt policer <INDEX>}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip6_punt_redirect_command, static) =
{
.path = "show ip6 punt redirect",
@@ -428,7 +416,6 @@ VLIB_CLI_COMMAND (show_ip6_punt_redirect_command, static) =
.short_help = "show ip6 punt redirect",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip6_to_ip4.h b/src/vnet/ip/ip6_to_ip4.h
index 6a533e3b54e..29d5718d4da 100644
--- a/src/vnet/ip/ip6_to_ip4.h
+++ b/src/vnet/ip/ip6_to_ip4.h
@@ -31,7 +31,6 @@ typedef int (*ip6_to_ip4_tcp_udp_set_fn_t) (vlib_buffer_t * b,
ip6_header_t * ip6,
ip4_header_t * ip4, void *ctx);
-/* *INDENT-OFF* */
static u8 icmp6_to_icmp_updater_pointer_table[] =
{ 0, 1, ~0, ~0,
2, 2, 9, 8,
@@ -44,7 +43,6 @@ static u8 icmp6_to_icmp_updater_pointer_table[] =
24, 24, 24, 24,
24, 24, 24, 24
};
-/* *INDENT-ON* */
#define frag_id_6to4(id) ((id) ^ ((id) >> 16))
@@ -62,41 +60,25 @@ static u8 icmp6_to_icmp_updater_pointer_table[] =
* @returns 0 on success, non-zero value otherwise.
*/
static_always_inline int
-ip6_parse (vlib_main_t * vm, vlib_buffer_t * b, const ip6_header_t * ip6,
- u32 buff_len, u8 * l4_protocol, u16 * l4_offset,
- u16 * frag_hdr_offset)
+ip6_parse (vlib_main_t *vm, vlib_buffer_t *b, ip6_header_t *ip6, u32 buff_len,
+ u8 *l4_protocol, u16 *l4_offset, u16 *frag_hdr_offset)
{
- ip6_ext_header_t *last_hdr, *frag_hdr;
- u32 length;
- if (ip6_walk_ext_hdr
- (vm, b, ip6, IP_PROTOCOL_IPV6_FRAGMENTATION, &length, &frag_hdr,
- &last_hdr))
+ ip6_ext_hdr_chain_t hdr_chain;
+ int res =
+ ip6_ext_header_walk (b, ip6, IP_PROTOCOL_IPV6_FRAGMENTATION, &hdr_chain);
+ if (res < 0)
{
return -1;
}
-
- if (length > 0)
- {
- if (frag_hdr)
- {
- *frag_hdr_offset = (u8 *) frag_hdr - (u8 *) ip6;
- }
- else
- {
- *frag_hdr_offset = 0;
- }
- *l4_protocol = last_hdr->next_hdr;
- }
+ if (hdr_chain.eh[res].protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ *frag_hdr_offset = hdr_chain.eh[res].offset;
else
- {
- *frag_hdr_offset = 0;
- *l4_protocol = ip6->protocol;
- }
- *l4_offset = sizeof (*ip6) + length;
+ *frag_hdr_offset = 0;
- return (buff_len < (*l4_offset + 4)) ||
- (clib_net_to_host_u16 (ip6->payload_length) <
- (*l4_offset + 4 - sizeof (*ip6)));
+ *l4_protocol = hdr_chain.eh[hdr_chain.length - 1].protocol;
+ *l4_offset = hdr_chain.eh[hdr_chain.length - 1].offset;
+
+ return 0;
}
/**
@@ -124,13 +106,13 @@ ip6_get_port (vlib_main_t * vm, vlib_buffer_t * b, ip6_header_t * ip6,
u16 frag_offset;
u8 *l4;
- if (ip6_parse
- (vm, b, ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset))
- return 0;
-
+ if (ip6_parse (vm, b, ip6, buffer_len, &l4_protocol, &l4_offset,
+ &frag_offset))
+ {
+ return 0;
+ }
if (frag_offset &&
- ip6_frag_hdr_offset (((ip6_frag_hdr_t *)
- u8_ptr_add (ip6, frag_offset))))
+ ip6_frag_hdr_offset (((ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset))))
return 0; //Can't deal with non-first fragment for now
if (ip_protocol)
diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c
index e197057d8c5..644b4988abc 100644
--- a/src/vnet/ip/ip_api.c
+++ b/src/vnet/ip/ip_api.c
@@ -106,7 +106,6 @@ vl_api_ip_table_dump_t_handler (vl_api_ip_table_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (fib_table, ip4_main.fibs)
{
send_ip_table_details(am, reg, mp->context, fib_table);
@@ -118,7 +117,6 @@ vl_api_ip_table_dump_t_handler (vl_api_ip_table_dump_t * mp)
continue;
send_ip_table_details(am, reg, mp->context, fib_table);
}
- /* *INDENT-ON* */
}
typedef struct vl_api_ip_fib_dump_walk_ctx_t_
@@ -326,7 +324,6 @@ vl_api_ip_mtable_dump_t_handler (vl_api_ip_mtable_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (mfib_table, ip4_main.mfibs)
{
send_ip_mtable_details (reg, mp->context, mfib_table);
@@ -335,7 +332,6 @@ vl_api_ip_mtable_dump_t_handler (vl_api_ip_mtable_dump_t * mp)
{
send_ip_mtable_details (reg, mp->context, mfib_table);
}
- /* *INDENT-ON* */
}
typedef struct vl_api_ip_mfib_dump_ctx_t_
@@ -514,7 +510,9 @@ vl_api_add_del_ip_punt_redirect_v2_t_handler (
goto out;
if (0 != n_paths)
- vec_validate (rpaths, n_paths - 1);
+ {
+ vec_validate (rpaths, n_paths - 1);
+ }
for (ii = 0; ii < n_paths; ii++)
{
@@ -780,12 +778,10 @@ vl_api_ip_route_add_del_t_handler (vl_api_ip_route_add_del_t * mp)
rv = ip_route_add_del_t_handler (mp, &stats_index);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IP_ROUTE_ADD_DEL_REPLY,
({
rmp->stats_index = htonl (stats_index);
}))
- /* *INDENT-ON* */
}
void
@@ -837,7 +833,6 @@ vl_api_ip_route_lookup_t_handler (vl_api_ip_route_lookup_t * mp)
}
}
- /* *INDENT-OFF* */
REPLY_MACRO3_ZERO(VL_API_IP_ROUTE_LOOKUP_REPLY,
npaths * sizeof (*fp),
({
@@ -857,7 +852,6 @@ vl_api_ip_route_lookup_t_handler (vl_api_ip_route_lookup_t * mp)
}
}
}));
- /* *INDENT-ON* */
vec_free (rpaths);
}
@@ -1047,12 +1041,10 @@ vl_api_ip_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp)
rv = api_mroute_add_del_t_handler (mp, &stats_index);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IP_MROUTE_ADD_DEL_REPLY,
({
rmp->stats_index = htonl (stats_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -1115,7 +1107,6 @@ vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp)
if (mp->is_ipv6)
{
- /* *INDENT-OFF* */
/* Do not send subnet details of the IP-interface for
* unnumbered interfaces. otherwise listening clients
* will be confused that the subnet is applied on more
@@ -1129,11 +1120,9 @@ vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp)
};
send_ip_address_details(am, reg, &pfx, sw_if_index, mp->context);
}));
- /* *INDENT-ON* */
}
else
{
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
({
fib_prefix_t pfx = {
@@ -1144,7 +1133,6 @@ vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp)
send_ip_address_details(am, reg, &pfx, sw_if_index, mp->context);
}));
- /* *INDENT-ON* */
}
BAD_SW_IF_INDEX_LABEL;
@@ -1201,7 +1189,6 @@ vl_api_ip_unnumbered_dump_t_handler (vl_api_ip_unnumbered_dump_t * mp)
}
else
{
- /* *INDENT-OFF* */
pool_foreach (si, im->sw_interfaces)
{
if ((si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED))
@@ -1212,7 +1199,6 @@ vl_api_ip_unnumbered_dump_t_handler (vl_api_ip_unnumbered_dump_t * mp)
mp->context);
}
}
- /* *INDENT-ON* */
}
BAD_SW_IF_INDEX_LABEL;
@@ -1235,13 +1221,11 @@ vl_api_ip_dump_t_handler (vl_api_ip_dump_t * mp)
/* Gather interfaces. */
sorted_sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
- _vec_len (sorted_sis) = 0;
- /* *INDENT-OFF* */
+ vec_set_len (sorted_sis, 0);
pool_foreach (si, im->sw_interfaces)
{
vec_add1 (sorted_sis, si[0]);
}
- /* *INDENT-ON* */
vec_foreach (si, sorted_sis)
{
@@ -1296,6 +1280,22 @@ vl_api_set_ip_flow_hash_v2_t_handler (vl_api_set_ip_flow_hash_v2_t *mp)
}
static void
+vl_api_set_ip_flow_hash_v3_t_handler (vl_api_set_ip_flow_hash_v3_t *mp)
+{
+ vl_api_set_ip_flow_hash_v3_reply_t *rmp;
+ ip_address_family_t af;
+ int rv;
+
+ rv = ip_address_family_decode (mp->af, &af);
+
+ if (!rv)
+ rv = ip_flow_hash_set (af, htonl (mp->table_id),
+ htonl (mp->flow_hash_config));
+
+ REPLY_MACRO (VL_API_SET_IP_FLOW_HASH_V3_REPLY);
+}
+
+static void
vl_api_set_ip_flow_hash_router_id_t_handler (
vl_api_set_ip_flow_hash_router_id_t *mp)
{
@@ -1705,7 +1705,6 @@ vl_api_ip_table_flush_t_handler (vl_api_ip_table_flush_t * mp)
vnet_sw_interface_t *si;
/* Shut down interfaces in this FIB / clean out intfc routes */
- /* *INDENT-OFF* */
pool_foreach (si, im->sw_interfaces)
{
if (fib_index == fib_table_get_index_for_sw_if_index (fproto,
@@ -1716,7 +1715,6 @@ vl_api_ip_table_flush_t_handler (vl_api_ip_table_flush_t * mp)
vnet_sw_interface_set_flags (vnm, si->sw_if_index, flags);
}
}
- /* *INDENT-ON* */
fib_table_flush (fib_index, fproto, FIB_SOURCE_API);
mfib_table_flush (mfib_table_find (fproto, ntohl (mp->table.table_id)),
@@ -1873,6 +1871,30 @@ void
REPLY_MACRO (VL_API_IP_REASSEMBLY_ENABLE_DISABLE_REPLY);
}
+void
+vl_api_ip_local_reass_enable_disable_t_handler (
+ vl_api_ip_local_reass_enable_disable_t *mp)
+{
+ vl_api_ip_local_reass_enable_disable_reply_t *rmp;
+ int rv = 0;
+
+ ip4_local_full_reass_enable_disable (mp->enable_ip4);
+ ip6_local_full_reass_enable_disable (mp->enable_ip6);
+
+ REPLY_MACRO (VL_API_IP_LOCAL_REASS_ENABLE_DISABLE_REPLY);
+}
+
+void
+vl_api_ip_local_reass_get_t_handler (vl_api_ip_local_reass_get_t *mp)
+{
+ vl_api_ip_local_reass_get_reply_t *rmp;
+ int rv = 0;
+ REPLY_MACRO2 (VL_API_IP_LOCAL_REASS_GET_REPLY, {
+ rmp->ip4_is_enabled = ip4_local_full_reass_enabled ();
+ rmp->ip6_is_enabled = ip6_local_full_reass_enabled ();
+ });
+}
+
static walk_rc_t
send_ip_punt_redirect_details (u32 rx_sw_if_index,
const ip_punt_redirect_rx_t * ipr, void *arg)
@@ -2091,17 +2113,21 @@ ip_api_hookup (vlib_main_t * vm)
api_main_t *am = vlibapi_get_main ();
/*
- * Mark the route add/del API as MP safe
+ * Set up the (msg_name, crc, message-id) table
*/
- am->is_mp_safe[VL_API_IP_ROUTE_ADD_DEL] = 1;
- am->is_mp_safe[VL_API_IP_ROUTE_ADD_DEL_REPLY] = 1;
- am->is_mp_safe[VL_API_IP_ROUTE_ADD_DEL_V2] = 1;
- am->is_mp_safe[VL_API_IP_ROUTE_ADD_DEL_V2_REPLY] = 1;
+ REPLY_MSG_ID_BASE = setup_message_id_table ();
/*
- * Set up the (msg_name, crc, message-id) table
+ * Mark the route add/del API as MP safe
*/
- REPLY_MSG_ID_BASE = setup_message_id_table ();
+ vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL,
+ 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_REPLY, 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_V2, 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_V2_REPLY, 1);
return 0;
}
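Note: the reordering in this hunk is load-bearing: message ids are now offset
by the dynamically assigned REPLY_MSG_ID_BASE, so setup_message_id_table()
must run before any id is handed to vl_api_set_msg_thread_safe(); the old
direct writes to am->is_mp_safe[] used un-offset ids, which presumably stops
being correct once the base is non-zero.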
diff --git a/src/vnet/ip/ip_checksum.c b/src/vnet/ip/ip_checksum.c
index 1ac7248ea05..4fbf1fb74fa 100644
--- a/src/vnet/ip/ip_checksum.c
+++ b/src/vnet/ip/ip_checksum.c
@@ -165,14 +165,12 @@ test_ip_checksum_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_checksum, static) =
{
.path = "test ip checksum",
.short_help = "test ip checksum",
.function = test_ip_checksum_fn,
};
-/* *INDENT-ON* */
#endif /* CLIB_DEBUG */
diff --git a/src/vnet/ip/ip_container_proxy.c b/src/vnet/ip/ip_container_proxy.c
index 18d07ba6082..1618704e804 100644
--- a/src/vnet/ip/ip_container_proxy.c
+++ b/src/vnet/ip/ip_container_proxy.c
@@ -138,7 +138,6 @@ ip_container_proxy_walk (ip_container_proxy_cb_t cb, void *ctx)
};
u32 fib_index;
- /* *INDENT-OFF* */
pool_foreach_index (fib_index, ip4_main.fibs)
{
fib_table_walk (fib_index, FIB_PROTOCOL_IP4,
@@ -149,7 +148,6 @@ ip_container_proxy_walk (ip_container_proxy_cb_t cb, void *ctx)
fib_table_walk (fib_index, FIB_PROTOCOL_IP6,
ip_container_proxy_fib_table_walk, &wctx);
}
- /* *INDENT-ON* */
}
clib_error_t *
@@ -216,14 +214,12 @@ ip_container_cmd (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip_container_command_node, static) = {
.path = "ip container",
.function = ip_container_cmd,
.short_help = "ip container <address> <interface>",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
clib_error_t *
show_ip_container_cmd_fn (vlib_main_t * vm, unformat_input_t * main_input,
@@ -275,14 +271,12 @@ show_ip_container_cmd_fn (vlib_main_t * vm, unformat_input_t * main_input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_container_command, static) = {
.path = "show ip container",
.function = show_ip_container_cmd_fn,
.short_help = "show ip container <address> <interface>",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip_flow_hash.h b/src/vnet/ip/ip_flow_hash.h
index bd37ef7307b..30dfcd70a1b 100644
--- a/src/vnet/ip/ip_flow_hash.h
+++ b/src/vnet/ip/ip_flow_hash.h
@@ -38,7 +38,17 @@
_ (proto, 4, IP_FLOW_HASH_PROTO) \
_ (reverse, 5, IP_FLOW_HASH_REVERSE_SRC_DST) \
_ (symmetric, 6, IP_FLOW_HASH_SYMMETRIC) \
- _ (flowlabel, 7, IP_FLOW_HASH_FL)
+ _ (flowlabel, 7, IP_FLOW_HASH_FL) \
+ _ (gtpv1teid, 8, IP_FLOW_HASH_GTPV1_TEID)
+
+typedef struct
+{
+ u8 ver_flags;
+ u8 type;
+ u16 length;
+ u32 teid;
+} __attribute__ ((packed)) gtpv1u_header_t;
+#define GTPV1_PORT_BE 0x6808
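Note: sanity check on the constant: GTP-U rides on UDP port 2152, i.e. 0x0868,
whose network-byte-order image read as a host u16 on a little-endian machine
is 0x6808, so udp->dst_port compares without a swap:

    ASSERT (GTPV1_PORT_BE == clib_host_to_net_u16 (2152));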
/**
* A flow hash configuration is a mask of the flow hash options
diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c
index b9bc90dcc11..934e40a5d18 100644
--- a/src/vnet/ip/ip_frag.c
+++ b/src/vnet/ip/ip_frag.c
@@ -25,10 +25,10 @@
typedef struct
{
- u8 ipv6;
u16 mtu;
u8 next;
u16 n_fragments;
+ u16 pkt_size;
} ip_frag_trace_t;
static u8 *
@@ -37,8 +37,8 @@ format_ip_frag_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip_frag_trace_t *t = va_arg (*args, ip_frag_trace_t *);
- s = format (s, "IPv%s mtu: %u fragments: %u next: %d",
- t->ipv6 ? "6" : "4", t->mtu, t->n_fragments, t->next);
+ s = format (s, "mtu: %u pkt-size: %u fragments: %u next: %d", t->mtu,
+ t->pkt_size, t->n_fragments, t->next);
return s;
}
@@ -95,7 +95,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
{
vlib_buffer_t *from_b;
ip4_header_t *ip4;
- u16 len, max, rem, ip_frag_id, ip_frag_offset;
+ u16 len, max, rem, ip_frag_id, ip_frag_offset, head_bytes;
u8 *org_from_packet, more;
from_b = vlib_get_buffer (vm, from_bi);
@@ -103,9 +103,9 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
ip4 = vlib_buffer_get_current (from_b) + l2unfragmentablesize;
rem = clib_net_to_host_u16 (ip4->length) - sizeof (ip4_header_t);
- max =
- (clib_min (mtu, vlib_buffer_get_default_data_size (vm)) -
- sizeof (ip4_header_t)) & ~0x7;
+ head_bytes = sizeof (ip4_header_t) + l2unfragmentablesize;
+ max = (clib_min (mtu, vlib_buffer_get_default_data_size (vm)) - head_bytes) &
+ ~0x7;
if (rem >
(vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip4_header_t)))
@@ -142,8 +142,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
u8 *from_data = (void *) (ip4 + 1);
vlib_buffer_t *org_from_b = from_b;
u16 fo = 0;
- u16 left_in_from_buffer =
- from_b->current_length - (l2unfragmentablesize + sizeof (ip4_header_t));
+ u16 left_in_from_buffer = from_b->current_length - head_bytes;
u16 ptr = 0;
/* Do the actual fragmentation */
@@ -166,8 +165,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
/* Copy ip4 header */
to_data = vlib_buffer_get_current (to_b);
- clib_memcpy_fast (to_data, org_from_packet,
- l2unfragmentablesize + sizeof (ip4_header_t));
+ clib_memcpy_fast (to_data, org_from_packet, head_bytes);
to_ip4 = (ip4_header_t *) (to_data + l2unfragmentablesize);
to_data = (void *) (to_ip4 + 1);
vnet_buffer (to_b)->l3_hdr_offset = to_b->current_data;
@@ -213,8 +211,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
}
to_b->flags |= VNET_BUFFER_F_IS_IP4;
- to_b->current_length =
- len + sizeof (ip4_header_t) + l2unfragmentablesize;
+ to_b->current_length = len + head_bytes;
to_ip4->fragment_id = ip_frag_id;
to_ip4->flags_and_fragment_offset =
@@ -286,7 +283,7 @@ frag_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
ip_frag_trace_t *tr =
vlib_add_trace (vm, node, p0, sizeof (*tr));
tr->mtu = mtu;
- tr->ipv6 = is_ip6 ? 1 : 0;
+ tr->pkt_size = vlib_buffer_length_in_chain (vm, p0);
tr->n_fragments = vec_len (buffer);
tr->next = vnet_buffer (p0)->ip_frag.next_index;
}
@@ -385,13 +382,17 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
ip6_header_t *ip6;
u16 len, max, rem, ip_frag_id;
u8 *org_from_packet;
+ u16 head_bytes;
from_b = vlib_get_buffer (vm, from_bi);
org_from_packet = vlib_buffer_get_current (from_b);
ip6 = vlib_buffer_get_current (from_b) + l2unfragmentablesize;
+ head_bytes =
+ (sizeof (ip6_header_t) + sizeof (ip6_frag_hdr_t) + l2unfragmentablesize);
rem = clib_net_to_host_u16 (ip6->payload_length);
- max = (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) & ~0x7; // TODO: Is max correct??
+ max = (clib_min (mtu, vlib_buffer_get_default_data_size (vm)) - head_bytes) &
+ ~0x7;
if (rem >
(vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip6_header_t)))
@@ -423,9 +424,7 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
ip6_frag_hdr_t *to_frag_hdr;
u8 *to_data;
- len =
- (rem >
- (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) ? max : rem);
+ len = (rem > max ? max : rem);
if (len != rem) /* Last fragment does not need to be divisible by 8 */
len &= ~0x7;
if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0)
@@ -438,7 +437,7 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
/* Copy ip6 header */
clib_memcpy_fast (to_b->data, org_from_packet,
l2unfragmentablesize + sizeof (ip6_header_t));
- to_ip6 = vlib_buffer_get_current (to_b);
+ to_ip6 = vlib_buffer_get_current (to_b) + l2unfragmentablesize;
to_frag_hdr = (ip6_frag_hdr_t *) (to_ip6 + 1);
to_data = (void *) (to_frag_hdr + 1);
@@ -484,8 +483,7 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
to_ptr += bytes_to_copy;
}
- to_b->current_length =
- len + sizeof (ip6_header_t) + sizeof (ip6_frag_hdr_t);
+ to_b->current_length = len + head_bytes;
to_ip6->payload_length =
clib_host_to_net_u16 (len + sizeof (ip6_frag_hdr_t));
to_ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
@@ -502,13 +500,6 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
return IP_FRAG_ERROR_NONE;
}
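Note: the removed TODO asked whether max was correct; it was not bounded by
the buffer data size, which the clib_min() above now enforces. A worked
instance, assuming mtu = 1500, no unfragmentable L2 bytes, and a default
buffer size of at least the mtu:

    /* head_bytes = 40 (ip6) + 8 (frag hdr) = 48
       max = (1500 - 48) & ~0x7 = 1448 payload bytes per fragment,
       keeping every non-final fragment a multiple of 8 octets */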
-char *ip4_frag_error_strings[] = {
-#define _(sym,string) string,
- foreach_ip_frag_error
-#undef _
-};
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_frag_node) = {
.function = ip4_frag,
.name = IP4_FRAG_NODE_NAME,
@@ -517,21 +508,17 @@ VLIB_REGISTER_NODE (ip4_frag_node) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.n_errors = IP_FRAG_N_ERROR,
- .error_strings = ip4_frag_error_strings,
+ .error_counters = ip_frag_error_counters,
.n_next_nodes = IP_FRAG_N_NEXT,
- .next_nodes = {
- [IP_FRAG_NEXT_IP_REWRITE] = "ip4-rewrite",
- [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip4-midchain",
- [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
- [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
- [IP_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [IP_FRAG_NEXT_DROP] = "ip4-drop"
- },
+ .next_nodes = { [IP_FRAG_NEXT_IP_REWRITE] = "ip4-rewrite",
+ [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip4-midchain",
+ [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [IP_FRAG_NEXT_DROP] = "ip4-drop" },
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_frag_node) = {
.function = ip6_frag,
.name = IP6_FRAG_NODE_NAME,
@@ -540,19 +527,16 @@ VLIB_REGISTER_NODE (ip6_frag_node) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.n_errors = IP_FRAG_N_ERROR,
- .error_strings = ip4_frag_error_strings,
+ .error_counters = ip_frag_error_counters,
.n_next_nodes = IP_FRAG_N_NEXT,
- .next_nodes = {
- [IP_FRAG_NEXT_IP_REWRITE] = "ip6-rewrite",
- [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip6-midchain",
- [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
- [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
- [IP_FRAG_NEXT_ICMP_ERROR] = "error-drop",
- [IP_FRAG_NEXT_DROP] = "ip6-drop"
- },
+ .next_nodes = { [IP_FRAG_NEXT_IP_REWRITE] = "ip6-rewrite",
+ [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip6-midchain",
+ [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP_FRAG_NEXT_ICMP_ERROR] = "error-drop",
+ [IP_FRAG_NEXT_DROP] = "ip6-drop" },
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip_frag.h b/src/vnet/ip/ip_frag.h
index ac562c944a3..4ddd62b89e6 100644
--- a/src/vnet/ip/ip_frag.h
+++ b/src/vnet/ip/ip_frag.h
@@ -36,6 +36,7 @@
#define IP_FRAG_H
#include <vnet/vnet.h>
+#include <vnet/ip/ip.api_enum.h>
#define IP_FRAG_FLAG_IP4_HEADER 0x01 //Encapsulating IPv4 header
#define IP_FRAG_FLAG_IP6_HEADER 0x02 //Encapsulating IPv6 header
@@ -57,26 +58,7 @@ typedef enum
IP_FRAG_N_NEXT
} ip_frag_next_t;
-#define foreach_ip_frag_error \
- /* Must be first. */ \
- _(NONE, "packet fragmented") \
- _(SMALL_PACKET, "packet smaller than MTU") \
- _(FRAGMENT_SENT, "number of sent fragments") \
- _(CANT_FRAGMENT_HEADER, "can't fragment header") \
- _(DONT_FRAGMENT_SET, "can't fragment this packet") \
- _(MALFORMED, "malformed packet") \
- _(MEMORY, "could not allocate buffer") \
- _(UNKNOWN, "unknown error")
-
-typedef enum
-{
-#define _(sym,str) IP_FRAG_ERROR_##sym,
- foreach_ip_frag_error
-#undef _
- IP_FRAG_N_ERROR,
-} ip_frag_error_t;
-
-extern char *ip4_frag_error_strings[];
+typedef vl_counter_ip_frag_enum_t ip_frag_error_t;
void ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 mtu,
u8 next_index, u8 flags);
diff --git a/src/vnet/ip/ip_in_out_acl.c b/src/vnet/ip/ip_in_out_acl.c
index e858c8ed9cc..eb3c94a188a 100644
--- a/src/vnet/ip/ip_in_out_acl.c
+++ b/src/vnet/ip/ip_in_out_acl.c
@@ -32,11 +32,26 @@ format_ip_in_out_acl_trace (u8 * s, u32 is_output, va_list * args)
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip_in_out_acl_trace_t *t = va_arg (*args, ip_in_out_acl_trace_t *);
-
- s = format (s, "%s: sw_if_index %d, next_index %d, table %d, offset %d",
- is_output ? "OUTACL" : "INACL",
- t->sw_if_index, t->next_index, t->table_index, t->offset);
- return s;
+ const vnet_classify_main_t *vcm = &vnet_classify_main;
+ const u32 indent = format_get_indent (s);
+ vnet_classify_table_t *table;
+ vnet_classify_entry_t *e;
+
+ s =
+ format (s, "%s: sw_if_index %d, next_index %d, table_index %d, offset %d",
+ is_output ? "OUTACL" : "INACL", t->sw_if_index, t->next_index,
+ t->table_index, t->offset);
+
+ if (pool_is_free_index (vcm->tables, t->table_index))
+ return format (s, "\n%Uno table", format_white_space, indent + 4);
+
+ if (~0 == t->offset)
+ return format (s, "\n%Uno match", format_white_space, indent + 4);
+
+ table = vnet_classify_table_get (t->table_index);
+ e = vnet_classify_get_entry (table, t->offset);
+ return format (s, "\n%U%U", format_white_space, indent + 4,
+ format_classify_entry, table, e);
}
static u8 *
@@ -97,16 +112,14 @@ static char *ip_outacl_error_strings[] = {
};
static_always_inline void
-ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
- vlib_frame_t *frame, vlib_buffer_t **b, u16 *next,
- u32 n_left, u32 *hits__, u32 *misses__,
- u32 *chain_hits__, const vlib_error_t error_none,
- const vlib_error_t error_deny,
- const vlib_error_t error_miss,
- vnet_classify_table_t *tables,
- const u32 *table_index_by_sw_if_index,
- vnet_config_main_t *cm, const vlib_rx_or_tx_t way,
- const int is_output, const int do_trace)
+ip_in_out_acl_inline_trace (
+ vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame,
+ vlib_buffer_t **b, u16 *next, u32 n_left, u32 *hits__, u32 *misses__,
+ u32 *chain_hits__, const vlib_error_t error_none,
+ const vlib_error_t error_deny, const vlib_error_t error_miss,
+ vnet_classify_table_t *tables, const u32 *table_index_by_sw_if_index,
+ u32 *fib_index_by_sw_if_index, vnet_config_main_t *cm,
+ const vlib_rx_or_tx_t way, const int is_output, const int do_trace)
{
f64 now = vlib_time_now (vm);
u32 hits = 0;
@@ -117,7 +130,7 @@ ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 sw_if_index[4];
u32 table_index[4];
vnet_classify_table_t *t[4] = { 0, 0 };
- u64 hash[4];
+ u32 hash[4];
/* calculate hashes for b[0] & b[1] */
if (n_left >= 2)
@@ -149,16 +162,16 @@ ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
if (is_output)
{
/* Save the rewrite length, since we are using the l2_classify struct */
- vnet_buffer (b[0])->l2_classify.pad.l2_len =
+ vnet_buffer (b[0])->l2.l2_len =
vnet_buffer (b[0])->ip.save_rewrite_length;
/* advance the match pointer so the matching happens on IP header */
- h[2] += vnet_buffer (b[0])->l2_classify.pad.l2_len;
+ h[2] += vnet_buffer (b[0])->l2.l2_len;
/* Save the rewrite length, since we are using the l2_classify struct */
- vnet_buffer (b[1])->l2_classify.pad.l2_len =
+ vnet_buffer (b[1])->l2.l2_len =
vnet_buffer (b[1])->ip.save_rewrite_length;
/* advance the match pointer so the matching happens on IP header */
- h[3] += vnet_buffer (b[1])->l2_classify.pad.l2_len;
+ h[3] += vnet_buffer (b[1])->l2.l2_len;
}
hash[2] = vnet_classify_hash_packet_inline (t[2], (u8 *) h[2]);
@@ -239,16 +252,16 @@ ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
if (is_output)
{
/* Save the rewrite length, since we are using the l2_classify struct */
- vnet_buffer (b[2])->l2_classify.pad.l2_len =
+ vnet_buffer (b[2])->l2.l2_len =
vnet_buffer (b[2])->ip.save_rewrite_length;
/* advance the match pointer so the matching happens on IP header */
- h[2] += vnet_buffer (b[2])->l2_classify.pad.l2_len;
+ h[2] += vnet_buffer (b[2])->l2.l2_len;
/* Save the rewrite length, since we are using the l2_classify struct */
- vnet_buffer (b[3])->l2_classify.pad.l2_len =
+ vnet_buffer (b[3])->l2.l2_len =
vnet_buffer (b[3])->ip.save_rewrite_length;
/* advance the match pointer so the matching happens on IP header */
- h[3] += vnet_buffer (b[3])->l2_classify.pad.l2_len;
+ h[3] += vnet_buffer (b[3])->l2.l2_len;
}
hash[2] = vnet_classify_hash_packet_inline (t[2], (u8 *) h[2]);
@@ -301,16 +314,22 @@ ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
e[0]->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX)
vnet_buffer (b[0])->sw_if_index[VLIB_TX] = e[0]->metadata;
else if (e[0]->action == CLASSIFY_ACTION_SET_METADATA)
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] =
- e[0]->metadata;
+ {
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX] =
+ e[0]->metadata;
+ /* For source check in case we skip the lookup node */
+ ip_lookup_set_buffer_fib_index (fib_index_by_sw_if_index,
+ b[0]);
+ }
}
}
else
{
while (1)
{
- if (PREDICT_TRUE (t[0]->next_table_index != ~0))
- t[0] = pool_elt_at_index (tables, t[0]->next_table_index);
+ table_index[0] = t[0]->next_table_index;
+ if (PREDICT_TRUE (table_index[0] != ~0))
+ t[0] = pool_elt_at_index (tables, table_index[0]);
else
{
_next[0] = (t[0]->miss_next_index < n_next_nodes) ?
@@ -333,7 +352,7 @@ ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
/* advance the match pointer so the matching happens on IP header */
if (is_output)
- h[0] += vnet_buffer (b[0])->l2_classify.pad.l2_len;
+ h[0] += vnet_buffer (b[0])->l2.l2_len;
hash[0] =
vnet_classify_hash_packet_inline (t[0], (u8 *) h[0]);
@@ -364,8 +383,14 @@ ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
e[0]->metadata;
else if (e[0]->action ==
CLASSIFY_ACTION_SET_METADATA)
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] =
- e[0]->metadata;
+ {
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX] =
+ e[0]->metadata;
+ /* For source check in case we skip the lookup
+ * node */
+ ip_lookup_set_buffer_fib_index (
+ fib_index_by_sw_if_index, b[0]);
+ }
}
break;
}
@@ -397,16 +422,22 @@ ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
e[1]->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX)
vnet_buffer (b[1])->sw_if_index[VLIB_TX] = e[1]->metadata;
else if (e[1]->action == CLASSIFY_ACTION_SET_METADATA)
- vnet_buffer (b[1])->ip.adj_index[VLIB_TX] =
- e[1]->metadata;
+ {
+ vnet_buffer (b[1])->ip.adj_index[VLIB_TX] =
+ e[1]->metadata;
+ /* For source check in case we skip the lookup node */
+ ip_lookup_set_buffer_fib_index (fib_index_by_sw_if_index,
+ b[1]);
+ }
}
}
else
{
while (1)
{
- if (PREDICT_TRUE (t[1]->next_table_index != ~0))
- t[1] = pool_elt_at_index (tables, t[1]->next_table_index);
+ table_index[1] = t[1]->next_table_index;
+ if (PREDICT_TRUE (table_index[1] != ~0))
+ t[1] = pool_elt_at_index (tables, table_index[1]);
else
{
_next[1] = (t[1]->miss_next_index < n_next_nodes) ?
@@ -429,7 +460,7 @@ ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
/* advance the match pointer so the matching happens on IP header */
if (is_output)
- h[1] += vnet_buffer (b[1])->l2_classify.pad.l2_len;
+ h[1] += vnet_buffer (b[1])->l2.l2_len;
hash[1] =
vnet_classify_hash_packet_inline (t[1], (u8 *) h[1]);
@@ -460,8 +491,14 @@ ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
e[1]->metadata;
else if (e[1]->action ==
CLASSIFY_ACTION_SET_METADATA)
- vnet_buffer (b[1])->ip.adj_index[VLIB_TX] =
- e[1]->metadata;
+ {
+ vnet_buffer (b[1])->ip.adj_index[VLIB_TX] =
+ e[1]->metadata;
+ /* For source check in case we skip the lookup
+ * node */
+ ip_lookup_set_buffer_fib_index (
+ fib_index_by_sw_if_index, b[1]);
+ }
}
break;
}
@@ -476,7 +513,7 @@ ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
_t->sw_if_index =
~0 == way ? 0 : vnet_buffer (b[0])->sw_if_index[way];
_t->next_index = _next[0];
- _t->table_index = t[0] ? t[0] - tables : ~0;
+ _t->table_index = table_index[0];
_t->offset = (e[0]
&& t[0]) ? vnet_classify_get_offset (t[0], e[0]) : ~0;
}
@@ -488,7 +525,7 @@ ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
_t->sw_if_index =
~0 == way ? 0 : vnet_buffer (b[1])->sw_if_index[way];
_t->next_index = _next[1];
- _t->table_index = t[1] ? t[1] - tables : ~0;
+ _t->table_index = table_index[1];
_t->offset = (e[1]
&& t[1]) ? vnet_classify_get_offset (t[1], e[1]) : ~0;
}
@@ -522,7 +559,7 @@ ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
vnet_classify_table_t *t0 = 0;
vnet_classify_entry_t *e0 = 0;
u32 next0 = ACL_NEXT_INDEX_DENY;
- u64 hash0;
+ u32 hash0;
sw_if_index0 = ~0 == way ? 0 : vnet_buffer (b[0])->sw_if_index[way];
table_index0 = table_index_by_sw_if_index[sw_if_index0];
@@ -538,10 +575,10 @@ ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
if (is_output)
{
/* Save the rewrite length, since we are using the l2_classify struct */
- vnet_buffer (b[0])->l2_classify.pad.l2_len =
+ vnet_buffer (b[0])->l2.l2_len =
vnet_buffer (b[0])->ip.save_rewrite_length;
/* advance the match pointer so the matching happens on IP header */
- h0 += vnet_buffer (b[0])->l2_classify.pad.l2_len;
+ h0 += vnet_buffer (b[0])->l2.l2_len;
}
vnet_buffer (b[0])->l2_classify.hash =
@@ -567,7 +604,7 @@ ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
/* advance the match pointer so the matching happens on IP header */
if (is_output)
- h0 += vnet_buffer (b[0])->l2_classify.pad.l2_len;
+ h0 += vnet_buffer (b[0])->l2.l2_len;
e0 = vnet_classify_find_entry_inline (t0, (u8 *) h0, hash0, now);
if (e0)
@@ -589,15 +626,21 @@ ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
e0->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX)
vnet_buffer (b[0])->sw_if_index[VLIB_TX] = e0->metadata;
else if (e0->action == CLASSIFY_ACTION_SET_METADATA)
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = e0->metadata;
+ {
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = e0->metadata;
+ /* For source check in case we skip the lookup node */
+ ip_lookup_set_buffer_fib_index (fib_index_by_sw_if_index,
+ b[0]);
+ }
}
}
else
{
while (1)
{
- if (PREDICT_TRUE (t0->next_table_index != ~0))
- t0 = pool_elt_at_index (tables, t0->next_table_index);
+ table_index0 = t0->next_table_index;
+ if (PREDICT_TRUE (table_index0 != ~0))
+ t0 = pool_elt_at_index (tables, table_index0);
else
{
next0 = (t0->miss_next_index < n_next_nodes) ?
@@ -620,7 +663,7 @@ ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
/* advance the match pointer so the matching happens on IP header */
if (is_output)
- h0 += vnet_buffer (b[0])->l2_classify.pad.l2_len;
+ h0 += vnet_buffer (b[0])->l2.l2_len;
hash0 = vnet_classify_hash_packet_inline (t0, (u8 *) h0);
e0 = vnet_classify_find_entry_inline
@@ -647,8 +690,14 @@ ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
e0->metadata;
else if (e0->action == CLASSIFY_ACTION_SET_METADATA)
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] =
- e0->metadata;
+ {
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX] =
+ e0->metadata;
+ /* For source check in case we skip the lookup
+ * node */
+ ip_lookup_set_buffer_fib_index (
+ fib_index_by_sw_if_index, b[0]);
+ }
}
break;
}
@@ -663,7 +712,7 @@ ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
t->sw_if_index =
~0 == way ? 0 : vnet_buffer (b[0])->sw_if_index[way];
t->next_index = next0;
- t->table_index = t0 ? t0 - tables : ~0;
+ t->table_index = table_index0;
t->offset = (e0 && t0) ? vnet_classify_get_offset (t0, e0) : ~0;
}
@@ -689,6 +738,7 @@ ip_in_out_acl_inline_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
static_always_inline uword
ip_in_out_acl_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame, const in_out_acl_table_id_t tid,
+ u32 *fib_index_by_sw_if_index,
const vlib_node_registration_t *parent_error_node,
const u32 error_none_index, const u32 error_deny_index,
const u32 error_miss_index, const vlib_rx_or_tx_t way,
@@ -715,7 +765,8 @@ ip_in_out_acl_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
ip_in_out_acl_inline_trace ( \
vm, node, frame, bufs, nexts, frame->n_vectors, &hits, &misses, \
&chain_hits, error_deny, error_miss, error_none, tables, \
- table_index_by_sw_if_index, cm, way, is_output, do_trace)
+ table_index_by_sw_if_index, fib_index_by_sw_if_index, cm, way, is_output, \
+ do_trace)
if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
ip_in_out_acl_inline_trace__ (1 /* do_trace */);
@@ -741,30 +792,30 @@ VLIB_NODE_FN (ip4_inacl_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
return ip_in_out_acl_inline (
- vm, node, frame, IN_OUT_ACL_TABLE_IP4, &ip4_input_node, IP4_ERROR_NONE,
- IP4_ERROR_INACL_SESSION_DENY, IP4_ERROR_INACL_TABLE_MISS, VLIB_RX,
- 0 /* is_output */);
+ vm, node, frame, IN_OUT_ACL_TABLE_IP4, ip4_main.fib_index_by_sw_if_index,
+ &ip4_input_node, IP4_ERROR_NONE, IP4_ERROR_INACL_SESSION_DENY,
+ IP4_ERROR_INACL_TABLE_MISS, VLIB_RX, 0 /* is_output */);
}
VLIB_NODE_FN (ip4_punt_acl_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
return ip_in_out_acl_inline (
- vm, node, frame, IN_OUT_ACL_TABLE_IP4_PUNT, &ip4_input_node,
- IP4_ERROR_NONE, IP4_ERROR_INACL_SESSION_DENY, IP4_ERROR_INACL_TABLE_MISS,
- ~0 /* way */, 0 /* is_output */);
+ vm, node, frame, IN_OUT_ACL_TABLE_IP4_PUNT,
+ ip4_main.fib_index_by_sw_if_index, &ip4_input_node, IP4_ERROR_NONE,
+ IP4_ERROR_INACL_SESSION_DENY, IP4_ERROR_INACL_TABLE_MISS, ~0 /* way */,
+ 0 /* is_output */);
}
VLIB_NODE_FN (ip4_outacl_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
return ip_in_out_acl_inline (
- vm, node, frame, IN_OUT_ACL_TABLE_IP4, &ip4_input_node, IP4_ERROR_NONE,
- IP4_ERROR_INACL_SESSION_DENY, IP4_ERROR_INACL_TABLE_MISS, VLIB_TX,
- 1 /* is_output */);
+ vm, node, frame, IN_OUT_ACL_TABLE_IP4, NULL, &ip4_input_node,
+ IP4_ERROR_NONE, IP4_ERROR_INACL_SESSION_DENY, IP4_ERROR_INACL_TABLE_MISS,
+ VLIB_TX, 1 /* is_output */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_inacl_node) = {
.name = "ip4-inacl",
.vector_size = sizeof (u32),
@@ -803,7 +854,6 @@ VLIB_REGISTER_NODE (ip4_outacl_node) = {
[ACL_NEXT_INDEX_DENY] = "ip4-drop",
},
};
-/* *INDENT-ON* */
VNET_FEATURE_INIT (ip4_punt_acl_feature) = {
.arc_name = "ip4-punt",
@@ -815,30 +865,30 @@ VLIB_NODE_FN (ip6_inacl_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
return ip_in_out_acl_inline (
- vm, node, frame, IN_OUT_ACL_TABLE_IP6, &ip6_input_node, IP6_ERROR_NONE,
- IP6_ERROR_INACL_SESSION_DENY, IP6_ERROR_INACL_TABLE_MISS, VLIB_RX,
- 0 /* is_output */);
+ vm, node, frame, IN_OUT_ACL_TABLE_IP6, ip6_main.fib_index_by_sw_if_index,
+ &ip6_input_node, IP6_ERROR_NONE, IP6_ERROR_INACL_SESSION_DENY,
+ IP6_ERROR_INACL_TABLE_MISS, VLIB_RX, 0 /* is_output */);
}
VLIB_NODE_FN (ip6_punt_acl_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
return ip_in_out_acl_inline (
- vm, node, frame, IN_OUT_ACL_TABLE_IP6_PUNT, &ip6_input_node,
- IP6_ERROR_NONE, IP6_ERROR_INACL_SESSION_DENY, IP6_ERROR_INACL_TABLE_MISS,
- ~0 /* way */, 0 /* is_output */);
+ vm, node, frame, IN_OUT_ACL_TABLE_IP6_PUNT,
+ ip4_main.fib_index_by_sw_if_index, &ip6_input_node, IP6_ERROR_NONE,
+ IP6_ERROR_INACL_SESSION_DENY, IP6_ERROR_INACL_TABLE_MISS, ~0 /* way */,
+ 0 /* is_output */);
}
VLIB_NODE_FN (ip6_outacl_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
return ip_in_out_acl_inline (
- vm, node, frame, IN_OUT_ACL_TABLE_IP6, &ip6_input_node, IP6_ERROR_NONE,
- IP6_ERROR_INACL_SESSION_DENY, IP6_ERROR_INACL_TABLE_MISS, VLIB_TX,
- 1 /* is_output */);
+ vm, node, frame, IN_OUT_ACL_TABLE_IP6, NULL, &ip6_input_node,
+ IP6_ERROR_NONE, IP6_ERROR_INACL_SESSION_DENY, IP6_ERROR_INACL_TABLE_MISS,
+ VLIB_TX, 1 /* is_output */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_inacl_node) = {
.name = "ip6-inacl",
.vector_size = sizeof (u32),
@@ -877,7 +927,6 @@ VLIB_REGISTER_NODE (ip6_outacl_node) = {
[ACL_NEXT_INDEX_DENY] = "ip6-drop",
},
};
-/* *INDENT-ON* */
VNET_FEATURE_INIT (ip6_punt_acl_feature) = {
.arc_name = "ip6-punt",
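
Editor's note: the ip_lookup_set_buffer_fib_index() helper called in each new CLASSIFY_ACTION_SET_METADATA branch above (its definition is touched in the lookup.h hunk further down) boils down to the following; a simplified restatement, not new code:

  /* Use the RX interface's FIB index unless a TX-side value (stuffed
   * into sw_if_index[VLIB_TX] by an earlier feature) overrides it. */
  u32 fib = fib_index_by_sw_if_index[vnet_buffer (b)->sw_if_index[VLIB_RX]];
  if (vnet_buffer (b)->sw_if_index[VLIB_TX] != (u32) ~0)
    fib = vnet_buffer (b)->sw_if_index[VLIB_TX];
  vnet_buffer (b)->ip.fib_index = fib;
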
diff --git a/src/vnet/ip/ip_init.c b/src/vnet/ip/ip_init.c
index 8894a878881..c2490f196ef 100644
--- a/src/vnet/ip/ip_init.c
+++ b/src/vnet/ip/ip_init.c
@@ -104,7 +104,6 @@ do { \
return error;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip_main_init) = {
.init_order = VLIB_INITS ("vnet_main_init", "ip4_init", "ip6_init",
"icmp4_init", "icmp6_init", "ip6_hop_by_hop_init",
@@ -112,7 +111,6 @@ VLIB_INIT_FUNCTION (ip_main_init) = {
"in_out_acl_init", "policer_classify_init",
"flow_classify_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip_interface.c b/src/vnet/ip/ip_interface.c
index 48c20a6cf34..ca1938f651a 100644
--- a/src/vnet/ip/ip_interface.c
+++ b/src/vnet/ip/ip_interface.c
@@ -145,27 +145,23 @@ ip_interface_has_address (u32 sw_if_index, ip46_address_t * ip, u8 is_ip4)
{
ip_lookup_main_t *lm4 = &ip4_main.lookup_main;
ip4_address_t *ip4;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* unnumbered */ ,
({
ip4 = ip_interface_address_get_address (lm4, ia);
if (ip4_address_compare (ip4, &ip->ip4) == 0)
return 1;
}));
- /* *INDENT-ON* */
}
else
{
ip_lookup_main_t *lm6 = &ip6_main.lookup_main;
ip6_address_t *ip6;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* unnumbered */ ,
({
ip6 = ip_interface_address_get_address (lm6, ia);
if (ip6_address_compare (ip6, &ip->ip6) == 0)
return 1;
}));
- /* *INDENT-ON* */
}
return 0;
}
@@ -179,16 +175,13 @@ ip_interface_get_first_ip (u32 sw_if_index, u8 is_ip4)
if (is_ip4)
{
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* unnumbered */ ,
({
return ip_interface_address_get_address (lm4, ia);
}));
- /* *INDENT-ON* */
}
else
{
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* unnumbered */ ,
({
ip6_address_t *rv;
@@ -197,21 +190,19 @@ ip_interface_get_first_ip (u32 sw_if_index, u8 is_ip4)
if (!ip6_address_is_link_local_unicast (rv))
return rv;
}));
- /* *INDENT-ON* */
}
return 0;
}
-static walk_rc_t
-ip_interface_address_mark_one_interface (vnet_main_t * vnm,
- vnet_sw_interface_t * si, void *ctx)
+walk_rc_t
+ip_interface_address_mark_one_interface (vnet_main_t *vnm,
+ vnet_sw_interface_t *si, void *ctx)
{
ip_lookup_main_t *lm4 = &ip4_main.lookup_main;
ip_lookup_main_t *lm6 = &ip6_main.lookup_main;
ip_interface_address_t *ia = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, si->sw_if_index, 1 /* unnumbered */ ,
({
ia->flags |= IP_INTERFACE_ADDRESS_FLAG_STALE;
@@ -220,7 +211,6 @@ ip_interface_address_mark_one_interface (vnet_main_t * vnm,
({
ia->flags |= IP_INTERFACE_ADDRESS_FLAG_STALE;
}));
- /* *INDENT-ON* */
return (WALK_CONTINUE);
}
@@ -246,7 +236,6 @@ ip_interface_address_sweep_one_interface (vnet_main_t * vnm,
u32 *ip4_masks = 0;
int i;
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im4->lookup_main, ia, si->sw_if_index, 1,
({
if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
@@ -268,7 +257,6 @@ ip_interface_address_sweep_one_interface (vnet_main_t * vnm,
vec_add1 (ip6_masks, ia->address_length);
}
}));
- /* *INDENT-ON* */
for (i = 0; i < vec_len (ip4_addrs); i++)
ip4_add_del_interface_address (vm, si->sw_if_index, &ip4_addrs[i],
diff --git a/src/vnet/ip/ip_interface.h b/src/vnet/ip/ip_interface.h
index b48eebdbc90..f0034ed0314 100644
--- a/src/vnet/ip/ip_interface.h
+++ b/src/vnet/ip/ip_interface.h
@@ -38,6 +38,9 @@ void ip_interface_address_sweep (void);
u32 ip_interface_address_find (ip_lookup_main_t * lm,
void *addr_fib, u32 address_length);
u8 ip_interface_has_address (u32 sw_if_index, ip46_address_t * ip, u8 is_ip4);
+walk_rc_t ip_interface_address_mark_one_interface (vnet_main_t *vnm,
+ vnet_sw_interface_t *si,
+ void *ctx);
always_inline void *
ip_interface_address_get_address (ip_lookup_main_t * lm,
@@ -53,7 +56,6 @@ ip_get_interface_prefix (ip_lookup_main_t * lm, ip_interface_prefix_key_t * k)
return p ? pool_elt_at_index (lm->if_prefix_pool, p[0]) : 0;
}
-/* *INDENT-OFF* */
#define foreach_ip_interface_address(lm,a,sw_if_index,loop,body) \
do { \
vnet_main_t *_vnm = vnet_get_main(); \
@@ -87,7 +89,6 @@ do { \
body; \
} \
} while (0)
-/* *INDENT-ON* */
#endif /* included_ip_interface_h */
diff --git a/src/vnet/ip/ip_packet.h b/src/vnet/ip/ip_packet.h
index d862caa3a52..04cf9f11d70 100644..100755
--- a/src/vnet/ip/ip_packet.h
+++ b/src/vnet/ip/ip_packet.h
@@ -149,98 +149,6 @@ STATIC_ASSERT_SIZEOF (ip_ecn_t, 1);
extern u8 *format_ip_ecn (u8 * s, va_list * va);
-/* IP checksum support. */
-
-static_always_inline u16
-ip_csum (void *data, u16 n_left)
-{
- u32 sum;
-#ifdef CLIB_HAVE_VEC256
- u16x16 v1, v2;
- u32x8 zero = { 0 };
- u32x8 sum8 = { 0 };
- u32x4 sum4;
-#endif
-
- /* if there is odd number of bytes, pad by zero and store in sum */
- sum = (n_left & 1) ? ((u8 *) data)[n_left - 1] << 8 : 0;
-
- /* we deal with words */
- n_left >>= 1;
-
-#ifdef CLIB_HAVE_VEC256
- while (n_left >= 32)
- {
- v1 = u16x16_load_unaligned (data);
- v2 = u16x16_load_unaligned (data + 32);
-
-#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
- v1 = u16x16_byte_swap (v1);
- v2 = u16x16_byte_swap (v2);
-#endif
- sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1));
- sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1));
- sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v2));
- sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v2));
- n_left -= 32;
- data += 64;
- }
-
- if (n_left >= 16)
- {
- v1 = u16x16_load_unaligned (data);
-#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
- v1 = u16x16_byte_swap (v1);
-#endif
- sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1));
- sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1));
- n_left -= 16;
- data += 32;
- }
-
- if (n_left)
- {
- v1 = u16x16_load_unaligned (data);
-#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
- v1 = u16x16_byte_swap (v1);
-#endif
- v1 = u16x16_mask_last (v1, 16 - n_left);
- sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1));
- sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1));
- }
-
- sum8 = u32x8_hadd (sum8, zero);
- sum4 = u32x8_extract_lo (sum8) + u32x8_extract_hi (sum8);
- sum += sum4[0] + sum4[1];
-
-#else
- /* scalar version */
- while (n_left >= 8)
- {
- sum += clib_net_to_host_u16 (*((u16 *) data + 0));
- sum += clib_net_to_host_u16 (*((u16 *) data + 1));
- sum += clib_net_to_host_u16 (*((u16 *) data + 2));
- sum += clib_net_to_host_u16 (*((u16 *) data + 3));
- sum += clib_net_to_host_u16 (*((u16 *) data + 4));
- sum += clib_net_to_host_u16 (*((u16 *) data + 5));
- sum += clib_net_to_host_u16 (*((u16 *) data + 6));
- sum += clib_net_to_host_u16 (*((u16 *) data + 7));
- n_left -= 8;
- data += 16;
- }
- while (n_left)
- {
- sum += clib_net_to_host_u16 (*(u16 *) data);
- n_left -= 1;
- data += 2;
- }
-#endif
-
- sum = (sum & 0xffff) + (sum >> 16);
- sum = (sum & 0xffff) + (sum >> 16);
- return ~((u16) sum);
-}
-
/* Incremental checksum update. */
typedef uword ip_csum_t;
@@ -301,7 +209,7 @@ always_inline u16
ip_csum_fold (ip_csum_t c)
{
/* Reduce to 16 bits. */
-#ifdef __x86_64__
+#if defined(__x86_64__) && defined(__BMI2__)
u64 tmp;
asm volatile(
/* using ADC is much faster than mov, shift, add sequence
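
Editor's note: the tightened guard above keeps the ADC inline-asm fast path off x86 builds without BMI2; the portable fallback applies elsewhere. For orientation, one's-complement folding (as in the removed scalar tail of ip_csum() and in ip_csum_fold()) is simply:

  /* Portable sketch: fold the 32-bit accumulator to 16 bits twice to
   * absorb the carries, then take the one's complement. */
  static inline u16
  csum_fold_sketch (u32 sum)
  {
    sum = (sum & 0xffff) + (sum >> 16);
    sum = (sum & 0xffff) + (sum >> 16);
    return ~((u16) sum);
  }
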
diff --git a/src/vnet/ip/ip_path_mtu.c b/src/vnet/ip/ip_path_mtu.c
index 38adb44065b..ccb57e1e352 100644
--- a/src/vnet/ip/ip_path_mtu.c
+++ b/src/vnet/ip/ip_path_mtu.c
@@ -297,10 +297,19 @@ ip_ptmu_adj_walk_update (adj_index_t ai, void *ctx)
static ip_pmtu_dpo_t *
ip_pmtu_dpo_alloc (void)
{
+ vlib_main_t *vm = vlib_get_main ();
+ u8 need_barrier_sync = pool_get_will_expand (ip_pmtu_dpo_pool);
ip_pmtu_dpo_t *ipm;
+
+ if (need_barrier_sync)
+ vlib_worker_thread_barrier_sync (vm);
+
pool_get_aligned_zero (ip_pmtu_dpo_pool, ipm, sizeof (ip_pmtu_dpo_t));
+ if (need_barrier_sync)
+ vlib_worker_thread_barrier_release (vm);
+
return (ipm);
}
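
Editor's note: the allocation above follows the standard VPP expand-safe pool idiom: take the worker barrier only when pool_get_will_expand() reports that the pool memory is about to move, since workers may hold pointers into it. Condensed sketch (elt_t/elt_pool are placeholders):

  u8 will_expand = pool_get_will_expand (elt_pool);
  if (will_expand)
    vlib_worker_thread_barrier_sync (vm); /* stop workers before realloc */
  pool_get_aligned_zero (elt_pool, e, sizeof (elt_t));
  if (will_expand)
    vlib_worker_thread_barrier_release (vm);
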
@@ -353,18 +362,16 @@ ip_pmtu_dpo_get_urpf (const dpo_id_t *dpo)
}
void
-ip_pmtu_dpo_add_or_lock (fib_protocol_t fproto, u16 pmtu, dpo_id_t *dpo)
+ip_pmtu_dpo_add_or_lock (u16 pmtu, const dpo_id_t *parent, dpo_id_t *dpo)
{
ip_pmtu_dpo_t *ipm;
- dpo_id_t parent = DPO_INVALID;
ipm = ip_pmtu_dpo_alloc ();
- ipm->ipm_proto = fib_proto_to_dpo (fproto);
+ ipm->ipm_proto = parent->dpoi_proto;
ipm->ipm_pmtu = pmtu;
- dpo_copy (&parent, drop_dpo_get (ipm->ipm_proto));
- dpo_stack (ip_pmtu_dpo_type, ipm->ipm_proto, &ipm->ipm_dpo, &parent);
+ dpo_stack (ip_pmtu_dpo_type, ipm->ipm_proto, &ipm->ipm_dpo, parent);
dpo_set (dpo, ip_pmtu_dpo_type, ipm->ipm_proto, ip_pmtu_dpo_get_index (ipm));
}
@@ -516,7 +523,9 @@ ip_pmtu_alloc (u32 fib_index, const fib_prefix_t *pfx,
/*
* interpose a policy DPO from the nh so that MTU is applied
*/
- ip_pmtu_dpo_add_or_lock (pfx->fp_proto, ipt->ipt_oper_pmtu, &ip_dpo);
+ ip_pmtu_dpo_add_or_lock (ipt->ipt_oper_pmtu,
+ drop_dpo_get (fib_proto_to_dpo (pfx->fp_proto)),
+ &ip_dpo);
fib_table_entry_special_dpo_add (fib_index, pfx, ip_pmtu_source,
FIB_ENTRY_FLAG_INTERPOSE, &ip_dpo);
@@ -587,7 +596,9 @@ ip_pmtu_stack (ip_pmtu_t *ipt)
{
dpo_id_t ip_dpo = DPO_INVALID;
- ip_pmtu_dpo_add_or_lock (pfx->fp_proto, ipt->ipt_oper_pmtu, &ip_dpo);
+ ip_pmtu_dpo_add_or_lock (
+ ipt->ipt_oper_pmtu,
+ drop_dpo_get (fib_proto_to_dpo (pfx->fp_proto)), &ip_dpo);
fib_table_entry_special_dpo_update (
fib_index, pfx, ip_pmtu_source, FIB_ENTRY_FLAG_INTERPOSE, &ip_dpo);
@@ -826,7 +837,8 @@ ip_path_module_init (vlib_main_t *vm)
adj_delegate_register_new_type (&ip_path_adj_delegate_vft);
ip_pmtu_source = fib_source_allocate ("path-mtu", FIB_SOURCE_PRIORITY_HI,
FIB_SOURCE_BH_SIMPLE);
- ip_pmtu_fib_type = fib_node_register_new_type (&ip_ptmu_fib_node_vft);
+ ip_pmtu_fib_type =
+ fib_node_register_new_type ("ip-pmtu", &ip_ptmu_fib_node_vft);
ip_pmtu_db = hash_create_mem (0, sizeof (ip_pmtu_key_t), sizeof (index_t));
ip_pmtu_logger = vlib_log_register_class ("ip", "pmtu");
diff --git a/src/vnet/ip/ip_path_mtu.h b/src/vnet/ip/ip_path_mtu.h
index 2c54fcd7401..96a5227237a 100644
--- a/src/vnet/ip/ip_path_mtu.h
+++ b/src/vnet/ip/ip_path_mtu.h
@@ -100,6 +100,9 @@ extern int ip_path_mtu_replace_end (void);
extern u32 ip_pmtu_get_table_id (const ip_pmtu_t *ipt);
extern void ip_pmtu_get_ip (const ip_pmtu_t *ipt, ip_address_t *ip);
+extern void ip_pmtu_dpo_add_or_lock (u16 pmtu, const dpo_id_t *parent,
+ dpo_id_t *dpo);
+
/**
* Data-plane accessor functions
*/
diff --git a/src/vnet/ip/ip_path_mtu_node.c b/src/vnet/ip/ip_path_mtu_node.c
index 33be4bbcae9..cadf1cbe137 100644
--- a/src/vnet/ip/ip_path_mtu_node.c
+++ b/src/vnet/ip/ip_path_mtu_node.c
@@ -49,7 +49,6 @@ ip_pmtu_dpo_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame, ip_address_family_t af)
{
u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
- u32 frag_sent = 0, small_packets = 0;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -114,8 +113,6 @@ ip_pmtu_dpo_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (error0 == IP_FRAG_ERROR_NONE)
{
/* Free original buffer chain */
- frag_sent += vec_len (buffer);
- small_packets += (vec_len (buffer) == 1);
vlib_buffer_free_one (vm, pi0); /* Free original packet */
}
else
@@ -177,7 +174,7 @@ VLIB_REGISTER_NODE (ip4_ip_pmtu_dpo_node) = {
.vector_size = sizeof (u32),
.format_trace = format_ip_pmtu_trace,
.n_errors = IP_FRAG_N_ERROR,
- .error_strings = ip4_frag_error_strings,
+ .error_counters = ip_frag_error_counters,
.n_next_nodes = IP_PMTU_N_NEXT,
.next_nodes =
{
@@ -189,7 +186,7 @@ VLIB_REGISTER_NODE (ip6_ip_pmtu_dpo_node) = {
.vector_size = sizeof (u32),
.format_trace = format_ip_pmtu_trace,
.n_errors = IP_FRAG_N_ERROR,
- .error_strings = ip4_frag_error_strings,
+ .error_counters = ip_frag_error_counters,
.n_next_nodes = IP_PMTU_N_NEXT,
.next_nodes =
{
diff --git a/src/vnet/ip/ip_psh_cksum.h b/src/vnet/ip/ip_psh_cksum.h
index eaac401f223..a80211561b7 100644
--- a/src/vnet/ip/ip_psh_cksum.h
+++ b/src/vnet/ip/ip_psh_cksum.h
@@ -7,6 +7,7 @@
#define included_ip_psh_cksum_h
#include <vnet/ip/ip.h>
+#include <vppinfra/vector/ip_csum.h>
typedef struct _ip4_psh
{
@@ -37,7 +38,7 @@ ip4_pseudo_header_cksum (ip4_header_t *ip4)
psh.proto = ip4->protocol;
psh.l4len = clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
sizeof (ip4_header_t));
- return ~clib_net_to_host_u16 (ip_csum (&psh, sizeof (ip4_psh_t)));
+ return ~(clib_ip_csum ((u8 *) &psh, sizeof (ip4_psh_t)));
}
static_always_inline u16
@@ -48,7 +49,7 @@ ip6_pseudo_header_cksum (ip6_header_t *ip6)
psh.dst = ip6->dst_address;
psh.l4len = ip6->payload_length;
psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol);
- return ~clib_net_to_host_u16 (ip_csum (&psh, sizeof (ip6_psh_t)));
+ return ~(clib_ip_csum ((u8 *) &psh, sizeof (ip6_psh_t)));
}
#endif /* included_ip_psh_cksum_h */
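
Editor's note: the clib_net_to_host_u16() wrapper drops out of both callers because clib_ip_csum() (from vppinfra/vector/ip_csum.h) already returns the folded sum in the byte order the caller expects, so usage reduces to, e.g. (hypothetical local variable):

  u16 csum = ~(clib_ip_csum ((u8 *) &psh, sizeof (ip4_psh_t)));
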
diff --git a/src/vnet/ip/ip_punt_drop.c b/src/vnet/ip/ip_punt_drop.c
index bf01adadb10..dc113f51386 100644
--- a/src/vnet/ip/ip_punt_drop.c
+++ b/src/vnet/ip/ip_punt_drop.c
@@ -143,9 +143,8 @@ format_ip_punt_redirect (u8 * s, va_list * args)
rx = ip_punt_redirect_get (rxs[rx_sw_if_index]);
- s = format (s, " rx %U via:\n",
- format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface (vnm, rx_sw_if_index));
+ s = format (s, " rx %U via:\n", format_vnet_sw_if_index_name, vnm,
+ rx_sw_if_index);
s = format (s, " %U", format_fib_path_list, rx->pl, 2);
s = format (s, " forwarding\n", format_dpo_id, &rx->dpo, 0);
s = format (s, " %U\n", format_dpo_id, &rx->dpo, 0);
diff --git a/src/vnet/ip/ip_test.c b/src/vnet/ip/ip_test.c
index c47cd3d208e..727afba67f4 100644
--- a/src/vnet/ip/ip_test.c
+++ b/src/vnet/ip/ip_test.c
@@ -36,6 +36,10 @@
#include <vnet/ip/ip.api.h>
#undef vl_endianfun
+#define vl_calcsizefun
+#include <vnet/ip/ip.api.h>
+#undef vl_calcsizefun
+
typedef struct
{
/* API message ID base */
@@ -1010,6 +1014,24 @@ api_ip_reassembly_enable_disable (vat_main_t *vat)
return -1;
}
+static int
+api_ip_local_reass_enable_disable (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ip_local_reass_get (vat_main_t *vat)
+{
+ return -1;
+}
+
+static void
+vl_api_ip_local_reass_get_reply_t_handler (
+ vl_api_ip_local_reass_get_reply_t *mp)
+{
+}
+
static void
vl_api_ip_reassembly_get_reply_t_handler (vl_api_ip_reassembly_get_reply_t *mp)
{
@@ -1255,6 +1277,12 @@ api_set_ip_flow_hash_v2 (vat_main_t *vat)
}
static int
+api_set_ip_flow_hash_v3 (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
api_ip_mroute_add_del (vat_main_t *vam)
{
unformat_input_t *i = vam->input;
diff --git a/src/vnet/ip/ip_types.c b/src/vnet/ip/ip_types.c
index 3e5ecebf142..ec80a96f15c 100644
--- a/src/vnet/ip/ip_types.c
+++ b/src/vnet/ip/ip_types.c
@@ -41,14 +41,16 @@ uword
unformat_ip_address (unformat_input_t * input, va_list * args)
{
ip_address_t *a = va_arg (*args, ip_address_t *);
+ ip_address_t tmp, *p_tmp = &tmp;
- clib_memset (a, 0, sizeof (*a));
- if (unformat (input, "%U", unformat_ip4_address, &ip_addr_v4 (a)))
- ip_addr_version (a) = AF_IP4;
- else if (unformat_user (input, unformat_ip6_address, &ip_addr_v6 (a)))
- ip_addr_version (a) = AF_IP6;
+ clib_memset (p_tmp, 0, sizeof (*p_tmp));
+ if (unformat (input, "%U", unformat_ip4_address, &ip_addr_v4 (p_tmp)))
+ ip_addr_version (p_tmp) = AF_IP4;
+ else if (unformat_user (input, unformat_ip6_address, &ip_addr_v6 (p_tmp)))
+ ip_addr_version (p_tmp) = AF_IP6;
else
return 0;
+ *a = *p_tmp;
return 1;
}
@@ -288,6 +290,13 @@ ip_address_to_fib_prefix (const ip_address_t * addr, fib_prefix_t * prefix)
}
void
+ip_address_to_prefix (const ip_address_t *addr, ip_prefix_t *prefix)
+{
+ prefix->len = (addr->version == AF_IP4 ? 32 : 128);
+ clib_memcpy (&prefix->addr, addr, sizeof (prefix->addr));
+}
+
+void
ip_address_increment (ip_address_t * ip)
{
ip46_address_increment ((ip_addr_version (ip) == AF_IP4 ?
@@ -380,23 +389,24 @@ ip_prefix_copy (void *dst, void *src)
}
int
-ip_prefix_cmp (ip_prefix_t * p1, ip_prefix_t * p2)
+ip_prefix_cmp (const ip_prefix_t *ipp1, const ip_prefix_t *ipp2)
{
+ ip_prefix_t p1 = *ipp1, p2 = *ipp2;
int cmp = 0;
- ip_prefix_normalize (p1);
- ip_prefix_normalize (p2);
+ ip_prefix_normalize (&p1);
+ ip_prefix_normalize (&p2);
- cmp = ip_address_cmp (&ip_prefix_addr (p1), &ip_prefix_addr (p2));
+ cmp = ip_address_cmp (&ip_prefix_addr (&p1), &ip_prefix_addr (&p2));
if (cmp == 0)
{
- if (ip_prefix_len (p1) < ip_prefix_len (p2))
+ if (ip_prefix_len (&p1) < ip_prefix_len (&p2))
{
cmp = 1;
}
else
{
- if (ip_prefix_len (p1) > ip_prefix_len (p2))
+ if (ip_prefix_len (&p1) > ip_prefix_len (&p2))
cmp = 2;
}
}
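
Editor's note: both ip_types.c changes apply the same "work on a copy, commit on success" rule — unformat_ip_address() no longer clobbers the caller's address on a failed parse, and ip_prefix_cmp() normalizes private copies so it can accept const arguments. A generic sketch of the unformat side of the pattern (thing_t and unformat_thing_body are hypothetical):

  uword
  unformat_thing (unformat_input_t *input, va_list *args)
  {
    thing_t *out = va_arg (*args, thing_t *);
    thing_t tmp = { 0 };
    if (!unformat (input, "%U", unformat_thing_body, &tmp))
      return 0; /* failure: *out is untouched */
    *out = tmp; /* success: commit in one step */
    return 1;
  }
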
diff --git a/src/vnet/ip/ip_types.h b/src/vnet/ip/ip_types.h
index 83a0f6adc72..f1b387df194 100644
--- a/src/vnet/ip/ip_types.h
+++ b/src/vnet/ip/ip_types.h
@@ -75,13 +75,11 @@ typedef enum ip_feature_location_t_
#define N_IP_FEATURE_LOCATIONS (IP_FEATURE_DROP+1)
-/* *INDENT-OFF* */
typedef struct ip_address
{
ip46_address_t ip;
ip_address_family_t version;
} __clib_packed ip_address_t;
-/* *INDENT-ON* */
#define IP_ADDRESS_V4_ALL_0S {.ip.ip4.as_u32 = 0, .version = AF_IP4}
#define IP_ADDRESS_V6_ALL_0S {.ip.ip6.as_u64 = {0, 0}, .version = AF_IP6}
@@ -112,13 +110,11 @@ extern void ip_address_from_46 (const ip46_address_t * a,
extern void ip_address_increment (ip_address_t * ip);
extern void ip_address_reset (ip_address_t * ip);
-/* *INDENT-OFF* */
typedef struct ip_prefix
{
ip_address_t addr;
u8 len;
} __clib_packed ip_prefix_t;
-/* *INDENT-ON* */
#define ip_prefix_addr(_a) (_a)->addr
#define ip_prefix_version(_a) ip_addr_version(&ip_prefix_addr(_a))
@@ -126,11 +122,13 @@ typedef struct ip_prefix
#define ip_prefix_v4(_a) ip_addr_v4(&ip_prefix_addr(_a))
#define ip_prefix_v6(_a) ip_addr_v6(&ip_prefix_addr(_a))
-extern int ip_prefix_cmp (ip_prefix_t * p1, ip_prefix_t * p2);
+extern int ip_prefix_cmp (const ip_prefix_t *p1, const ip_prefix_t *p2);
extern void ip_prefix_normalize (ip_prefix_t * a);
extern void ip_address_to_fib_prefix (const ip_address_t * addr,
fib_prefix_t * prefix);
+extern void ip_address_to_prefix (const ip_address_t *addr,
+ ip_prefix_t *prefix);
extern void ip_prefix_to_fib_prefix (const ip_prefix_t * ipp,
fib_prefix_t * fibp);
extern u8 *format_ip_prefix (u8 * s, va_list * args);
diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c
index b678ce330c3..c0fa430e0aa 100644
--- a/src/vnet/ip/lookup.c
+++ b/src/vnet/ip/lookup.c
@@ -128,6 +128,42 @@ format_ip_flow_hash_config (u8 * s, va_list * args)
return s;
}
+uword
+unformat_ip_flow_hash_config (unformat_input_t *input, va_list *args)
+{
+ flow_hash_config_t *flow_hash_config = va_arg (*args, flow_hash_config_t *);
+ uword start_index = unformat_check_input (input);
+ int matched_once = 0;
+
+ if (unformat (input, "default"))
+ {
+ *flow_hash_config = IP_FLOW_HASH_DEFAULT;
+ return 1;
+ }
+ while (!unformat_is_eof (input) &&
+ !is_white_space (unformat_peek_input (input)))
+ {
+ if (unformat (input, "%_,"))
+ ;
+#define _(a, b, c) \
+ else if (unformat (input, "%_" #a)) \
+ { \
+ *flow_hash_config |= c; \
+ matched_once = 1; \
+ }
+ foreach_flow_hash_bit
+#undef _
+ else
+ {
+ /* Roll back to our start */
+ input->index = start_index;
+ return 0;
+ }
+ }
+
+ return matched_once;
+}
+
u8 *
format_ip_adjacency_packet_data (u8 * s, va_list * args)
{
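
Editor's note: the new unformat_ip_flow_hash_config() above plugs into the usual %U machinery, accepting either "default" or a comma-separated list of foreach_flow_hash_bit tokens. A hypothetical caller — the consuming function name is illustrative only:

  flow_hash_config_t fhc = 0;
  if (unformat (line_input, "flow-hash %U",
                unformat_ip_flow_hash_config, &fhc))
    set_table_flow_hash (table_id, fhc); /* hypothetical consumer */
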
@@ -184,6 +220,27 @@ const ip46_address_t zero_addr = {
0, 0},
};
+bool
+fib_prefix_validate (const fib_prefix_t *prefix)
+{
+ if (FIB_PROTOCOL_IP4 == prefix->fp_proto)
+ {
+ if (prefix->fp_len > 32)
+ {
+ return false;
+ }
+ }
+
+ if (FIB_PROTOCOL_IP6 == prefix->fp_proto)
+ {
+ if (prefix->fp_len > 128)
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
static clib_error_t *
vnet_ip_route_cmd (vlib_main_t * vm,
unformat_input_t * main_input, vlib_cli_command_t * cmd)
@@ -317,6 +374,12 @@ vnet_ip_route_cmd (vlib_main_t * vm,
.fp_addr = prefixs[i].fp_addr,
};
+ if (!fib_prefix_validate (&rpfx))
+ {
+ vlib_cli_output (vm, "Invalid prefix len: %d", rpfx.fp_len);
+ continue;
+ }
+
if (is_del)
fib_table_entry_path_remove2 (fib_index,
&rpfx, FIB_SOURCE_CLI, rpaths);
@@ -410,6 +473,7 @@ vnet_ip_table_cmd (vlib_main_t * vm,
}
done:
+ vec_free (name);
unformat_free (line_input);
return error;
}
@@ -465,13 +529,13 @@ vnet_show_ip_table_cmd (vlib_main_t *vm, unformat_input_t *main_input,
}
fib = fib_table_get (fib_index, fproto);
- vlib_cli_output (vm, "[%3u] table_id:%3u %v", fib->ft_index,
+ vlib_cli_output (vm, "[%u] table_id:%u %v", fib->ft_index,
fib->ft_table_id, fib->ft_desc);
}
else
{
pool_foreach (fib, fibs)
- vlib_cli_output (vm, "[%3u] table_id:%3u %v", fib->ft_index,
+ vlib_cli_output (vm, "[%u] table_id:%u %v", fib->ft_index,
fib->ft_table_id, fib->ft_desc);
}
@@ -493,33 +557,25 @@ vnet_show_ip6_table_cmd (vlib_main_t *vm, unformat_input_t *main_input,
return (vnet_show_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP6));
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_ip_command, static) = {
.path = "ip",
.short_help = "Internet protocol (IP) commands",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_ip6_command, static) = {
.path = "ip6",
.short_help = "Internet protocol version 6 (IPv6) commands",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_show_ip_command, static) = {
.path = "show ip",
.short_help = "Internet protocol (IP) show commands",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = {
.path = "show ip6",
.short_help = "Internet protocol version 6 (IPv6) show commands",
};
-/* *INDENT-ON* */
/*?
* This command is used to add or delete IPv4 or IPv6 routes. All
@@ -547,38 +603,40 @@ VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = {
* @cliexcmd{ip route add 7.0.0.1/32 via 6.0.0.2 GigabitEthernet2/0/0 weight 3}
* To add a route to a particular FIB table (VRF), use:
* @cliexcmd{ip route add 172.16.24.0/24 table 7 via GigabitEthernet2/0/0}
+ * To add a route that drops the matching traffic:
+ * @cliexcmd{ip route add 172.16.24.0/24 table 100 via 127.0.0.1 drop}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip_route_command, static) = {
.path = "ip route",
- .short_help = "ip route [add|del] [count <n>] <dst-ip-addr>/<width> [table <table-id>] via [next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4 <interface>] [out-labels <value value value>]",
+ .short_help = "ip route [add|del] [count <n>] <dst-ip-addr>/<width> [table "
+ "<table-id>] via [next-hop-address] [next-hop-interface] "
+ "[next-hop-table <value>] [weight <value>] [preference "
+ "<value>] [udp-encap <value>] [ip4-lookup-in-table <value>] "
+ "[ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] "
+ "[resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 "
+ "<interface>] [out-labels <value value value>] [drop]",
.function = vnet_ip_route_cmd,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*?
* This command is used to add or delete IPv4 Tables. All
* Tables must be explicitly added before that can be used. Creating a
* table will add both unicast and multicast FIBs
*
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip4_table_command, static) = {
.path = "ip table",
.short_help = "ip table [add|del] <table-id>",
.function = vnet_ip4_table_cmd,
};
-/* *INDENT-ON* */
-/* *INDENT-ON* */
/*?
* This command is used to add or delete IPv4 Tables. All
* Tables must be explicitly added before that can be used. Creating a
* table will add both unicast and multicast FIBs
*
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_table_command, static) = {
.path = "ip6 table",
.short_help = "ip6 table [add|del] <table-id>",
@@ -683,14 +741,12 @@ ip6_table_bind_cmd (vlib_main_t * vm,
* Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
* @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
{
.path = "set interface ip table",
.function = ip4_table_bind_cmd,
.short_help = "set interface ip table <interface> <table-id>",
};
-/* *INDENT-ON* */
/*?
* Place the indicated interface into the supplied IPv6 FIB table (also known
@@ -711,14 +767,12 @@ VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
* Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id):
* @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) =
{
.path = "set interface ip6 table",
.function = ip6_table_bind_cmd,
.short_help = "set interface ip6 table <interface> <table-id>"
};
-/* *INDENT-ON* */
clib_error_t *
vnet_ip_mroute_cmd (vlib_main_t * vm,
@@ -955,7 +1009,6 @@ done:
* @cliexcmd{ip mroute add 232.1.1.1 Signal}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip_mroute_command, static) =
{
.path = "ip mroute",
@@ -963,7 +1016,6 @@ VLIB_CLI_COMMAND (ip_mroute_command, static) =
.function = vnet_ip_mroute_cmd,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/lookup.h b/src/vnet/ip/lookup.h
index 48ba468d7c2..8083d974df6 100644
--- a/src/vnet/ip/lookup.h
+++ b/src/vnet/ip/lookup.h
@@ -162,23 +162,22 @@ typedef struct ip_lookup_main_t
} ip_lookup_main_t;
u8 *format_ip_flow_hash_config (u8 * s, va_list * args);
-
+uword unformat_ip_flow_hash_config (unformat_input_t *input, va_list *args);
always_inline void
ip_lookup_set_buffer_fib_index (u32 * fib_index_by_sw_if_index,
vlib_buffer_t * b)
{
- /* *INDENT-OFF* */
vnet_buffer (b)->ip.fib_index =
vec_elt (fib_index_by_sw_if_index, vnet_buffer (b)->sw_if_index[VLIB_RX]);
vnet_buffer (b)->ip.fib_index =
((vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
vnet_buffer (b)->ip.fib_index :
vnet_buffer (b)->sw_if_index[VLIB_TX]);
- /* *INDENT-ON* */
}
void ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index);
+bool fib_prefix_validate (const fib_prefix_t *prefix);
#endif /* included_ip_lookup_h */
/*
diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c
index eb191da1394..3c46549634a 100644
--- a/src/vnet/ip/punt.c
+++ b/src/vnet/ip/punt.c
@@ -148,14 +148,31 @@ punt_socket_register_l4 (vlib_main_t * vm,
punt_main_t *pm = &punt_main;
punt_client_t *c;
- /* For now we only support UDP punt */
- if (protocol != IP_PROTOCOL_UDP)
- return clib_error_return (0,
- "only UDP protocol (%d) is supported, got %d",
- IP_PROTOCOL_UDP, protocol);
-
if (port == (u16) ~ 0)
- return clib_error_return (0, "UDP port number required");
+ return clib_error_return (0, "Port number required");
+
+ u32 node_index;
+ switch (protocol)
+ {
+ case IP_PROTOCOL_UDP:
+ node_index = (af == AF_IP4 ? udp4_punt_socket_node.index :
+ udp6_punt_socket_node.index);
+ udp_register_dst_port (vm, port, node_index, af == AF_IP4);
+ break;
+ case IP_PROTOCOL_ICMP6:
+ if (af != AF_IP6)
+ return clib_error_return (
+	    0, "ICMP6 punt (%d) is only supported for IPv6, got address family %d",
+	    IP_PROTOCOL_ICMP6, af);
+
+ node_index = icmp6_punt_socket_node.index;
+ icmp6_register_type (vm, port, node_index);
+ break;
+ default:
+ return clib_error_return (
+	  0, "only UDP or ICMP6 protocol (%d, %d) is supported, got %d",
+	  IP_PROTOCOL_UDP, IP_PROTOCOL_ICMP6, protocol);
+ }
c = punt_client_l4_get (af, port);
@@ -165,19 +182,14 @@ punt_socket_register_l4 (vlib_main_t * vm,
punt_client_l4_db_add (af, port, c - pm->punt_client_pool);
}
- memcpy (c->caddr.sun_path, client_pathname, sizeof (c->caddr.sun_path));
+ snprintf (c->caddr.sun_path, sizeof (c->caddr.sun_path), "%s",
+ client_pathname);
c->caddr.sun_family = AF_UNIX;
c->reg.type = PUNT_TYPE_L4;
c->reg.punt.l4.port = port;
c->reg.punt.l4.protocol = protocol;
c->reg.punt.l4.af = af;
- u32 node_index = (af == AF_IP4 ?
- udp4_punt_socket_node.index :
- udp6_punt_socket_node.index);
-
- udp_register_dst_port (vm, port, node_index, af == AF_IP4);
-
return (NULL);
}
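
Editor's note: the memcpy-to-snprintf change in this and the following hunks is a correctness fix, not cosmetics — the old code copied sizeof (sun_path) bytes out of a caller string of unknown length, risking an over-read and an unterminated path. Side by side:

  /* old: may read past the end of client_pathname, may not NUL-terminate */
  memcpy (c->caddr.sun_path, client_pathname, sizeof (c->caddr.sun_path));
  /* new: bounded copy, always NUL-terminated */
  snprintf (c->caddr.sun_path, sizeof (c->caddr.sun_path), "%s",
            client_pathname);
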
@@ -197,7 +209,8 @@ punt_socket_register_ip_proto (vlib_main_t * vm,
punt_client_ip_proto_db_add (af, proto, c - pm->punt_client_pool);
}
- memcpy (c->caddr.sun_path, client_pathname, sizeof (c->caddr.sun_path));
+ snprintf (c->caddr.sun_path, sizeof (c->caddr.sun_path), "%s",
+ client_pathname);
c->caddr.sun_family = AF_UNIX;
c->reg.type = PUNT_TYPE_IP_PROTO;
c->reg.punt.ip_proto.protocol = proto;
@@ -227,7 +240,8 @@ punt_socket_register_exception (vlib_main_t * vm,
punt_client_exception_db_add (reason, pc - pm->punt_client_pool);
}
- memcpy (pc->caddr.sun_path, client_pathname, sizeof (pc->caddr.sun_path));
+ snprintf (pc->caddr.sun_path, sizeof (pc->caddr.sun_path), "%s",
+ client_pathname);
pc->caddr.sun_family = AF_UNIX;
pc->reg.type = PUNT_TYPE_EXCEPTION;
pc->reg.punt.exception.reason = reason;
@@ -460,7 +474,6 @@ punt_cli (vlib_main_t * vm,
unformat_input_t line_input, *input = &line_input;
clib_error_t *error = NULL;
bool is_add = true;
- /* *INDENT-OFF* */
punt_reg_t pr = {
.punt = {
.l4 = {
@@ -472,7 +485,6 @@ punt_cli (vlib_main_t * vm,
.type = PUNT_TYPE_L4,
};
u32 port;
- /* *INDENT-ON* */
if (!unformat_user (input__, unformat_line_input, input))
return 0;
@@ -538,13 +550,11 @@ done:
* @cliexcmd{set punt udp del all}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (punt_command, static) = {
.path = "set punt",
.short_help = "set punt [IPV4|ip6|ipv6] [UDP|tcp] [del] [ALL|<port-num>]",
.function = punt_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
punt_socket_register_cmd (vlib_main_t * vm,
@@ -554,7 +564,6 @@ punt_socket_register_cmd (vlib_main_t * vm,
unformat_input_t line_input, *input = &line_input;
u8 *socket_name = 0;
clib_error_t *error = NULL;
- /* *INDENT-OFF* */
punt_reg_t pr = {
.punt = {
.l4 = {
@@ -565,7 +574,6 @@ punt_socket_register_cmd (vlib_main_t * vm,
},
.type = PUNT_TYPE_L4,
};
- /* *INDENT-ON* */
if (!unformat_user (input__, unformat_line_input, input))
return 0;
@@ -613,7 +621,6 @@ done:
* @cliexcmd{punt socket register socket punt_l4_foo.sock}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (punt_socket_register_command, static) =
{
.path = "punt socket register",
@@ -621,7 +628,6 @@ VLIB_CLI_COMMAND (punt_socket_register_command, static) =
.short_help = "punt socket register [IPV4|ipv6] [UDP|tcp] [ALL|<port-num>] socket <socket>",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
punt_socket_deregister_cmd (vlib_main_t * vm,
@@ -630,7 +636,6 @@ punt_socket_deregister_cmd (vlib_main_t * vm,
{
unformat_input_t line_input, *input = &line_input;
clib_error_t *error = NULL;
- /* *INDENT-OFF* */
punt_reg_t pr = {
.punt = {
.l4 = {
@@ -641,7 +646,6 @@ punt_socket_deregister_cmd (vlib_main_t * vm,
},
.type = PUNT_TYPE_L4,
};
- /* *INDENT-ON* */
if (!unformat_user (input__, unformat_line_input, input))
return 0;
@@ -682,7 +686,6 @@ done:
* @cliexpar
* @cliexcmd{punt socket register}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (punt_socket_deregister_command, static) =
{
.path = "punt socket deregister",
@@ -690,7 +693,6 @@ VLIB_CLI_COMMAND (punt_socket_deregister_command, static) =
.short_help = "punt socket deregister [IPV4|ipv6] [UDP|tcp] [ALL|<port-num>]",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
void
punt_client_walk (punt_type_t pt, punt_client_walk_cb_t cb, void *ctx)
@@ -703,24 +705,20 @@ punt_client_walk (punt_type_t pt, punt_client_walk_cb_t cb, void *ctx)
{
u32 pci, key;
- /* *INDENT-OFF* */
hash_foreach(key, pci, pm->db.clients_by_l4_port,
({
cb (pool_elt_at_index(pm->punt_client_pool, pci), ctx);
}));
- /* *INDENT-ON* */
break;
}
case PUNT_TYPE_IP_PROTO:
{
u32 pci, key;
- /* *INDENT-OFF* */
hash_foreach(key, pci, pm->db.clients_by_ip_proto,
({
cb (pool_elt_at_index(pm->punt_client_pool, pci), ctx);
}));
- /* *INDENT-ON* */
break;
}
case PUNT_TYPE_EXCEPTION:
@@ -818,7 +816,6 @@ done:
* @cliexpar
* @cliexcmd{show punt socket ipv4}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_punt_socket_registration_command, static) =
{
.path = "show punt socket registrations",
@@ -826,7 +823,6 @@ VLIB_CLI_COMMAND (show_punt_socket_registration_command, static) =
.short_help = "show punt socket registrations [l4|exception]",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
clib_error_t *
ip_punt_init (vlib_main_t * vm)
diff --git a/src/vnet/ip/punt.h b/src/vnet/ip/punt.h
index a2612d60f07..e8495caad61 100644
--- a/src/vnet/ip/punt.h
+++ b/src/vnet/ip/punt.h
@@ -20,7 +20,12 @@
#ifndef included_punt_h
#define included_punt_h
+#ifdef __linux__
#include <linux/un.h>
+#elif __FreeBSD__
+#include <sys/un.h>
+#define UNIX_PATH_MAX SUNPATHLEN
+#endif /* __linux__ */
#include <stdbool.h>
#include <vnet/ip/ip.h>
@@ -239,6 +244,7 @@ extern vlib_node_registration_t udp4_punt_node;
extern vlib_node_registration_t udp6_punt_node;
extern vlib_node_registration_t udp4_punt_socket_node;
extern vlib_node_registration_t udp6_punt_socket_node;
+extern vlib_node_registration_t icmp6_punt_socket_node;
extern vlib_node_registration_t ip4_proto_punt_socket_node;
extern vlib_node_registration_t ip6_proto_punt_socket_node;
extern vlib_node_registration_t punt_socket_rx_node;
diff --git a/src/vnet/ip/punt_api.c b/src/vnet/ip/punt_api.c
index bcbf939f69d..20297af2e75 100644
--- a/src/vnet/ip/punt_api.c
+++ b/src/vnet/ip/punt_api.c
@@ -224,12 +224,10 @@ vl_api_punt_socket_register_t_handler (vl_api_punt_socket_register_t * mp)
char *p = vnet_punt_get_server_pathname ();
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_PUNT_SOCKET_REGISTER_REPLY,
({
memcpy ((char *) rmp->pathname, p, sizeof (rmp->pathname));
}));
- /* *INDENT-ON* */
}
typedef struct punt_socket_send_ctx_t_
diff --git a/src/vnet/ip/punt_node.c b/src/vnet/ip/punt_node.c
index 7f9beef0ffe..6400e49c626 100644
--- a/src/vnet/ip/punt_node.c
+++ b/src/vnet/ip/punt_node.c
@@ -23,6 +23,7 @@
*/
#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
#include <vlib/vlib.h>
#include <vnet/ip/punt.h>
#include <vlib/unix/unix.h>
@@ -182,7 +183,6 @@ VLIB_NODE_FN (udp6_punt_node) (vlib_main_t * vm,
return udp46_punt_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp4_punt_node) = {
.name = "ip4-udp-punt",
/* Takes a vector of packets. */
@@ -214,7 +214,6 @@ VLIB_REGISTER_NODE (udp6_punt_node) = {
#undef _
},
};
-/* *INDENT-ON* */
typedef struct
{
@@ -243,10 +242,9 @@ format_udp_punt_trace (u8 * s, va_list * args)
}
always_inline uword
-punt_socket_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame,
- punt_type_t pt, ip_address_family_t af)
+punt_socket_inline2 (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, punt_type_t pt,
+ ip_address_family_t af, ip_protocol_t protocol)
{
u32 *buffers = vlib_frame_vector_args (frame);
u32 thread_index = vm->thread_index;
@@ -266,33 +264,42 @@ punt_socket_inline (vlib_main_t * vm,
uword l;
punt_packetdesc_t packetdesc;
punt_client_t *c;
-
+ u16 port = 0;
b = vlib_get_buffer (vm, buffers[i]);
if (PUNT_TYPE_L4 == pt)
{
- /* Reverse UDP Punt advance */
- udp_header_t *udp;
- if (AF_IP4 == af)
+ if (protocol == IP_PROTOCOL_UDP)
{
- vlib_buffer_advance (b, -(sizeof (ip4_header_t) +
- sizeof (udp_header_t)));
- ip4_header_t *ip = vlib_buffer_get_current (b);
- udp = (udp_header_t *) (ip + 1);
+ /* Reverse UDP Punt advance */
+ udp_header_t *udp;
+ if (AF_IP4 == af)
+ {
+ vlib_buffer_advance (
+ b, -(sizeof (ip4_header_t) + sizeof (udp_header_t)));
+ ip4_header_t *ip = vlib_buffer_get_current (b);
+ udp = (udp_header_t *) (ip + 1);
+ }
+ else
+ {
+ vlib_buffer_advance (
+ b, -(sizeof (ip6_header_t) + sizeof (udp_header_t)));
+ ip6_header_t *ip = vlib_buffer_get_current (b);
+ udp = (udp_header_t *) (ip + 1);
+ }
+ port = clib_net_to_host_u16 (udp->dst_port);
}
- else
+ else if (protocol == IP_PROTOCOL_ICMP6)
{
- vlib_buffer_advance (b, -(sizeof (ip6_header_t) +
- sizeof (udp_header_t)));
ip6_header_t *ip = vlib_buffer_get_current (b);
- udp = (udp_header_t *) (ip + 1);
+ icmp46_header_t *icmp = ip6_next_header (ip);
+ port = icmp->type;
}
-
/*
* Find registerered client
* If no registered client, drop packet and count
*/
- c = punt_client_l4_get (af, clib_net_to_host_u16 (udp->dst_port));
+ c = punt_client_l4_get (af, port);
}
else if (PUNT_TYPE_IP_PROTO == pt)
{
@@ -339,7 +346,7 @@ punt_socket_inline (vlib_main_t * vm,
iov->iov_len = sizeof (packetdesc);
/** VLIB buffer chain -> Unix iovec(s). */
- vlib_buffer_advance (b, -(sizeof (ethernet_header_t)));
+ vlib_buffer_advance (b, -ethernet_buffer_header_size (b));
vec_add2 (ptd->iovecs, iov, 1);
iov->iov_base = b->data + b->current_data;
iov->iov_len = l = b->current_length;
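
Editor's note: worth flagging in the hunk above — ICMP6 punts reuse the existing L4 client database, with the ICMP type standing in for the UDP destination port as the lookup key. In sketch form:

  /* UDP:   key = clib_net_to_host_u16 (udp->dst_port)
   * ICMP6: key = icmp->type, carried in the same port slot */
  icmp46_header_t *icmp = ip6_next_header (ip);
  c = punt_client_l4_get (AF_IP6, icmp->type);
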
@@ -396,6 +403,14 @@ error:
return n_packets;
}
+always_inline uword
+punt_socket_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, punt_type_t pt,
+ ip_address_family_t af)
+{
+ return punt_socket_inline2 (vm, node, frame, pt, af, IP_PROTOCOL_UDP);
+}
+
static uword
udp4_punt_socket (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * from_frame)
@@ -427,6 +442,14 @@ ip6_proto_punt_socket (vlib_main_t * vm,
}
static uword
+icmp6_punt_socket (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *from_frame)
+{
+ return punt_socket_inline2 (vm, node, from_frame, PUNT_TYPE_L4, AF_IP6,
+ IP_PROTOCOL_ICMP6);
+}
+
+static uword
exception_punt_socket (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * from_frame)
{
@@ -435,7 +458,6 @@ exception_punt_socket (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp4_punt_socket_node) = {
.function = udp4_punt_socket,
.name = "ip4-udp-punt-socket",
@@ -483,7 +505,16 @@ VLIB_REGISTER_NODE (exception_punt_socket_node) = {
.n_errors = PUNT_N_ERROR,
.error_strings = punt_error_strings,
};
-/* *INDENT-ON* */
+VLIB_REGISTER_NODE (icmp6_punt_socket_node) = {
+ .function = icmp6_punt_socket,
+ .name = "ip6-icmp-punt-socket",
+ .format_trace = format_udp_punt_trace,
+ .flags = VLIB_NODE_FLAG_IS_DROP,
+ .vector_size = sizeof (u32),
+ .n_errors = PUNT_N_ERROR,
+ .error_strings = punt_error_strings,
+};
+
typedef struct
{
@@ -614,7 +645,6 @@ punt_socket_rx (vlib_main_t * vm,
return total_count;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (punt_socket_rx_node) =
{
.function = punt_socket_rx,
@@ -633,7 +663,6 @@ VLIB_REGISTER_NODE (punt_socket_rx_node) =
},
.format_trace = format_punt_trace,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/reass/ip4_full_reass.c b/src/vnet/ip/reass/ip4_full_reass.c
index d2069c0876c..bab7d479dcf 100644
--- a/src/vnet/ip/reass/ip4_full_reass.c
+++ b/src/vnet/ip/reass/ip4_full_reass.c
@@ -23,16 +23,21 @@
#include <vppinfra/vec.h>
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
+#include <vnet/ip/ip.api_enum.h>
#include <vppinfra/fifo.h>
#include <vppinfra/bihash_16_8.h>
#include <vnet/ip/reass/ip4_full_reass.h>
#include <stddef.h>
#define MSEC_PER_SEC 1000
-#define IP4_REASS_TIMEOUT_DEFAULT_MS 100
-#define IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default
+#define IP4_REASS_TIMEOUT_DEFAULT_MS 200
+
+/* With only 1024 reassembly contexts per thread, either a DDoS attack or a
+ * burst of genuine timeouts could quickly exhaust the context space and
+ * leave us unable to perform reassembly, hence the short walk interval */
+#define IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 50 // 50 ms default
#define IP4_REASS_MAX_REASSEMBLIES_DEFAULT 1024
-#define IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
+#define IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
#define IP4_REASS_HT_LOAD_FACTOR (0.75)
#define IP4_REASS_DEBUG_BUFFERS 0
@@ -68,21 +73,19 @@ typedef enum
typedef struct
{
- union
+ struct
{
- struct
- {
- u32 xx_id;
- ip4_address_t src;
- ip4_address_t dst;
- u16 frag_id;
- u8 proto;
- u8 unused;
- };
- u64 as_u64[2];
+ u16 frag_id;
+ u8 proto;
+ u8 unused;
+ u32 fib_index;
+ ip4_address_t src;
+ ip4_address_t dst;
};
} ip4_full_reass_key_t;
+STATIC_ASSERT_SIZEOF (ip4_full_reass_key_t, 16);
+
typedef union
{
struct
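
Editor's note: with the key flattened into a packed 16-byte struct (the new STATIC_ASSERT_SIZEOF pins the size, and fib_index replaces the old xx_id field), the later hunks copy it into the bihash key wholesale instead of going through as_u64[2]. The round trip, e.g. when deleting on reassembly free:

  clib_bihash_kv_16_8_t kv = {};
  clib_memcpy_fast (&kv, &reass->key, sizeof (kv.key));
  clib_bihash_add_del_16_8 (&rm->hash, &kv, 0 /* 0 = delete */);
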
@@ -155,6 +158,8 @@ typedef struct
ip4_full_reass_t *pool;
u32 reass_n;
u32 id_counter;
+ // for pacing the main thread timeouts
+ u32 last_id;
clib_spinlock_t lock;
} ip4_full_reass_per_thread_t;
@@ -177,17 +182,19 @@ typedef struct
// convenience
vlib_main_t *vlib_main;
- // node index of ip4-drop node
- u32 ip4_drop_idx;
u32 ip4_full_reass_expire_node_idx;
/** Worker handoff */
u32 fq_index;
+ u32 fq_local_index;
u32 fq_feature_index;
u32 fq_custom_index;
// reference count for enabling/disabling feature - per interface
u32 *feature_use_refcount_per_intf;
+
+ // whether local fragmented packets are reassembled or not
+ int is_local_reass_enabled;
} ip4_full_reass_main_t;
extern ip4_full_reass_main_t ip4_full_reass_main;
@@ -219,6 +226,7 @@ typedef enum
RANGE_OVERLAP,
FINALIZE,
HANDOFF,
+ PASSTHROUGH,
} ip4_full_reass_trace_operation_e;
typedef struct
@@ -329,6 +337,9 @@ format_ip4_full_reass_trace (u8 * s, va_list * args)
format (s, "handoff from thread #%u to thread #%u", t->thread_id,
t->thread_id_to);
break;
+ case PASSTHROUGH:
+ s = format (s, "passthrough - not a fragment");
+ break;
}
return s;
}
@@ -404,13 +415,16 @@ ip4_full_reass_free (ip4_full_reass_main_t * rm,
ip4_full_reass_per_thread_t * rt,
ip4_full_reass_t * reass)
{
- clib_bihash_kv_16_8_t kv;
- kv.key[0] = reass->key.as_u64[0];
- kv.key[1] = reass->key.as_u64[1];
+ clib_bihash_kv_16_8_t kv = {};
+ clib_memcpy_fast (&kv, &reass->key, sizeof (kv.key));
clib_bihash_add_del_16_8 (&rm->hash, &kv, 0);
return ip4_full_reass_free_ctx (rt, reass);
}
+/* n_left_to_next and to_next are taken as input params because this
+ * function may be called from a graph node that maintains its own local
+ * copies of these variables; ignoring those and enqueueing the buffers
+ * via fresh local variables would cause either a buffer leak or corruption */
always_inline void
ip4_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node,
ip4_full_reass_t *reass)
@@ -419,58 +433,103 @@ ip4_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_buffer_t *range_b;
vnet_buffer_opaque_t *range_vnb;
u32 *to_free = NULL;
+
while (~0 != range_bi)
{
range_b = vlib_get_buffer (vm, range_bi);
range_vnb = vnet_buffer (range_b);
- u32 bi = range_bi;
- while (~0 != bi)
+
+ if (~0 != range_bi)
{
- vec_add1 (to_free, bi);
- vlib_buffer_t *b = vlib_get_buffer (vm, bi);
- if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
- {
- bi = b->next_buffer;
- b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
- }
- else
- {
- bi = ~0;
- }
+ vec_add1 (to_free, range_bi);
}
+
range_bi = range_vnb->ip.reass.next_range_bi;
}
+
/* send to next_error_index */
- if (~0 != reass->error_next_index)
+ if (~0 != reass->error_next_index &&
+ reass->error_next_index < node->n_next_nodes)
{
- u32 n_left_to_next, *to_next, next_index;
+ u32 n_free = vec_len (to_free);
+
+ /* record number of packets sent to custom app */
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_TO_CUSTOM_APP, n_free);
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ for (u32 i = 0; i < n_free; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, to_free[i]);
+ if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
+ ip4_full_reass_add_trace (vm, node, reass, to_free[i],
+ RANGE_DISCARD, 0, ~0);
+ }
- next_index = reass->error_next_index;
- u32 bi = ~0;
+ vlib_buffer_enqueue_to_single_next (vm, node, to_free,
+ reass->error_next_index, n_free);
+ }
+ else
+ {
+ vlib_buffer_free (vm, to_free, vec_len (to_free));
+ }
+ vec_free (to_free);
+}
- while (vec_len (to_free) > 0)
- {
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+always_inline void
+sanitize_reass_buffers_add_missing (vlib_main_t *vm, ip4_full_reass_t *reass,
+ u32 *bi0)
+{
+ u32 range_bi = reass->first_bi;
+ vlib_buffer_t *range_b;
+ vnet_buffer_opaque_t *range_vnb;
- while (vec_len (to_free) > 0 && n_left_to_next > 0)
+ while (~0 != range_bi)
+ {
+ range_b = vlib_get_buffer (vm, range_bi);
+ range_vnb = vnet_buffer (range_b);
+ u32 bi = range_bi;
+ if (~0 != bi)
+ {
+ if (bi == *bi0)
+ *bi0 = ~0;
+ if (range_b->flags & VLIB_BUFFER_NEXT_PRESENT)
{
- bi = vec_pop (to_free);
-
- if (~0 != bi)
+ u32 _bi = bi;
+ vlib_buffer_t *_b = vlib_get_buffer (vm, _bi);
+ while (_b->flags & VLIB_BUFFER_NEXT_PRESENT)
{
- to_next[0] = bi;
- to_next += 1;
- n_left_to_next -= 1;
+ if (_b->next_buffer != range_vnb->ip.reass.next_range_bi)
+ {
+ _bi = _b->next_buffer;
+ _b = vlib_get_buffer (vm, _bi);
+ }
+ else
+ {
+ _b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+ break;
+ }
}
}
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ range_bi = range_vnb->ip.reass.next_range_bi;
}
}
- else
+ if (*bi0 != ~0)
{
- vlib_buffer_free (vm, to_free, vec_len (to_free));
+ vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
+ vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
+ if (~0 != reass->first_bi)
+ {
+ fvnb->ip.reass.next_range_bi = reass->first_bi;
+ reass->first_bi = *bi0;
+ }
+ else
+ {
+ reass->first_bi = *bi0;
+ fvnb->ip.reass.next_range_bi = ~0;
+ }
+ *bi0 = ~0;
}
- vec_free (to_free);
}
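The rewritten drop path above also replaces the manual
vlib_get_next_frame()/vlib_put_next_frame() loop with
vlib_buffer_enqueue_to_single_next(), which chunks the vector into frames
internally. The idiom in isolation, assuming all buffers share a single next
node:

    static void
    send_all_to_one_next (vlib_main_t *vm, vlib_node_runtime_t *node,
                          u32 *bis /* vector of buffer indices */,
                          u16 next_index)
    {
      /* splits bis into VLIB_FRAME_SIZE-sized frames as needed */
      vlib_buffer_enqueue_to_single_next (vm, node, bis, next_index,
                                          vec_len (bis));
    }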
always_inline void
@@ -484,10 +543,10 @@ ip4_full_reass_init (ip4_full_reass_t * reass)
}
always_inline ip4_full_reass_t *
-ip4_full_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip4_full_reass_main_t * rm,
- ip4_full_reass_per_thread_t * rt,
- ip4_full_reass_kv_t * kv, u8 * do_handoff)
+ip4_full_reass_find_or_create (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip4_full_reass_main_t *rm,
+ ip4_full_reass_per_thread_t *rt,
+ ip4_full_reass_kv_t *kv, u8 *do_handoff)
{
ip4_full_reass_t *reass;
f64 now;
@@ -510,6 +569,8 @@ again:
if (now > reass->last_heard + rm->timeout)
{
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_TIMEOUT, 1);
ip4_full_reass_drop_all (vm, node, reass);
ip4_full_reass_free (rm, rt, reass);
reass = NULL;
@@ -538,8 +599,7 @@ again:
++rt->reass_n;
}
- reass->key.as_u64[0] = kv->kv.key[0];
- reass->key.as_u64[1] = kv->kv.key[1];
+ clib_memcpy_fast (&reass->key, &kv->kv.key, sizeof (reass->key));
kv->v.reass_index = (reass - rt->pool);
kv->v.memory_owner_thread_index = vm->thread_index;
reass->last_heard = now;
@@ -568,7 +628,6 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_t *last_b = NULL;
u32 sub_chain_bi = reass->first_bi;
u32 total_length = 0;
- u32 buf_cnt = 0;
do
{
u32 tmp_bi = sub_chain_bi;
@@ -605,7 +664,6 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end;
while (1)
{
- ++buf_cnt;
if (trim_front)
{
if (trim_front > tmp->current_length)
@@ -755,6 +813,16 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
*next0 = reass->next_index;
}
vnet_buffer (first_b)->ip.reass.estimated_mtu = reass->min_fragment_length;
+
+ /* Keep track of number of successfully reassembled packets and number of
+ * fragments reassembled */
+ vlib_node_increment_counter (vm, node->node_index, IP4_ERROR_REASS_SUCCESS,
+ 1);
+
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_FRAGMENTS_REASSEMBLED,
+ reass->fragments_n);
+
*error0 = IP4_ERROR_NONE;
ip4_full_reass_free (rm, rt, reass);
reass = NULL;
@@ -1090,199 +1158,216 @@ ip4_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
}
always_inline uword
-ip4_full_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, ip4_full_reass_node_type_t type)
+ip4_full_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, ip4_full_reass_node_type_t type,
+ bool is_local)
{
u32 *from = vlib_frame_vector_args (frame);
- u32 n_left_from, n_left_to_next, *to_next, next_index;
+ u32 n_left, n_next = 0, to_next[VLIB_FRAME_SIZE];
ip4_full_reass_main_t *rm = &ip4_full_reass_main;
ip4_full_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
+ u16 nexts[VLIB_FRAME_SIZE];
+
clib_spinlock_lock (&rt->lock);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
- while (n_left_from > 0)
+ n_left = frame->n_vectors;
+ while (n_left > 0)
{
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0;
- vlib_buffer_t *b0;
- u32 next0;
- u32 error0 = IP4_ERROR_NONE;
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 error0 = IP4_ERROR_NONE;
- bi0 = from[0];
- b0 = vlib_get_buffer (vm, bi0);
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
- ip4_header_t *ip0 = vlib_buffer_get_current (b0);
- if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
+ ip4_header_t *ip0 = vlib_buffer_get_current (b0);
+ if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
+ {
+ // this is a whole packet - no fragmentation
+ if (CUSTOM != type)
{
- // this is a whole packet - no fragmentation
- if (CUSTOM != type)
- {
- next0 = IP4_FULL_REASS_NEXT_INPUT;
- }
- else
- {
- next0 = vnet_buffer (b0)->ip.reass.next_index;
- }
- goto packet_enqueue;
+ next0 = IP4_FULL_REASS_NEXT_INPUT;
}
- const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
- const u32 fragment_length =
- clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
- const u32 fragment_last = fragment_first + fragment_length - 1;
- if (fragment_first > fragment_last || fragment_first + fragment_length > UINT16_MAX - 20 || (fragment_length < 8 && ip4_get_fragment_more (ip0))) // 8 is minimum frag length per RFC 791
+ else
{
- next0 = IP4_FULL_REASS_NEXT_DROP;
- error0 = IP4_ERROR_REASS_MALFORMED_PACKET;
- goto packet_enqueue;
+ next0 = vnet_buffer (b0)->ip.reass.next_index;
}
- ip4_full_reass_kv_t kv;
- u8 do_handoff = 0;
-
- kv.k.as_u64[0] =
- (u64) vec_elt (ip4_main.fib_index_by_sw_if_index,
- vnet_buffer (b0)->sw_if_index[VLIB_RX]) |
- (u64) ip0->src_address.as_u32 << 32;
- kv.k.as_u64[1] =
- (u64) ip0->dst_address.
- as_u32 | (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48;
-
- ip4_full_reass_t *reass =
- ip4_full_reass_find_or_create (vm, node, rm, rt, &kv,
- &do_handoff);
-
- if (reass)
+ ip4_full_reass_add_trace (vm, node, NULL, bi0, PASSTHROUGH, 0, ~0);
+ goto packet_enqueue;
+ }
+
+ if (is_local && !rm->is_local_reass_enabled)
+ {
+ next0 = IP4_FULL_REASS_NEXT_DROP;
+ goto packet_enqueue;
+ }
+
+ const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
+ const u32 fragment_length =
+ clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
+ const u32 fragment_last = fragment_first + fragment_length - 1;
+
+ /* Keep track of received fragments */
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_FRAGMENTS_RCVD, 1);
+
+ if (fragment_first > fragment_last ||
+ fragment_first + fragment_length > UINT16_MAX - 20 ||
+ (fragment_length < 8 && // 8 is minimum frag length per RFC 791
+ ip4_get_fragment_more (ip0)))
+ {
+ next0 = IP4_FULL_REASS_NEXT_DROP;
+ error0 = IP4_ERROR_REASS_MALFORMED_PACKET;
+ goto packet_enqueue;
+ }
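A worked example for each of the three bounds above (fragment offsets are
carried in 8-byte units per RFC 791, hence the minimum length for non-last
fragments):

    /* 1. zero-payload fragment at offset 1: ip->length = 20, header = 20
     *      fragment_length = 0, fragment_last = 8 + 0 - 1 = 7 < 8
     *      -> fragment_first > fragment_last, malformed
     * 2. oversized datagram: fragment_first = 8189 * 8 = 65512,
     *    fragment_length = 100
     *      65512 + 100 = 65612 > 65515 (UINT16_MAX - 20, the largest
     *      payload a minimal 20-byte header can describe) -> malformed
     * 3. short non-last fragment: fragment_length = 4 with MF set
     *      4 < 8, so the next fragment could never start on an 8-byte
     *      boundary -> malformed */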
+
+ u32 fib_index = (vnet_buffer (b0)->sw_if_index[VLIB_TX] == (u32) ~0) ?
+ vec_elt (ip4_main.fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]) :
+ vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+ ip4_full_reass_kv_t kv = { .k.fib_index = fib_index,
+ .k.src.as_u32 = ip0->src_address.as_u32,
+ .k.dst.as_u32 = ip0->dst_address.as_u32,
+ .k.frag_id = ip0->fragment_id,
+ .k.proto = ip0->protocol
+
+ };
+ u8 do_handoff = 0;
+
+ ip4_full_reass_t *reass =
+ ip4_full_reass_find_or_create (vm, node, rm, rt, &kv, &do_handoff);
+
+ if (reass)
+ {
+ const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
+ if (0 == fragment_first)
{
- const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
- if (0 == fragment_first)
- {
- reass->sendout_thread_index = vm->thread_index;
- }
+ reass->sendout_thread_index = vm->thread_index;
}
+ }
- if (PREDICT_FALSE (do_handoff))
+ if (PREDICT_FALSE (do_handoff))
+ {
+ next0 = IP4_FULL_REASS_NEXT_HANDOFF;
+ vnet_buffer (b0)->ip.reass.owner_thread_index =
+ kv.v.memory_owner_thread_index;
+ }
+ else if (reass)
+ {
+ u32 handoff_thread_idx;
+ u32 counter = ~0;
+ switch (ip4_full_reass_update (vm, node, rm, rt, reass, &bi0, &next0,
+ &error0, CUSTOM == type,
+ &handoff_thread_idx))
{
+ case IP4_REASS_RC_OK:
+ /* nothing to do here */
+ break;
+ case IP4_REASS_RC_HANDOFF:
next0 = IP4_FULL_REASS_NEXT_HANDOFF;
+ b0 = vlib_get_buffer (vm, bi0);
vnet_buffer (b0)->ip.reass.owner_thread_index =
- kv.v.memory_owner_thread_index;
- }
- else if (reass)
- {
- u32 handoff_thread_idx;
- switch (ip4_full_reass_update
- (vm, node, rm, rt, reass, &bi0, &next0,
- &error0, CUSTOM == type, &handoff_thread_idx))
- {
- case IP4_REASS_RC_OK:
- /* nothing to do here */
- break;
- case IP4_REASS_RC_HANDOFF:
- next0 = IP4_FULL_REASS_NEXT_HANDOFF;
- b0 = vlib_get_buffer (vm, bi0);
- vnet_buffer (b0)->ip.reass.owner_thread_index =
- handoff_thread_idx;
- break;
- case IP4_REASS_RC_TOO_MANY_FRAGMENTS:
- vlib_node_increment_counter (vm, node->node_index,
- IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
- 1);
- ip4_full_reass_drop_all (vm, node, reass);
- ip4_full_reass_free (rm, rt, reass);
- goto next_packet;
- break;
- case IP4_REASS_RC_NO_BUF:
- vlib_node_increment_counter (vm, node->node_index,
- IP4_ERROR_REASS_NO_BUF, 1);
- ip4_full_reass_drop_all (vm, node, reass);
- ip4_full_reass_free (rm, rt, reass);
- goto next_packet;
- break;
- case IP4_REASS_RC_INTERNAL_ERROR:
- /* drop everything and start with a clean slate */
- vlib_node_increment_counter (vm, node->node_index,
- IP4_ERROR_REASS_INTERNAL_ERROR,
- 1);
- ip4_full_reass_drop_all (vm, node, reass);
- ip4_full_reass_free (rm, rt, reass);
- goto next_packet;
- break;
- }
+ handoff_thread_idx;
+ break;
+ case IP4_REASS_RC_TOO_MANY_FRAGMENTS:
+ counter = IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
+ break;
+ case IP4_REASS_RC_NO_BUF:
+ counter = IP4_ERROR_REASS_NO_BUF;
+ break;
+ case IP4_REASS_RC_INTERNAL_ERROR:
+ counter = IP4_ERROR_REASS_INTERNAL_ERROR;
+	      /* Sanitization is needed in internal error cases only, as
+	       * the incoming packet is already dropped in other cases;
+	       * adding bi0 back to the reassembly list also fixes the
+	       * leaking of buffers during internal errors.
+	       *
+	       * It also doesn't make sense to send these buffers to the
+	       * custom app, as these fragments hit internal errors */
+ sanitize_reass_buffers_add_missing (vm, reass, &bi0);
+ reass->error_next_index = ~0;
+ break;
}
- else
+
+ if (~0 != counter)
{
- next0 = IP4_FULL_REASS_NEXT_DROP;
- error0 = IP4_ERROR_REASS_LIMIT_REACHED;
+ vlib_node_increment_counter (vm, node->node_index, counter, 1);
+ ip4_full_reass_drop_all (vm, node, reass);
+ ip4_full_reass_free (rm, rt, reass);
+ goto next_packet;
}
+ }
+ else
+ {
+ next0 = IP4_FULL_REASS_NEXT_DROP;
+ error0 = IP4_ERROR_REASS_LIMIT_REACHED;
+ }
+ packet_enqueue:
- packet_enqueue:
-
- if (bi0 != ~0)
+ if (bi0 != ~0)
+ {
+ /* bi0 might have been updated by reass_finalize, reload */
+ b0 = vlib_get_buffer (vm, bi0);
+ if (IP4_ERROR_NONE != error0)
{
- to_next[0] = bi0;
- to_next += 1;
- n_left_to_next -= 1;
+ b0->error = node->errors[error0];
+ }
- /* bi0 might have been updated by reass_finalize, reload */
- b0 = vlib_get_buffer (vm, bi0);
- if (IP4_ERROR_NONE != error0)
+ if (next0 == IP4_FULL_REASS_NEXT_HANDOFF)
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- b0->error = node->errors[error0];
+ ip4_full_reass_add_trace (
+ vm, node, NULL, bi0, HANDOFF, 0,
+ vnet_buffer (b0)->ip.reass.owner_thread_index);
}
+ }
+ else if (FEATURE == type && IP4_ERROR_NONE == error0)
+ {
+ vnet_feature_next (&next0, b0);
+ }
- if (next0 == IP4_FULL_REASS_NEXT_HANDOFF)
- {
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- ip4_full_reass_add_trace (
- vm, node, NULL, bi0, HANDOFF, 0,
- vnet_buffer (b0)->ip.reass.owner_thread_index);
- }
- }
- else if (FEATURE == type && IP4_ERROR_NONE == error0)
- {
- vnet_feature_next (&next0, b0);
- }
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- IP4_REASS_DEBUG_BUFFER (bi0, enqueue_next);
+	  /* Increment the to-custom-app counter here as well, as this
+	   * fragment is also going to the application */
+ if (CUSTOM == type)
+ {
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_TO_CUSTOM_APP, 1);
}
- next_packet:
- from += 1;
- n_left_from -= 1;
+ to_next[n_next] = bi0;
+ nexts[n_next] = next0;
+ n_next++;
+ IP4_REASS_DEBUG_BUFFER (bi0, enqueue_next);
}
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ next_packet:
+ from += 1;
+ n_left -= 1;
}
clib_spinlock_unlock (&rt->lock);
+
+ vlib_buffer_enqueue_to_next (vm, node, to_next, nexts, n_next);
return frame->n_vectors;
}
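The loop now batches results into to_next[]/nexts[] and issues a single
vlib_buffer_enqueue_to_next() after the spinlock is released, instead of
juggling per-next frames while holding it. The skeleton of the pattern:

    u32 bis[VLIB_FRAME_SIZE];
    u16 nexts[VLIB_FRAME_SIZE];
    u32 n = 0;

    /* per packet, inside the locked loop */
    bis[n] = bi0;
    nexts[n] = next0;
    n++;

    /* once, after clib_spinlock_unlock () */
    vlib_buffer_enqueue_to_next (vm, node, bis, nexts, n);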
-static char *ip4_full_reass_error_strings[] = {
-#define _(sym, string) string,
- foreach_ip4_error
-#undef _
-};
-
VLIB_NODE_FN (ip4_full_reass_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_full_reass_inline (vm, node, frame, NORMAL);
+ return ip4_full_reass_inline (vm, node, frame, NORMAL, false /* is_local */);
}
VLIB_REGISTER_NODE (ip4_full_reass_node) = {
.name = "ip4-full-reassembly",
.vector_size = sizeof (u32),
.format_trace = format_ip4_full_reass_trace,
- .n_errors = ARRAY_LEN (ip4_full_reass_error_strings),
- .error_strings = ip4_full_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_FULL_REASS_N_NEXT,
.next_nodes =
{
@@ -1293,19 +1378,42 @@ VLIB_REGISTER_NODE (ip4_full_reass_node) = {
},
};
+VLIB_NODE_FN (ip4_local_full_reass_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip4_full_reass_inline (vm, node, frame, NORMAL, true /* is_local */);
+}
+
+VLIB_REGISTER_NODE (ip4_local_full_reass_node) = {
+ .name = "ip4-local-full-reassembly",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip4_full_reass_trace,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
+ .n_next_nodes = IP4_FULL_REASS_N_NEXT,
+ .next_nodes =
+ {
+ [IP4_FULL_REASS_NEXT_INPUT] = "ip4-input",
+ [IP4_FULL_REASS_NEXT_DROP] = "ip4-drop",
+ [IP4_FULL_REASS_NEXT_HANDOFF] = "ip4-local-full-reassembly-handoff",
+
+ },
+};
+
VLIB_NODE_FN (ip4_full_reass_node_feature) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_full_reass_inline (vm, node, frame, FEATURE);
+ return ip4_full_reass_inline (vm, node, frame, FEATURE,
+ false /* is_local */);
}
VLIB_REGISTER_NODE (ip4_full_reass_node_feature) = {
.name = "ip4-full-reassembly-feature",
.vector_size = sizeof (u32),
.format_trace = format_ip4_full_reass_trace,
- .n_errors = ARRAY_LEN (ip4_full_reass_error_strings),
- .error_strings = ip4_full_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_FULL_REASS_N_NEXT,
.next_nodes =
{
@@ -1316,26 +1424,26 @@ VLIB_REGISTER_NODE (ip4_full_reass_node_feature) = {
};
VNET_FEATURE_INIT (ip4_full_reass_feature, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "ip4-full-reassembly-feature",
- .runs_before = VNET_FEATURES ("ip4-lookup",
- "ipsec4-input-feature"),
- .runs_after = 0,
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-full-reassembly-feature",
+ .runs_before = VNET_FEATURES ("ip4-lookup", "ipsec4-input-feature",
+ "ip4-sv-reassembly-feature"),
+ .runs_after = 0,
};
VLIB_NODE_FN (ip4_full_reass_node_custom) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_full_reass_inline (vm, node, frame, CUSTOM);
+ return ip4_full_reass_inline (vm, node, frame, CUSTOM, false /* is_local */);
}
VLIB_REGISTER_NODE (ip4_full_reass_node_custom) = {
.name = "ip4-full-reassembly-custom",
.vector_size = sizeof (u32),
.format_trace = format_ip4_full_reass_trace,
- .n_errors = ARRAY_LEN (ip4_full_reass_error_strings),
- .error_strings = ip4_full_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_FULL_REASS_N_NEXT,
.next_nodes =
{
@@ -1345,15 +1453,6 @@ VLIB_REGISTER_NODE (ip4_full_reass_node_custom) = {
},
};
-VNET_FEATURE_INIT (ip4_full_reass_custom, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "ip4-full-reassembly-feature",
- .runs_before = VNET_FEATURES ("ip4-lookup",
- "ipsec4-input-feature"),
- .runs_after = 0,
-};
-
-
#ifndef CLIB_MARCH_VARIANT
uword
ip4_full_reass_custom_register_next_node (uword node_index)
@@ -1369,7 +1468,9 @@ ip4_full_reass_get_nbuckets ()
u32 nbuckets;
u8 i;
- nbuckets = (u32) (rm->max_reass_n / IP4_REASS_HT_LOAD_FACTOR);
+ /* need more mem with more workers */
+ nbuckets = (u32) (rm->max_reass_n * (vlib_num_workers () + 1) /
+ IP4_REASS_HT_LOAD_FACTOR);
for (i = 0; i < 31; i++)
if ((1 << i) >= nbuckets)
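Worked sizing example for the scaling above, assuming the defaults and a
hypothetical 3-worker deployment:

    /* max_reass_n = 1024, vlib_num_workers () = 3
     *   nbuckets = 1024 * (3 + 1) / 0.75 = 5461
     * the power-of-two loop then rounds this up to 1 << 13 = 8192 */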
@@ -1495,17 +1596,17 @@ ip4_full_reass_init_function (vlib_main_t * vm)
nbuckets = ip4_full_reass_get_nbuckets ();
clib_bihash_init_16_8 (&rm->hash, "ip4-dr", nbuckets, nbuckets * 1024);
- node = vlib_get_node_by_name (vm, (u8 *) "ip4-drop");
- ASSERT (node);
- rm->ip4_drop_idx = node->index;
-
rm->fq_index = vlib_frame_queue_main_init (ip4_full_reass_node.index, 0);
+ rm->fq_local_index =
+ vlib_frame_queue_main_init (ip4_local_full_reass_node.index, 0);
rm->fq_feature_index =
vlib_frame_queue_main_init (ip4_full_reass_node_feature.index, 0);
rm->fq_custom_index =
vlib_frame_queue_main_init (ip4_full_reass_node_custom.index, 0);
rm->feature_use_refcount_per_intf = NULL;
+ rm->is_local_reass_enabled = 1;
+
return error;
}
@@ -1547,6 +1648,7 @@ ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
uword thread_index = 0;
int index;
const uword nthreads = vlib_num_workers () + 1;
+
for (thread_index = 0; thread_index < nthreads; ++thread_index)
{
ip4_full_reass_per_thread_t *rt =
@@ -1554,13 +1656,39 @@ ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
clib_spinlock_lock (&rt->lock);
vec_reset_length (pool_indexes_to_free);
- pool_foreach_index (index, rt->pool) {
- reass = pool_elt_at_index (rt->pool, index);
- if (now > reass->last_heard + rm->timeout)
- {
- vec_add1 (pool_indexes_to_free, index);
- }
- }
+
+      /* Pace the number of timeouts handled per thread, to avoid barrier
+       * sync issues in real-world scenarios */
+
+ u32 beg = rt->last_id;
+ /* to ensure we walk at least once per sec per context */
+ u32 end =
+ beg + (IP4_REASS_MAX_REASSEMBLIES_DEFAULT *
+ IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS / MSEC_PER_SEC +
+ 1);
+ if (end > vec_len (rt->pool))
+ {
+ end = vec_len (rt->pool);
+ rt->last_id = 0;
+ }
+ else
+ {
+ rt->last_id = end;
+ }
+
+ pool_foreach_stepping_index (index, beg, end, rt->pool)
+ {
+ reass = pool_elt_at_index (rt->pool, index);
+ if (now > reass->last_heard + rm->timeout)
+ {
+ vec_add1 (pool_indexes_to_free, index);
+ }
+ }
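A trace of the pacing window with the defaults, wrap-around included (the
beg value is illustrative):

    /* budget per walk = 1024 * 50 / 1000 + 1 = 52 entries
     * walk k:   beg = 988, end = 988 + 52 = 1040 > vec_len (rt->pool) = 1024
     *           -> end clamped to 1024, rt->last_id reset to 0
     * walk k+1: beg = 0, end = 52
     * so every context is still examined roughly once per second */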
+
+ if (vec_len (pool_indexes_to_free))
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_TIMEOUT,
+ vec_len (pool_indexes_to_free));
int *i;
vec_foreach (i, pool_indexes_to_free)
{
@@ -1575,7 +1703,7 @@ ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
vec_free (pool_indexes_to_free);
if (event_data)
{
- _vec_len (event_data) = 0;
+ vec_set_len (event_data, 0);
}
}
@@ -1583,13 +1711,12 @@ ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
}
VLIB_REGISTER_NODE (ip4_full_reass_expire_node) = {
- .function = ip4_full_reass_walk_expired,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "ip4-full-reassembly-expire-walk",
- .format_trace = format_ip4_full_reass_trace,
- .n_errors = ARRAY_LEN (ip4_full_reass_error_strings),
- .error_strings = ip4_full_reass_error_strings,
-
+ .function = ip4_full_reass_walk_expired,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "ip4-full-reassembly-expire-walk",
+ .format_trace = format_ip4_full_reass_trace,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
};
static u8 *
@@ -1597,9 +1724,8 @@ format_ip4_full_reass_key (u8 * s, va_list * args)
{
ip4_full_reass_key_t *key = va_arg (*args, ip4_full_reass_key_t *);
s =
- format (s,
- "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
- key->xx_id, format_ip4_address, &key->src, format_ip4_address,
+ format (s, "fib_index: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
+ key->fib_index, format_ip4_address, &key->src, format_ip4_address,
&key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
return s;
}
@@ -1750,10 +1876,10 @@ format_ip4_full_reass_handoff_trace (u8 * s, va_list * args)
}
always_inline uword
-ip4_full_reass_handoff_node_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame,
- ip4_full_reass_node_type_t type)
+ip4_full_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame,
+ ip4_full_reass_node_type_t type,
+ bool is_local)
{
ip4_full_reass_main_t *rm = &ip4_full_reass_main;
@@ -1772,7 +1898,14 @@ ip4_full_reass_handoff_node_inline (vlib_main_t * vm,
switch (type)
{
case NORMAL:
- fq_index = rm->fq_index;
+ if (is_local)
+ {
+ fq_index = rm->fq_local_index;
+ }
+ else
+ {
+ fq_index = rm->fq_index;
+ }
break;
case FEATURE:
fq_index = rm->fq_feature_index;
@@ -1782,7 +1915,6 @@ ip4_full_reass_handoff_node_inline (vlib_main_t * vm,
break;
default:
clib_warning ("Unexpected `type' (%d)!", type);
- ASSERT (0);
}
while (n_left_from > 0)
@@ -1816,7 +1948,8 @@ VLIB_NODE_FN (ip4_full_reass_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_full_reass_handoff_node_inline (vm, node, frame, NORMAL);
+ return ip4_full_reass_handoff_node_inline (vm, node, frame, NORMAL,
+ false /* is_local */);
}
@@ -1834,16 +1967,36 @@ VLIB_REGISTER_NODE (ip4_full_reass_handoff_node) = {
},
};
+VLIB_NODE_FN (ip4_local_full_reass_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip4_full_reass_handoff_node_inline (vm, node, frame, NORMAL,
+ true /* is_local */);
+}
+
+VLIB_REGISTER_NODE (ip4_local_full_reass_handoff_node) = {
+ .name = "ip4-local-full-reassembly-handoff",
+ .vector_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(ip4_full_reass_handoff_error_strings),
+ .error_strings = ip4_full_reass_handoff_error_strings,
+ .format_trace = format_ip4_full_reass_handoff_trace,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
VLIB_NODE_FN (ip4_full_reass_feature_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t *
node,
vlib_frame_t * frame)
{
- return ip4_full_reass_handoff_node_inline (vm, node, frame, FEATURE);
+ return ip4_full_reass_handoff_node_inline (vm, node, frame, FEATURE,
+ false /* is_local */);
}
-
VLIB_REGISTER_NODE (ip4_full_reass_feature_handoff_node) = {
.name = "ip4-full-reass-feature-hoff",
.vector_size = sizeof (u32),
@@ -1863,10 +2016,10 @@ VLIB_NODE_FN (ip4_full_reass_custom_handoff_node) (vlib_main_t * vm,
node,
vlib_frame_t * frame)
{
- return ip4_full_reass_handoff_node_inline (vm, node, frame, CUSTOM);
+ return ip4_full_reass_handoff_node_inline (vm, node, frame, CUSTOM,
+ false /* is_local */);
}
-
VLIB_REGISTER_NODE (ip4_full_reass_custom_handoff_node) = {
.name = "ip4-full-reass-custom-hoff",
.vector_size = sizeof (u32),
@@ -1906,8 +2059,28 @@ ip4_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
"ip4-full-reassembly-feature",
sw_if_index, 0, 0, 0);
}
- return -1;
+ return 0;
}
+
+void
+ip4_local_full_reass_enable_disable (int enable)
+{
+ if (enable)
+ {
+ ip4_full_reass_main.is_local_reass_enabled = 1;
+ }
+ else
+ {
+ ip4_full_reass_main.is_local_reass_enabled = 0;
+ }
+}
+
+int
+ip4_local_full_reass_enabled ()
+{
+ return ip4_full_reass_main.is_local_reass_enabled;
+}
+
#endif
/*
diff --git a/src/vnet/ip/reass/ip4_full_reass.h b/src/vnet/ip/reass/ip4_full_reass.h
index 000c80c5906..5df8107ca48 100644
--- a/src/vnet/ip/reass/ip4_full_reass.h
+++ b/src/vnet/ip/reass/ip4_full_reass.h
@@ -47,6 +47,9 @@ int ip4_full_reass_enable_disable_with_refcnt (u32 sw_if_index,
int is_enable);
uword ip4_full_reass_custom_register_next_node (uword node_index);
+
+void ip4_local_full_reass_enable_disable (int enable);
+int ip4_local_full_reass_enabled ();
#endif /* __included_ip4_full_reass_h__ */
/*
diff --git a/src/vnet/ip/reass/ip4_sv_reass.c b/src/vnet/ip/reass/ip4_sv_reass.c
index cd5e19b65d3..7c3c2fff217 100644
--- a/src/vnet/ip/reass/ip4_sv_reass.c
+++ b/src/vnet/ip/reass/ip4_sv_reass.c
@@ -48,7 +48,7 @@ typedef struct
{
struct
{
- u32 xx_id;
+ u32 fib_index;
ip4_address_t src;
ip4_address_t dst;
u16 frag_id;
@@ -150,6 +150,7 @@ typedef struct
/** Worker handoff */
u32 fq_index;
u32 fq_feature_index;
+ u32 fq_custom_context_index;
// reference count for enabling/disabling feature - per interface
u32 *feature_use_refcount_per_intf;
@@ -189,6 +190,7 @@ typedef struct
u8 ip_proto;
u16 l4_src_port;
u16 l4_dst_port;
+ int l4_layer_truncated;
} ip4_sv_reass_trace_t;
extern vlib_node_registration_t ip4_sv_reass_node;
@@ -225,6 +227,10 @@ format_ip4_sv_reass_trace (u8 * s, va_list * args)
s = format (s, "[not-fragmented]");
break;
}
+ if (t->l4_layer_truncated)
+ {
+ s = format (s, " [l4-layer-truncated]");
+ }
return s;
}
@@ -232,7 +238,8 @@ static void
ip4_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
ip4_sv_reass_t *reass, u32 bi,
ip4_sv_reass_trace_operation_e action, u32 ip_proto,
- u16 l4_src_port, u16 l4_dst_port)
+ u16 l4_src_port, u16 l4_dst_port,
+ int l4_layer_truncated)
{
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
if (pool_is_free_index
@@ -253,6 +260,7 @@ ip4_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
t->ip_proto = ip_proto;
t->l4_src_port = l4_src_port;
t->l4_dst_port = l4_dst_port;
+ t->l4_layer_truncated = l4_layer_truncated;
#if 0
static u8 *s = NULL;
s = format (s, "%U", format_ip4_sv_reass_trace, NULL, NULL, t);
@@ -314,6 +322,8 @@ ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
ip4_sv_reass_t *reass = NULL;
f64 now = vlib_time_now (vm);
+again:
+
if (!clib_bihash_search_16_8 (&rm->hash, &kv->kv, &kv->kv))
{
if (vm->thread_index != kv->v.thread_index)
@@ -368,10 +378,14 @@ ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
kv->v.thread_index = vm->thread_index;
reass->last_heard = now;
- if (clib_bihash_add_del_16_8 (&rm->hash, &kv->kv, 1))
+ int rv = clib_bihash_add_del_16_8 (&rm->hash, &kv->kv, 2);
+ if (rv)
{
ip4_sv_reass_free (vm, rm, rt, reass);
reass = NULL;
+      // if another worker already created a context, work with that copy
+ if (-2 == rv)
+ goto again;
}
return reass;
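The is_add argument of 2 requests "add, but do not overwrite", and a return
of -2 signals that the key already exists; the retry therefore adopts the
context that another worker inserted first instead of clobbering it. The
control flow in isolation:

    int rv = clib_bihash_add_del_16_8 (&rm->hash, &kv->kv, 2 /* no ovr */);
    if (rv)
      {
        ip4_sv_reass_free (vm, rm, rt, reass); /* lost the race or failed */
        reass = NULL;
        if (-2 == rv) /* key exists: another worker won the insert */
          goto again; /* redo the lookup, use the winner's context */
      }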
@@ -407,9 +421,10 @@ ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_sv_reass_add_trace (vm, node, reass, bi0, REASS_FINISH,
- reass->ip_proto, reass->l4_src_port,
- reass->l4_dst_port);
+ ip4_sv_reass_add_trace (
+ vm, node, reass, bi0, REASS_FINISH, reass->ip_proto,
+ reass->l4_src_port, reass->l4_dst_port,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
}
vec_add1 (reass->cached_buffers, bi0);
@@ -417,8 +432,9 @@ ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
{
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_sv_reass_add_trace (vm, node, reass, bi0, REASS_FRAGMENT_CACHE,
- ~0, ~0, ~0);
+ ip4_sv_reass_add_trace (
+ vm, node, reass, bi0, REASS_FRAGMENT_CACHE, ~0, ~0, ~0,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
if (vec_len (reass->cached_buffers) > rm->max_reass_len)
{
@@ -428,15 +444,33 @@ ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
return rc;
}
+always_inline int
+l4_layer_truncated (ip4_header_t *ip)
+{
+ static const int l4_layer_length[256] = {
+ [IP_PROTOCOL_TCP] = sizeof (tcp_header_t),
+ [IP_PROTOCOL_UDP] = sizeof (udp_header_t),
+ [IP_PROTOCOL_ICMP] = sizeof (icmp46_header_t),
+ };
+
+ return ((u8 *) ip + ip4_header_bytes (ip) + l4_layer_length[ip->protocol] >
+ (u8 *) ip + clib_net_to_host_u16 (ip->length));
+}
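A worked example for the check above (table entries default to 0, so
protocols absent from the table never report truncation):

    /* first fragment of a TCP packet: ip4_header_bytes (ip) = 20,
     * l4_layer_length[IP_PROTOCOL_TCP] = sizeof (tcp_header_t) = 20,
     * but ip->length = 36:
     *   20 + 20 = 40 > 36 -> truncated; the callers below then zero the
     * l4 port fields rather than read past the end of the fragment */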
+
always_inline uword
-ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature,
- bool is_output_feature, bool is_custom)
+ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_feature,
+ bool is_output_feature, bool is_custom,
+ bool with_custom_context)
{
u32 *from = vlib_frame_vector_args (frame);
- u32 n_left_from, n_left_to_next, *to_next, next_index;
+ u32 n_left_from, n_left_to_next, *to_next, *to_next_aux, next_index;
ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
+ u32 *context;
+ if (with_custom_context)
+ context = vlib_frame_aux_args (frame);
+
clib_spinlock_lock (&rt->lock);
n_left_from = frame->n_vectors;
@@ -482,6 +516,7 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
(is_output_feature ? 1 : 0) *
vnet_buffer (b1)->
ip.save_rewrite_length);
+
if (PREDICT_FALSE
(ip4_get_fragment_more (ip0) || ip4_get_fragment_offset (ip0))
|| (ip4_get_fragment_more (ip1) || ip4_get_fragment_offset (ip1)))
@@ -506,29 +541,40 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
- if (IP_PROTOCOL_TCP == ip0->protocol)
+ if (l4_layer_truncated (ip0))
{
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((tcp_header_t *) (ip0 + 1))->flags;
- vnet_buffer (b0)->ip.reass.tcp_ack_number =
- ((tcp_header_t *) (ip0 + 1))->ack_number;
- vnet_buffer (b0)->ip.reass.tcp_seq_number =
- ((tcp_header_t *) (ip0 + 1))->seq_number;
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1;
+ vnet_buffer (b0)->ip.reass.l4_src_port = 0;
+ vnet_buffer (b0)->ip.reass.l4_dst_port = 0;
}
- else if (IP_PROTOCOL_ICMP == ip0->protocol)
+ else
{
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((icmp46_header_t *) (ip0 + 1))->type;
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0;
+ if (IP_PROTOCOL_TCP == ip0->protocol)
+ {
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ ((tcp_header_t *) (ip0 + 1))->flags;
+ vnet_buffer (b0)->ip.reass.tcp_ack_number =
+ ((tcp_header_t *) (ip0 + 1))->ack_number;
+ vnet_buffer (b0)->ip.reass.tcp_seq_number =
+ ((tcp_header_t *) (ip0 + 1))->seq_number;
+ }
+ else if (IP_PROTOCOL_ICMP == ip0->protocol)
+ {
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ ((icmp46_header_t *) (ip0 + 1))->type;
+ }
+ vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
+ vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
}
- vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
- vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_sv_reass_add_trace (vm, node, NULL, from[(b - 2) - bufs],
- REASS_PASSTHROUGH,
- vnet_buffer (b0)->ip.reass.ip_proto,
- vnet_buffer (b0)->ip.reass.l4_src_port,
- vnet_buffer (b0)->ip.reass.l4_dst_port);
+ ip4_sv_reass_add_trace (
+ vm, node, NULL, from[(b - 2) - bufs], REASS_PASSTHROUGH,
+ vnet_buffer (b0)->ip.reass.ip_proto,
+ vnet_buffer (b0)->ip.reass.l4_src_port,
+ vnet_buffer (b0)->ip.reass.l4_dst_port,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
if (is_feature)
{
@@ -541,35 +587,48 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
vnet_buffer (b1)->ip.reass.is_non_first_fragment = 0;
vnet_buffer (b1)->ip.reass.ip_proto = ip1->protocol;
- if (IP_PROTOCOL_TCP == ip1->protocol)
+ if (l4_layer_truncated (ip1))
{
- vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags =
- ((tcp_header_t *) (ip1 + 1))->flags;
- vnet_buffer (b1)->ip.reass.tcp_ack_number =
- ((tcp_header_t *) (ip1 + 1))->ack_number;
- vnet_buffer (b1)->ip.reass.tcp_seq_number =
- ((tcp_header_t *) (ip1 + 1))->seq_number;
+ vnet_buffer (b1)->ip.reass.l4_layer_truncated = 1;
+ vnet_buffer (b1)->ip.reass.l4_src_port = 0;
+ vnet_buffer (b1)->ip.reass.l4_dst_port = 0;
}
- else if (IP_PROTOCOL_ICMP == ip1->protocol)
+ else
{
- vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags =
- ((icmp46_header_t *) (ip1 + 1))->type;
+ vnet_buffer (b1)->ip.reass.l4_layer_truncated = 0;
+ if (IP_PROTOCOL_TCP == ip1->protocol)
+ {
+ vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags =
+ ((tcp_header_t *) (ip1 + 1))->flags;
+ vnet_buffer (b1)->ip.reass.tcp_ack_number =
+ ((tcp_header_t *) (ip1 + 1))->ack_number;
+ vnet_buffer (b1)->ip.reass.tcp_seq_number =
+ ((tcp_header_t *) (ip1 + 1))->seq_number;
+ }
+ else if (IP_PROTOCOL_ICMP == ip1->protocol)
+ {
+ vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags =
+ ((icmp46_header_t *) (ip1 + 1))->type;
+ }
+ vnet_buffer (b1)->ip.reass.l4_src_port = ip4_get_port (ip1, 1);
+ vnet_buffer (b1)->ip.reass.l4_dst_port = ip4_get_port (ip1, 0);
}
- vnet_buffer (b1)->ip.reass.l4_src_port = ip4_get_port (ip1, 1);
- vnet_buffer (b1)->ip.reass.l4_dst_port = ip4_get_port (ip1, 0);
if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_sv_reass_add_trace (vm, node, NULL, from[(b - 1) - bufs],
- REASS_PASSTHROUGH,
- vnet_buffer (b1)->ip.reass.ip_proto,
- vnet_buffer (b1)->ip.reass.l4_src_port,
- vnet_buffer (b1)->ip.reass.l4_dst_port);
+ ip4_sv_reass_add_trace (
+ vm, node, NULL, from[(b - 1) - bufs], REASS_PASSTHROUGH,
+ vnet_buffer (b1)->ip.reass.ip_proto,
+ vnet_buffer (b1)->ip.reass.l4_src_port,
+ vnet_buffer (b1)->ip.reass.l4_dst_port,
+ vnet_buffer (b1)->ip.reass.l4_layer_truncated);
}
n_left_from -= 2;
next[0] = next0;
next[1] = next1;
next += 2;
+ if (with_custom_context)
+ context += 2;
}
while (n_left_from > 0)
@@ -608,34 +667,45 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
- if (IP_PROTOCOL_TCP == ip0->protocol)
+ if (l4_layer_truncated (ip0))
{
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((tcp_header_t *) (ip0 + 1))->flags;
- vnet_buffer (b0)->ip.reass.tcp_ack_number =
- ((tcp_header_t *) (ip0 + 1))->ack_number;
- vnet_buffer (b0)->ip.reass.tcp_seq_number =
- ((tcp_header_t *) (ip0 + 1))->seq_number;
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1;
}
- else if (IP_PROTOCOL_ICMP == ip0->protocol)
+ else
{
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((icmp46_header_t *) (ip0 + 1))->type;
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0;
+ if (IP_PROTOCOL_TCP == ip0->protocol)
+ {
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ ((tcp_header_t *) (ip0 + 1))->flags;
+ vnet_buffer (b0)->ip.reass.tcp_ack_number =
+ ((tcp_header_t *) (ip0 + 1))->ack_number;
+ vnet_buffer (b0)->ip.reass.tcp_seq_number =
+ ((tcp_header_t *) (ip0 + 1))->seq_number;
+ }
+ else if (IP_PROTOCOL_ICMP == ip0->protocol)
+ {
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ ((icmp46_header_t *) (ip0 + 1))->type;
+ }
+ vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
+ vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
}
- vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
- vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_sv_reass_add_trace (vm, node, NULL, from[(b - 1) - bufs],
- REASS_PASSTHROUGH,
- vnet_buffer (b0)->ip.reass.ip_proto,
- vnet_buffer (b0)->ip.reass.l4_src_port,
- vnet_buffer (b0)->ip.reass.l4_dst_port);
+ ip4_sv_reass_add_trace (
+ vm, node, NULL, from[(b - 1) - bufs], REASS_PASSTHROUGH,
+ vnet_buffer (b0)->ip.reass.ip_proto,
+ vnet_buffer (b0)->ip.reass.l4_src_port,
+ vnet_buffer (b0)->ip.reass.l4_dst_port,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
n_left_from -= 1;
next[0] = next0;
next += 1;
+ if (with_custom_context)
+ context += 1;
}
vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
@@ -649,7 +719,11 @@ slow_path:
while (n_left_from > 0)
{
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ if (with_custom_context)
+ vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next,
+ to_next_aux, n_left_to_next);
+ else
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
while (n_left_from > 0 && n_left_to_next > 0)
{
@@ -657,6 +731,7 @@ slow_path:
vlib_buffer_t *b0;
u32 next0;
u32 error0 = IP4_ERROR_NONE;
+ u8 forward_context = 0;
bi0 = from[0];
b0 = vlib_get_buffer (vm, bi0);
@@ -679,29 +754,42 @@ slow_path:
}
vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
- if (IP_PROTOCOL_TCP == ip0->protocol)
+ if (l4_layer_truncated (ip0))
{
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((tcp_header_t *) (ip0 + 1))->flags;
- vnet_buffer (b0)->ip.reass.tcp_ack_number =
- ((tcp_header_t *) (ip0 + 1))->ack_number;
- vnet_buffer (b0)->ip.reass.tcp_seq_number =
- ((tcp_header_t *) (ip0 + 1))->seq_number;
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1;
+ vnet_buffer (b0)->ip.reass.l4_src_port = 0;
+ vnet_buffer (b0)->ip.reass.l4_dst_port = 0;
}
- else if (IP_PROTOCOL_ICMP == ip0->protocol)
+ else
{
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((icmp46_header_t *) (ip0 + 1))->type;
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0;
+ if (IP_PROTOCOL_TCP == ip0->protocol)
+ {
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ ((tcp_header_t *) (ip0 + 1))->flags;
+ vnet_buffer (b0)->ip.reass.tcp_ack_number =
+ ((tcp_header_t *) (ip0 + 1))->ack_number;
+ vnet_buffer (b0)->ip.reass.tcp_seq_number =
+ ((tcp_header_t *) (ip0 + 1))->seq_number;
+ }
+ else if (IP_PROTOCOL_ICMP == ip0->protocol)
+ {
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ ((icmp46_header_t *) (ip0 + 1))->type;
+ }
+ vnet_buffer (b0)->ip.reass.l4_src_port =
+ ip4_get_port (ip0, 1);
+ vnet_buffer (b0)->ip.reass.l4_dst_port =
+ ip4_get_port (ip0, 0);
}
- vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
- vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
ip4_sv_reass_add_trace (
vm, node, NULL, bi0, REASS_PASSTHROUGH,
vnet_buffer (b0)->ip.reass.ip_proto,
vnet_buffer (b0)->ip.reass.l4_src_port,
- vnet_buffer (b0)->ip.reass.l4_dst_port);
+ vnet_buffer (b0)->ip.reass.l4_dst_port,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
goto packet_enqueue;
}
@@ -719,13 +807,17 @@ slow_path:
ip4_sv_reass_kv_t kv;
u8 do_handoff = 0;
- kv.k.as_u64[0] =
- (u64) vec_elt (ip4_main.fib_index_by_sw_if_index,
- vnet_buffer (b0)->sw_if_index[VLIB_RX]) |
- (u64) ip0->src_address.as_u32 << 32;
- kv.k.as_u64[1] =
- (u64) ip0->dst_address.
- as_u32 | (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48;
+ if (with_custom_context)
+ kv.k.as_u64[0] = (u64) *context | (u64) ip0->src_address.as_u32
+ << 32;
+ else
+ kv.k.as_u64[0] =
+ (u64) vec_elt (ip4_main.fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]) |
+ (u64) ip0->src_address.as_u32 << 32;
+ kv.k.as_u64[1] = (u64) ip0->dst_address.as_u32 |
+ (u64) ip0->fragment_id << 32 |
+ (u64) ip0->protocol << 48;
ip4_sv_reass_t *reass =
ip4_sv_reass_find_or_create (vm, rm, rt, &kv, &do_handoff);
@@ -735,6 +827,8 @@ slow_path:
next0 = IP4_SV_REASSEMBLY_NEXT_HANDOFF;
vnet_buffer (b0)->ip.reass.owner_thread_index =
kv.v.thread_index;
+ if (with_custom_context)
+ forward_context = 1;
goto packet_enqueue;
}
@@ -771,31 +865,32 @@ slow_path:
{
ip4_sv_reass_add_trace (
vm, node, reass, bi0, REASS_FRAGMENT_FORWARD,
- reass->ip_proto, reass->l4_src_port, reass->l4_dst_port);
+ reass->ip_proto, reass->l4_src_port, reass->l4_dst_port,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
goto packet_enqueue;
}
ip4_sv_reass_rc_t rc =
ip4_sv_reass_update (vm, node, rm, ip0, reass, bi0);
+ u32 counter = ~0;
switch (rc)
{
case IP4_SV_REASS_RC_OK:
/* nothing to do here */
break;
case IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS:
- vlib_node_increment_counter (vm, node->node_index,
- IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
- 1);
- ip4_sv_reass_free (vm, rm, rt, reass);
- goto next_packet;
+ counter = IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
break;
case IP4_SV_REASS_RC_UNSUPP_IP_PROTO:
- vlib_node_increment_counter (vm, node->node_index,
- IP4_ERROR_REASS_UNSUPP_IP_PROT, 1);
+ counter = IP4_ERROR_REASS_UNSUPP_IP_PROT;
+ break;
+ }
+ if (~0 != counter)
+ {
+ vlib_node_increment_counter (vm, node->node_index, counter, 1);
ip4_sv_reass_free (vm, rm, rt, reass);
goto next_packet;
- break;
}
if (reass->is_complete)
{
@@ -843,13 +938,15 @@ slow_path:
{
ip4_sv_reass_add_trace (
vm, node, reass, bi0, REASS_FRAGMENT_FORWARD,
- reass->ip_proto, reass->l4_src_port, reass->l4_dst_port);
+ reass->ip_proto, reass->l4_src_port, reass->l4_dst_port,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
to_next, n_left_to_next, bi0,
next0);
}
- _vec_len (reass->cached_buffers) = 0; // buffers are owned by frame now
+ vec_set_len (reass->cached_buffers,
+ 0); // buffers are owned by frame now
}
goto next_packet;
@@ -862,13 +959,26 @@ slow_path:
b0 = vlib_get_buffer (vm, bi0);
vnet_feature_next (&next0, b0);
}
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
+ if (with_custom_context && forward_context)
+ {
+ if (to_next_aux)
+ {
+ to_next_aux[0] = *context;
+ to_next_aux += 1;
+ }
+ vlib_validate_buffer_enqueue_with_aux_x1 (
+ vm, node, next_index, to_next, to_next_aux, n_left_to_next,
+ bi0, *context, next0);
+ }
+ else
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
next_packet:
from += 1;
n_left_from -= 1;
+ if (with_custom_context)
+ context += 1;
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
@@ -879,28 +989,21 @@ done:
return frame->n_vectors;
}
-static char *ip4_sv_reass_error_strings[] = {
-#define _(sym, string) string,
- foreach_ip4_error
-#undef _
-};
-
VLIB_NODE_FN (ip4_sv_reass_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
- false /* is_output_feature */ ,
- false /* is_custom */ );
+ return ip4_sv_reass_inline (
+ vm, node, frame, false /* is_feature */, false /* is_output_feature */,
+ false /* is_custom */, false /* with_custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_node) = {
.name = "ip4-sv-reassembly",
.vector_size = sizeof (u32),
.format_trace = format_ip4_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
- .error_strings = ip4_sv_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -910,24 +1013,22 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node) = {
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_sv_reass_node_feature) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ ,
- false /* is_output_feature */ ,
- false /* is_custom */ );
+ return ip4_sv_reass_inline (
+ vm, node, frame, true /* is_feature */, false /* is_output_feature */,
+ false /* is_custom */, false /* with_custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = {
.name = "ip4-sv-reassembly-feature",
.vector_size = sizeof (u32),
.format_trace = format_ip4_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
- .error_strings = ip4_sv_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -936,34 +1037,30 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = {
[IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_sv_reass_feature) = {
.arc_name = "ip4-unicast",
.node_name = "ip4-sv-reassembly-feature",
.runs_before = VNET_FEATURES ("ip4-lookup"),
.runs_after = 0,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_sv_reass_node_output_feature) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ ,
- true /* is_output_feature */ ,
- false /* is_custom */ );
+ return ip4_sv_reass_inline (
+ vm, node, frame, true /* is_feature */, true /* is_output_feature */,
+ false /* is_custom */, false /* with_custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = {
.name = "ip4-sv-reassembly-output-feature",
.vector_size = sizeof (u32),
.format_trace = format_ip4_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
- .error_strings = ip4_sv_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -972,24 +1069,20 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = {
[IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_sv_reass_output_feature) = {
.arc_name = "ip4-output",
.node_name = "ip4-sv-reassembly-output-feature",
.runs_before = 0,
.runs_after = 0,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = {
.name = "ip4-sv-reassembly-custom-next",
.vector_size = sizeof (u32),
.format_trace = format_ip4_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
- .error_strings = ip4_sv_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -999,15 +1092,39 @@ VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = {
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_sv_reass_custom_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
- false /* is_output_feature */ ,
- true /* is_custom */ );
+ return ip4_sv_reass_inline (
+ vm, node, frame, false /* is_feature */, false /* is_output_feature */,
+ true /* is_custom */, false /* with_custom_context */);
+}
+
+VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_node) = {
+ .name = "ip4-sv-reassembly-custom-context",
+ .vector_size = sizeof (u32),
+ .aux_size = sizeof(u32),
+ .format_trace = format_ip4_sv_reass_trace,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
+ .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
+ .next_nodes =
+ {
+ [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
+ [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
+ [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reassembly-custom-context-handoff",
+
+ },
+};
+
+VLIB_NODE_FN (ip4_sv_reass_custom_context_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip4_sv_reass_inline (
+ vm, node, frame, false /* is_feature */, false /* is_output_feature */,
+ true /* is_custom */, true /* with_custom_context */);
}
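Because the node is registered with .aux_size = sizeof (u32), every frame
handed to it carries a parallel aux array alongside the buffer indices. A
sketch of how a hypothetical upstream node (with this node as one of its
nexts) could pass a per-buffer context; my_context0 is an assumed
caller-defined value:

    u32 *to_next, *to_next_aux, n_left_to_next;

    vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next,
                                       to_next_aux, n_left_to_next);
    to_next[0] = bi0;
    to_next_aux[0] = my_context0; /* e.g. a tenant or session id */
    to_next += 1;
    to_next_aux += 1;
    n_left_to_next -= 1;
    vlib_put_next_frame (vm, node, next_index, n_left_to_next);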
#ifndef CLIB_MARCH_VARIANT
@@ -1152,6 +1269,8 @@ ip4_sv_reass_init_function (vlib_main_t * vm)
rm->fq_index = vlib_frame_queue_main_init (ip4_sv_reass_node.index, 0);
rm->fq_feature_index =
vlib_frame_queue_main_init (ip4_sv_reass_node_feature.index, 0);
+ rm->fq_custom_context_index =
+ vlib_frame_queue_main_init (ip4_sv_reass_custom_context_node.index, 0);
rm->feature_use_refcount_per_intf = NULL;
rm->output_feature_use_refcount_per_intf = NULL;
@@ -1204,7 +1323,6 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm,
clib_spinlock_lock (&rt->lock);
vec_reset_length (pool_indexes_to_free);
- /* *INDENT-OFF* */
pool_foreach_index (index, rt->pool) {
reass = pool_elt_at_index (rt->pool, index);
if (now > reass->last_heard + rm->timeout)
@@ -1212,15 +1330,12 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm,
vec_add1 (pool_indexes_to_free, index);
}
}
- /* *INDENT-ON* */
int *i;
- /* *INDENT-OFF* */
vec_foreach (i, pool_indexes_to_free)
{
ip4_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
ip4_sv_reass_free (vm, rm, rt, reass);
}
- /* *INDENT-ON* */
clib_spinlock_unlock (&rt->lock);
}
@@ -1228,33 +1343,29 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm,
vec_free (pool_indexes_to_free);
if (event_data)
{
- _vec_len (event_data) = 0;
+ vec_set_len (event_data, 0);
}
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_expire_node) = {
- .function = ip4_sv_reass_walk_expired,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "ip4-sv-reassembly-expire-walk",
- .format_trace = format_ip4_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
- .error_strings = ip4_sv_reass_error_strings,
-
+ .function = ip4_sv_reass_walk_expired,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "ip4-sv-reassembly-expire-walk",
+ .format_trace = format_ip4_sv_reass_trace,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
};
-/* *INDENT-ON* */
static u8 *
format_ip4_sv_reass_key (u8 * s, va_list * args)
{
ip4_sv_reass_key_t *key = va_arg (*args, ip4_sv_reass_key_t *);
s =
- format (s,
- "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
- key->xx_id, format_ip4_address, &key->src, format_ip4_address,
+ format (s, "fib_index: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
+ key->fib_index, format_ip4_address, &key->src, format_ip4_address,
&key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
return s;
}
@@ -1313,11 +1424,9 @@ show_ip4_reass (vlib_main_t * vm,
clib_spinlock_lock (&rt->lock);
if (details)
{
- /* *INDENT-OFF* */
pool_foreach (reass, rt->pool) {
vlib_cli_output (vm, "%U", format_ip4_sv_reass, vm, reass);
}
- /* *INDENT-ON* */
}
sum_reass_n += rt->reass_n;
clib_spinlock_unlock (&rt->lock);
@@ -1341,13 +1450,11 @@ show_ip4_reass (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip4_sv_reass_cmd, static) = {
.path = "show ip4-sv-reassembly",
.short_help = "show ip4-sv-reassembly [details]",
.function = show_ip4_reass,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
vnet_api_error_t
@@ -1398,25 +1505,30 @@ format_ip4_sv_reass_handoff_trace (u8 * s, va_list * args)
}
always_inline uword
-ip4_sv_reass_handoff_node_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature)
+ip4_sv_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_feature,
+ bool is_custom_context)
{
ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
- u32 n_enq, n_left_from, *from;
+ u32 n_enq, n_left_from, *from, *context;
u16 thread_indices[VLIB_FRAME_SIZE], *ti;
u32 fq_index;
from = vlib_frame_vector_args (frame);
+ if (is_custom_context)
+ context = vlib_frame_aux_args (frame);
+
n_left_from = frame->n_vectors;
vlib_get_buffers (vm, from, bufs, n_left_from);
b = bufs;
ti = thread_indices;
- fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index;
+ fq_index = (is_feature) ? rm->fq_feature_index :
+ (is_custom_context ? rm->fq_custom_context_index :
+ rm->fq_index);
while (n_left_from > 0)
{
@@ -1435,8 +1547,12 @@ ip4_sv_reass_handoff_node_inline (vlib_main_t * vm,
ti += 1;
b += 1;
}
- n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from,
- thread_indices, frame->n_vectors, 1);
+ if (is_custom_context)
+ n_enq = vlib_buffer_enqueue_to_thread_with_aux (
+ vm, node, fq_index, from, context, thread_indices, frame->n_vectors, 1);
+ else
+ n_enq = vlib_buffer_enqueue_to_thread (
+ vm, node, fq_index, from, thread_indices, frame->n_vectors, 1);
if (n_enq < frame->n_vectors)
vlib_node_increment_counter (vm, node->node_index,
@@ -1449,12 +1565,11 @@ VLIB_NODE_FN (ip4_sv_reass_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_handoff_node_inline (vm, node, frame,
- false /* is_feature */ );
+ return ip4_sv_reass_handoff_node_inline (
+ vm, node, frame, false /* is_feature */, false /* is_custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = {
.name = "ip4-sv-reassembly-handoff",
.vector_size = sizeof (u32),
@@ -1468,22 +1583,39 @@ VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
+VLIB_NODE_FN (ip4_sv_reass_custom_context_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip4_sv_reass_handoff_node_inline (
+ vm, node, frame, false /* is_feature */, true /* is_custom_context */);
+}
+
+VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_handoff_node) = {
+ .name = "ip4-sv-reassembly-custom-context-handoff",
+ .vector_size = sizeof (u32),
+ .aux_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings),
+ .error_strings = ip4_sv_reass_handoff_error_strings,
+ .format_trace = format_ip4_sv_reass_handoff_trace,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
-/* *INDENT-OFF* */
VLIB_NODE_FN (ip4_sv_reass_feature_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t *
node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_handoff_node_inline (vm, node, frame,
- true /* is_feature */ );
+ return ip4_sv_reass_handoff_node_inline (
+ vm, node, frame, true /* is_feature */, false /* is_custom_context */);
}
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = {
.name = "ip4-sv-reass-feature-hoff",
.vector_size = sizeof (u32),
@@ -1497,7 +1629,6 @@ VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
int
@@ -1535,6 +1666,13 @@ ip4_sv_reass_custom_register_next_node (uword node_index)
node_index);
}
+uword
+ip4_sv_reass_custom_context_register_next_node (uword node_index)
+{
+ return vlib_node_add_next (
+ vlib_get_main (), ip4_sv_reass_custom_context_node.index, node_index);
+}
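A sketch of how a consumer might wire itself in, assuming (as in the
full-reassembly custom path) that the post-reassembly next is carried in
vnet_buffer ()->ip.reass.next_index; my_feature_node is hypothetical:

    static u32 my_reass_next;

    /* once, at init time */
    my_reass_next = ip4_sv_reass_custom_context_register_next_node (
      my_feature_node.index);

    /* per buffer, before enqueueing it (plus its aux context) towards
     * "ip4-sv-reassembly-custom-context" */
    vnet_buffer (b0)->ip.reass.next_index = my_reass_next;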
+
int
ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index,
int is_enable)
diff --git a/src/vnet/ip/reass/ip4_sv_reass.h b/src/vnet/ip/reass/ip4_sv_reass.h
index e926dbeebcc..3a684eb9809 100644
--- a/src/vnet/ip/reass/ip4_sv_reass.h
+++ b/src/vnet/ip/reass/ip4_sv_reass.h
@@ -49,6 +49,7 @@ int ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index,
int is_enable);
uword ip4_sv_reass_custom_register_next_node (uword node_index);
+uword ip4_sv_reass_custom_context_register_next_node (uword node_index);
#endif /* __included_ip4_sv_reass_h__ */
diff --git a/src/vnet/ip/reass/ip6_full_reass.c b/src/vnet/ip/reass/ip6_full_reass.c
index 9ec40cd347c..27647985877 100644
--- a/src/vnet/ip/reass/ip6_full_reass.c
+++ b/src/vnet/ip/reass/ip6_full_reass.c
@@ -25,10 +25,14 @@
#include <vnet/ip/ip.h>
#include <vppinfra/bihash_48_8.h>
#include <vnet/ip/reass/ip6_full_reass.h>
+#include <vnet/ip/ip6_inlines.h>
#define MSEC_PER_SEC 1000
-#define IP6_FULL_REASS_TIMEOUT_DEFAULT_MS 100
-#define IP6_FULL_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default
+#define IP6_FULL_REASS_TIMEOUT_DEFAULT_MS 200
+/* As there are only 1024 reassembly contexts per thread, either DDoS attacks
+ * or a fraction of real-world timeouts would consume these contexts quickly,
+ * running out of context space and leaving us unable to perform reassembly */
+#define IP6_FULL_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 50 // 50 ms default
#define IP6_FULL_REASS_MAX_REASSEMBLIES_DEFAULT 1024
#define IP6_FULL_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
#define IP6_FULL_REASS_HT_LOAD_FACTOR (0.75)
@@ -40,6 +44,8 @@ typedef enum
IP6_FULL_REASS_RC_TOO_MANY_FRAGMENTS,
IP6_FULL_REASS_RC_NO_BUF,
IP6_FULL_REASS_RC_HANDOFF,
+ IP6_FULL_REASS_RC_INVALID_FRAG_LEN,
+ IP6_FULL_REASS_RC_OVERLAP,
} ip6_full_reass_rc_t;
typedef struct
@@ -132,6 +138,8 @@ typedef struct
ip6_full_reass_t *pool;
u32 reass_n;
u32 id_counter;
+ // for pacing the main thread timeouts
+ u32 last_id;
clib_spinlock_t lock;
} ip6_full_reass_per_thread_t;
@@ -155,17 +163,20 @@ typedef struct
// convenience
vlib_main_t *vlib_main;
- // node index of ip6-drop node
- u32 ip6_drop_idx;
u32 ip6_icmp_error_idx;
u32 ip6_full_reass_expire_node_idx;
/** Worker handoff */
u32 fq_index;
+ u32 fq_local_index;
u32 fq_feature_index;
+ u32 fq_custom_index;
// reference count for enabling/disabling feature - per interface
u32 *feature_use_refcount_per_intf;
+
+ // whether local fragmented packets are reassembled or not
+ int is_local_reass_enabled;
} ip6_full_reass_main_t;
extern ip6_full_reass_main_t ip6_full_reass_main;
@@ -185,13 +196,22 @@ typedef enum
typedef enum
{
+ NORMAL,
+ FEATURE,
+ CUSTOM
+} ip6_full_reass_node_type_t;
+
+typedef enum
+{
RANGE_NEW,
+ RANGE_DISCARD,
RANGE_OVERLAP,
ICMP_ERROR_RT_EXCEEDED,
ICMP_ERROR_FL_TOO_BIG,
ICMP_ERROR_FL_NOT_MULT_8,
FINALIZE,
HANDOFF,
+ PASSTHROUGH,
} ip6_full_reass_trace_operation_e;
typedef struct
@@ -278,6 +298,10 @@ format_ip6_full_reass_trace (u8 * s, va_list * args)
s = format (s, "\n%Unew %U", format_white_space, indent,
format_ip6_full_reass_range_trace, &t->trace_range);
break;
+ case RANGE_DISCARD:
+ s = format (s, "\n%Udiscard %U", format_white_space, indent,
+ format_ip6_full_reass_range_trace, &t->trace_range);
+ break;
case RANGE_OVERLAP:
s = format (s, "\n%Uoverlap %U", format_white_space, indent,
format_ip6_full_reass_range_trace, &t->trace_range);
@@ -304,6 +328,9 @@ format_ip6_full_reass_trace (u8 * s, va_list * args)
format (s, "handoff from thread #%u to thread #%u", t->thread_id,
t->thread_id_to);
break;
+ case PASSTHROUGH:
+ s = format (s, "passthrough - not a fragment");
+ break;
}
return s;
}
@@ -396,59 +423,69 @@ ip6_full_reass_free (ip6_full_reass_main_t * rm,
ip6_full_reass_free_ctx (rt, reass);
}
+/* n_left_to_next and to_next are taken as input params because this
+ * function may be called from a graph node which manages its own local
+ * copies of these variables; ignoring those and enqueueing the buffers via
+ * fresh local variables would cause either a buffer leak or corruption */
always_inline void
ip6_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node,
- ip6_full_reass_t *reass)
+ ip6_full_reass_t *reass, u32 *n_left_to_next,
+ u32 **to_next)
{
u32 range_bi = reass->first_bi;
vlib_buffer_t *range_b;
vnet_buffer_opaque_t *range_vnb;
u32 *to_free = NULL;
+
while (~0 != range_bi)
{
range_b = vlib_get_buffer (vm, range_bi);
range_vnb = vnet_buffer (range_b);
- u32 bi = range_bi;
- while (~0 != bi)
+
+ if (~0 != range_bi)
{
- vec_add1 (to_free, bi);
- vlib_buffer_t *b = vlib_get_buffer (vm, bi);
- if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
- {
- bi = b->next_buffer;
- b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
- }
- else
- {
- bi = ~0;
- }
+ vec_add1 (to_free, range_bi);
}
range_bi = range_vnb->ip.reass.next_range_bi;
}
+
/* send to next_error_index */
- if (~0 != reass->error_next_index)
+ if (~0 != reass->error_next_index &&
+ reass->error_next_index < node->n_next_nodes)
{
- u32 n_left_to_next, *to_next, next_index;
+ u32 next_index;
next_index = reass->error_next_index;
u32 bi = ~0;
+ /* record number of packets sent to custom app */
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_TO_CUSTOM_APP,
+ vec_len (to_free));
+
while (vec_len (to_free) > 0)
{
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ vlib_get_next_frame (vm, node, next_index, *to_next,
+ (*n_left_to_next));
- while (vec_len (to_free) > 0 && n_left_to_next > 0)
+ while (vec_len (to_free) > 0 && (*n_left_to_next) > 0)
{
bi = vec_pop (to_free);
if (~0 != bi)
{
- to_next[0] = bi;
- to_next += 1;
- n_left_to_next -= 1;
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ip6_full_reass_add_trace (vm, node, reass, bi, NULL,
+ RANGE_DISCARD, ~0);
+ }
+ *to_next[0] = bi;
+ (*to_next) += 1;
+ (*n_left_to_next) -= 1;
}
}
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ vlib_put_next_frame (vm, node, next_index, (*n_left_to_next));
}
}
else
@@ -459,8 +496,65 @@ ip6_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node,
}
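+/* Walk the reassembly's range list, breaking any buffer chain that
+ * erroneously runs into the next range, and prepend bi0 to the list if it
+ * is not already part of it, so that a subsequent drop-all frees every
+ * buffer exactly once */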
always_inline void
-ip6_full_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip6_full_reass_t * reass, u32 * icmp_bi)
+sanitize_reass_buffers_add_missing (vlib_main_t *vm, ip6_full_reass_t *reass,
+ u32 *bi0)
+{
+ u32 range_bi = reass->first_bi;
+ vlib_buffer_t *range_b;
+ vnet_buffer_opaque_t *range_vnb;
+
+ while (~0 != range_bi)
+ {
+ range_b = vlib_get_buffer (vm, range_bi);
+ range_vnb = vnet_buffer (range_b);
+ u32 bi = range_bi;
+ if (~0 != bi)
+ {
+ if (bi == *bi0)
+ *bi0 = ~0;
+ if (range_b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ u32 _bi = bi;
+ vlib_buffer_t *_b = vlib_get_buffer (vm, _bi);
+ while (_b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ if (_b->next_buffer != range_vnb->ip.reass.next_range_bi)
+ {
+ _bi = _b->next_buffer;
+ _b = vlib_get_buffer (vm, _bi);
+ }
+ else
+ {
+ _b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+ break;
+ }
+ }
+ }
+ range_bi = range_vnb->ip.reass.next_range_bi;
+ }
+ }
+ if (*bi0 != ~0)
+ {
+ vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
+ vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
+ if (~0 != reass->first_bi)
+ {
+ fvnb->ip.reass.next_range_bi = reass->first_bi;
+ reass->first_bi = *bi0;
+ }
+ else
+ {
+ reass->first_bi = *bi0;
+ fvnb->ip.reass.next_range_bi = ~0;
+ }
+ *bi0 = ~0;
+ }
+}
+
+always_inline void
+ip6_full_reass_on_timeout (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip6_full_reass_t *reass, u32 *icmp_bi,
+ u32 *n_left_to_next, u32 **to_next)
{
if (~0 == reass->first_bi)
{
@@ -493,15 +587,16 @@ ip6_full_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node,
0);
}
}
- ip6_full_reass_drop_all (vm, node, reass);
+ ip6_full_reass_drop_all (vm, node, reass, n_left_to_next, to_next);
}
always_inline ip6_full_reass_t *
-ip6_full_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip6_full_reass_main_t * rm,
- ip6_full_reass_per_thread_t * rt,
- ip6_full_reass_kv_t * kv, u32 * icmp_bi,
- u8 * do_handoff)
+ip6_full_reass_find_or_create (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip6_full_reass_main_t *rm,
+ ip6_full_reass_per_thread_t *rt,
+ ip6_full_reass_kv_t *kv, u32 *icmp_bi,
+ u8 *do_handoff, int skip_bihash,
+ u32 *n_left_to_next, u32 **to_next)
{
ip6_full_reass_t *reass;
f64 now;
@@ -511,7 +606,7 @@ again:
reass = NULL;
now = vlib_time_now (vm);
- if (!clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv))
+ if (!skip_bihash && !clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv))
{
if (vm->thread_index != kv->v.memory_owner_thread_index)
{
@@ -526,7 +621,10 @@ again:
if (now > reass->last_heard + rm->timeout)
{
- ip6_full_reass_on_timeout (vm, node, reass, icmp_bi);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_TIMEOUT, 1);
+ ip6_full_reass_on_timeout (vm, node, reass, icmp_bi, n_left_to_next,
+ to_next);
ip6_full_reass_free (rm, rt, reass);
reass = NULL;
}
@@ -554,27 +652,41 @@ again:
reass->data_len = 0;
reass->next_index = ~0;
reass->error_next_index = ~0;
+ reass->memory_owner_thread_index = vm->thread_index;
++rt->reass_n;
}
- reass->key.as_u64[0] = kv->kv.key[0];
- reass->key.as_u64[1] = kv->kv.key[1];
- reass->key.as_u64[2] = kv->kv.key[2];
- reass->key.as_u64[3] = kv->kv.key[3];
- reass->key.as_u64[4] = kv->kv.key[4];
- reass->key.as_u64[5] = kv->kv.key[5];
kv->v.reass_index = (reass - rt->pool);
kv->v.memory_owner_thread_index = vm->thread_index;
reass->last_heard = now;
- int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2);
- if (rv)
+ if (!skip_bihash)
{
- ip6_full_reass_free (rm, rt, reass);
- reass = NULL;
- // if other worker created a context already work with the other copy
- if (-2 == rv)
- goto again;
+ reass->key.as_u64[0] = kv->kv.key[0];
+ reass->key.as_u64[1] = kv->kv.key[1];
+ reass->key.as_u64[2] = kv->kv.key[2];
+ reass->key.as_u64[3] = kv->kv.key[3];
+ reass->key.as_u64[4] = kv->kv.key[4];
+ reass->key.as_u64[5] = kv->kv.key[5];
+
+ int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2);
+ if (rv)
+ {
+ ip6_full_reass_free (rm, rt, reass);
+ reass = NULL;
+ // if other worker created a context already work with the other copy
+ if (-2 == rv)
+ goto again;
+ }
+ }
+ else
+ {
+ reass->key.as_u64[0] = ~0;
+ reass->key.as_u64[1] = ~0;
+ reass->key.as_u64[2] = ~0;
+ reass->key.as_u64[3] = ~0;
+ reass->key.as_u64[4] = ~0;
+ reass->key.as_u64[5] = ~0;
}
return reass;
@@ -593,8 +705,6 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_t *last_b = NULL;
u32 sub_chain_bi = reass->first_bi;
u32 total_length = 0;
- u32 buf_cnt = 0;
- u32 dropped_cnt = 0;
u32 *vec_drop_compress = NULL;
ip6_full_reass_rc_t rv = IP6_FULL_REASS_RC_OK;
do
@@ -636,19 +746,18 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end;
while (1)
{
- ++buf_cnt;
if (trim_front)
{
if (trim_front > tmp->current_length)
{
/* drop whole buffer */
- vec_add1 (vec_drop_compress, tmp_bi);
- trim_front -= tmp->current_length;
if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT))
{
rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
goto free_buffers_and_return;
}
+ trim_front -= tmp->current_length;
+ vec_add1 (vec_drop_compress, tmp_bi);
tmp->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
tmp_bi = tmp->next_buffer;
tmp = vlib_get_buffer (vm, tmp_bi);
@@ -686,13 +795,12 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
}
else
{
- vec_add1 (vec_drop_compress, tmp_bi);
if (reass->first_bi == tmp_bi)
{
rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
goto free_buffers_and_return;
}
- ++dropped_cnt;
+ vec_add1 (vec_drop_compress, tmp_bi);
}
if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT)
{
@@ -729,19 +837,27 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_buffer_opaque_t *first_b_vnb = vnet_buffer (first_b);
ip6_header_t *ip = vlib_buffer_get_current (first_b);
u16 ip6_frag_hdr_offset = first_b_vnb->ip.reass.ip6_frag_hdr_offset;
- ip6_ext_header_t *prev_hdr;
- frag_hdr =
- ip6_ext_header_find (vm, first_b, ip, IP_PROTOCOL_IPV6_FRAGMENTATION,
- &prev_hdr);
- if (prev_hdr)
+ ip6_ext_hdr_chain_t hdr_chain;
+ ip6_ext_header_t *prev_hdr = 0;
+ int res = ip6_ext_header_walk (first_b, ip, IP_PROTOCOL_IPV6_FRAGMENTATION,
+ &hdr_chain);
+ if (res < 0 ||
+ (hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION))
{
+ rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
+ goto free_buffers_and_return;
+ }
+ frag_hdr = ip6_ext_next_header_offset (ip, hdr_chain.eh[res].offset);
+ if (res > 0)
+ {
+ prev_hdr = ip6_ext_next_header_offset (ip, hdr_chain.eh[res - 1].offset);
prev_hdr->next_hdr = frag_hdr->next_hdr;
}
else
{
ip->protocol = frag_hdr->next_hdr;
}
- if (!((u8 *) frag_hdr - (u8 *) ip == ip6_frag_hdr_offset))
+ if (hdr_chain.eh[res].offset != ip6_frag_hdr_offset)
{
rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
goto free_buffers_and_return;
@@ -799,6 +915,15 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
*next0 = reass->next_index;
}
vnet_buffer (first_b)->ip.reass.estimated_mtu = reass->min_fragment_length;
+ /* Keep track of the number of successfully reassembled packets and the
+ * number of fragments reassembled */
+ vlib_node_increment_counter (vm, node->node_index, IP6_ERROR_REASS_SUCCESS,
+ 1);
+
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_FRAGMENTS_REASSEMBLED,
+ reass->fragments_n);
+
ip6_full_reass_free (rm, rt, reass);
reass = NULL;
free_buffers_and_return:
@@ -834,12 +959,13 @@ ip6_full_reass_insert_range_in_chain (vlib_main_t * vm,
}
always_inline ip6_full_reass_rc_t
-ip6_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip6_full_reass_main_t * rm,
- ip6_full_reass_per_thread_t * rt,
- ip6_full_reass_t * reass, u32 * bi0, u32 * next0,
- u32 * error0, ip6_frag_hdr_t * frag_hdr,
- bool is_custom_app, u32 * handoff_thread_idx)
+ip6_full_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip6_full_reass_main_t *rm,
+ ip6_full_reass_per_thread_t *rt,
+ ip6_full_reass_t *reass, u32 *bi0, u32 *next0,
+ u32 *error0, ip6_frag_hdr_t *frag_hdr,
+ bool is_custom_app, u32 *handoff_thread_idx,
+ int skip_bihash)
{
int consumed = 0;
vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
@@ -865,6 +991,10 @@ ip6_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 fragment_length =
vlib_buffer_length_in_chain (vm, fb) -
(fvnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr));
+ if (0 == fragment_length)
+ {
+ return IP6_FULL_REASS_RC_INVALID_FRAG_LEN;
+ }
u32 fragment_last = fvnb->ip.reass.fragment_last =
fragment_first + fragment_length - 1;
int more_fragments = ip6_frag_hdr_more (frag_hdr);
@@ -929,11 +1059,7 @@ ip6_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
ip6_full_reass_add_trace (vm, node, reass, *bi0, frag_hdr,
RANGE_OVERLAP, ~0);
}
- ip6_full_reass_drop_all (vm, node, reass);
- ip6_full_reass_free (rm, rt, reass);
- *next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
- *error0 = IP6_ERROR_REASS_OVERLAPPING_FRAGMENT;
- return IP6_FULL_REASS_RC_OK;
+ return IP6_FULL_REASS_RC_OVERLAP;
}
break;
}
@@ -947,6 +1073,12 @@ check_if_done_maybe:
~0);
}
}
+ else if (skip_bihash)
+ {
+ // if this reassembly is not in bihash, then the packet must have been
+ // consumed
+ return IP6_FULL_REASS_RC_INTERNAL_ERROR;
+ }
if (~0 != reass->last_packet_octet &&
reass->data_len == reass->last_packet_octet + 1)
{
@@ -964,6 +1096,12 @@ check_if_done_maybe:
}
else
{
+ if (skip_bihash)
+ {
+ // if this reassembly is not in bihash, it should've been an atomic
+ // fragment and thus finalized
+ return IP6_FULL_REASS_RC_INTERNAL_ERROR;
+ }
if (consumed)
{
*bi0 = ~0;
@@ -982,31 +1120,28 @@ check_if_done_maybe:
}
always_inline bool
-ip6_full_reass_verify_upper_layer_present (vlib_node_runtime_t * node,
- vlib_buffer_t * b,
- ip6_frag_hdr_t * frag_hdr)
+ip6_full_reass_verify_upper_layer_present (vlib_node_runtime_t *node,
+ vlib_buffer_t *b,
+ ip6_ext_hdr_chain_t *hc)
{
- ip6_ext_header_t *tmp = (ip6_ext_header_t *) frag_hdr;
- while (ip6_ext_hdr (tmp->next_hdr))
- {
- tmp = ip6_ext_next_header (tmp);
- }
- if (IP_PROTOCOL_IP6_NONXT == tmp->next_hdr)
+ int nh = hc->eh[hc->length - 1].protocol;
+ /* Checking to see if it's a terminating header */
+ if (ip6_ext_hdr (nh))
{
- icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem,
- ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain,
- 0);
+ icmp6_error_set_vnet_buffer (
+ b, ICMP6_parameter_problem,
+ ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain, 0);
b->error = node->errors[IP6_ERROR_REASS_MISSING_UPPER];
-
return false;
}
return true;
}
always_inline bool
-ip6_full_reass_verify_fragment_multiple_8 (vlib_main_t * vm,
- vlib_buffer_t * b,
- ip6_frag_hdr_t * frag_hdr)
+ip6_full_reass_verify_fragment_multiple_8 (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ vlib_buffer_t *b,
+ ip6_frag_hdr_t *frag_hdr)
{
vnet_buffer_opaque_t *vnb = vnet_buffer (b);
ip6_header_t *ip = vlib_buffer_get_current (b);
@@ -1019,15 +1154,17 @@ ip6_full_reass_verify_fragment_multiple_8 (vlib_main_t * vm,
icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem,
ICMP6_parameter_problem_erroneous_header_field,
(u8 *) & ip->payload_length - (u8 *) ip);
+ b->error = node->errors[IP6_ERROR_REASS_INVALID_FRAG_SIZE];
return false;
}
return true;
}
always_inline bool
-ip6_full_reass_verify_packet_size_lt_64k (vlib_main_t * vm,
- vlib_buffer_t * b,
- ip6_frag_hdr_t * frag_hdr)
+ip6_full_reass_verify_packet_size_lt_64k (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ vlib_buffer_t *b,
+ ip6_frag_hdr_t *frag_hdr)
{
vnet_buffer_opaque_t *vnb = vnet_buffer (b);
u32 fragment_first = ip6_frag_hdr_offset_bytes (frag_hdr);
@@ -1041,16 +1178,16 @@ ip6_full_reass_verify_packet_size_lt_64k (vlib_main_t * vm,
ICMP6_parameter_problem_erroneous_header_field,
(u8 *) & frag_hdr->fragment_offset_and_more
- (u8 *) ip0);
+ b->error = node->errors[IP6_ERROR_REASS_INVALID_FRAG_SIZE];
return false;
}
return true;
}
always_inline uword
-ip6_full_reassembly_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature,
- bool is_custom_app)
+ip6_full_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_feature,
+ bool is_custom_app, bool is_local)
{
u32 *from = vlib_frame_vector_args (frame);
u32 n_left_from, n_left_to_next, *to_next, next_index;
@@ -1077,55 +1214,95 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
ip6_header_t *ip0 = vlib_buffer_get_current (b0);
ip6_frag_hdr_t *frag_hdr = NULL;
- ip6_ext_header_t *prev_hdr;
- if (ip6_ext_hdr (ip0->protocol))
+ ip6_ext_hdr_chain_t hdr_chain;
+ vnet_buffer_opaque_t *fvnb = vnet_buffer (b0);
+
+ int res = ip6_ext_header_walk (
+ b0, ip0, IP_PROTOCOL_IPV6_FRAGMENTATION, &hdr_chain);
+ if (res < 0 ||
+ hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION)
{
- frag_hdr =
- ip6_ext_header_find (vm, b0, ip0,
- IP_PROTOCOL_IPV6_FRAGMENTATION,
- &prev_hdr);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_NO_FRAG_HDR, 1);
+ // this is a mangled packet - no fragmentation
+ next0 = is_custom_app ? fvnb->ip.reass.error_next_index :
+ IP6_FULL_REASSEMBLY_NEXT_DROP;
+ ip6_full_reass_add_trace (vm, node, NULL, bi0, NULL, PASSTHROUGH,
+ ~0);
+ goto skip_reass;
}
- if (!frag_hdr)
+ if (is_local && !rm->is_local_reass_enabled)
{
- // this is a regular packet - no fragmentation
- next0 = IP6_FULL_REASSEMBLY_NEXT_INPUT;
+ next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
goto skip_reass;
}
+
+ /* Keep track of received fragments */
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_FRAGMENTS_RCVD, 1);
+ frag_hdr =
+ ip6_ext_next_header_offset (ip0, hdr_chain.eh[res].offset);
vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset =
- (u8 *) frag_hdr - (u8 *) ip0;
+ hdr_chain.eh[res].offset;
if (0 == ip6_frag_hdr_offset (frag_hdr))
{
// first fragment - verify upper-layer is present
- if (!ip6_full_reass_verify_upper_layer_present
- (node, b0, frag_hdr))
+ if (!ip6_full_reass_verify_upper_layer_present (node, b0,
+ &hdr_chain))
{
- next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
+ next0 = is_custom_app ? fvnb->ip.reass.error_next_index :
+ IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
goto skip_reass;
}
}
- if (!ip6_full_reass_verify_fragment_multiple_8 (vm, b0, frag_hdr) ||
- !ip6_full_reass_verify_packet_size_lt_64k (vm, b0, frag_hdr))
+
+ if (!ip6_full_reass_verify_fragment_multiple_8 (vm, node, b0,
+ frag_hdr) ||
+ !ip6_full_reass_verify_packet_size_lt_64k (vm, node, b0,
+ frag_hdr))
{
- next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
+ next0 = is_custom_app ? fvnb->ip.reass.error_next_index :
+ IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
goto skip_reass;
}
+
+ int skip_bihash = 0;
ip6_full_reass_kv_t kv;
u8 do_handoff = 0;
- kv.k.as_u64[0] = ip0->src_address.as_u64[0];
- kv.k.as_u64[1] = ip0->src_address.as_u64[1];
- kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
- kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
- kv.k.as_u64[4] =
- ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
- vnet_buffer (b0)->sw_if_index[VLIB_RX])) << 32 |
- (u64) frag_hdr->identification;
- kv.k.as_u64[5] = ip0->protocol;
+ if (0 == ip6_frag_hdr_offset (frag_hdr) &&
+ !ip6_frag_hdr_more (frag_hdr))
+ {
+ // this is an atomic fragment and needs to be processed separately
+ skip_bihash = 1;
+ }
+ else
+ {
+ u32 fib_index =
+ (vnet_buffer (b0)->sw_if_index[VLIB_TX] == (u32) ~0) ?
+ vec_elt (ip6_main.fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]) :
+ vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ kv.k.as_u64[0] = ip0->src_address.as_u64[0];
+ kv.k.as_u64[1] = ip0->src_address.as_u64[1];
+ kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
+ kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
+ kv.k.as_u64[4] =
+ ((u64) fib_index) << 32 | (u64) frag_hdr->identification;
+ /* RFC 8200: The Next Header values in the Fragment headers of
+ * different fragments of the same original packet may differ.
+ * Only the value from the Offset zero fragment packet is used
+ * for reassembly.
+ *
+ * Also, unlike IPv4, the IPv6 header doesn't contain the
+ * protocol value. */
+ kv.k.as_u64[5] = 0;
+ }
- ip6_full_reass_t *reass =
- ip6_full_reass_find_or_create (vm, node, rm, rt, &kv, &icmp_bi,
- &do_handoff);
+ ip6_full_reass_t *reass = ip6_full_reass_find_or_create (
+ vm, node, rm, rt, &kv, &icmp_bi, &do_handoff, skip_bihash,
+ &n_left_to_next, &to_next);
if (reass)
{
@@ -1144,9 +1321,10 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
else if (reass)
{
u32 handoff_thread_idx;
- switch (ip6_full_reass_update
- (vm, node, rm, rt, reass, &bi0, &next0, &error0,
- frag_hdr, is_custom_app, &handoff_thread_idx))
+ u32 counter = ~0;
+ switch (ip6_full_reass_update (
+ vm, node, rm, rt, reass, &bi0, &next0, &error0, frag_hdr,
+ is_custom_app, &handoff_thread_idx, skip_bihash))
{
case IP6_FULL_REASS_RC_OK:
/* nothing to do here */
@@ -1158,25 +1336,36 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
handoff_thread_idx;
break;
case IP6_FULL_REASS_RC_TOO_MANY_FRAGMENTS:
- vlib_node_increment_counter (vm, node->node_index,
- IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
- 1);
- ip6_full_reass_drop_all (vm, node, reass);
- ip6_full_reass_free (rm, rt, reass);
- goto next_packet;
+ counter = IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
break;
case IP6_FULL_REASS_RC_NO_BUF:
- vlib_node_increment_counter (vm, node->node_index,
- IP6_ERROR_REASS_NO_BUF, 1);
- ip6_full_reass_drop_all (vm, node, reass);
- ip6_full_reass_free (rm, rt, reass);
- goto next_packet;
+ counter = IP6_ERROR_REASS_NO_BUF;
+ break;
+ case IP6_FULL_REASS_RC_INVALID_FRAG_LEN:
+ counter = IP6_ERROR_REASS_INVALID_FRAG_LEN;
+ break;
+ case IP6_FULL_REASS_RC_OVERLAP:
+ counter = IP6_ERROR_REASS_OVERLAPPING_FRAGMENT;
break;
case IP6_FULL_REASS_RC_INTERNAL_ERROR:
- vlib_node_increment_counter (vm, node->node_index,
- IP6_ERROR_REASS_INTERNAL_ERROR,
+ counter = IP6_ERROR_REASS_INTERNAL_ERROR;
+ /* Sanitization is needed only in the internal-error case, as
+ * the incoming packet is already dropped in the other cases;
+ * adding bi0 back to the reassembly list also fixes buffer
+ * leaks during internal errors.
+ *
+ * It also doesn't make sense to send these buffers to the
+ * custom app, as these fragments carry internal errors */
+ sanitize_reass_buffers_add_missing (vm, reass, &bi0);
+ reass->error_next_index = ~0;
+ break;
+ }
+ if (~0 != counter)
+ {
+ vlib_node_increment_counter (vm, node->node_index, counter,
1);
- ip6_full_reass_drop_all (vm, node, reass);
+ ip6_full_reass_drop_all (vm, node, reass, &n_left_to_next,
+ &to_next);
ip6_full_reass_free (rm, rt, reass);
goto next_packet;
break;
@@ -1190,7 +1379,6 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
}
else
{
- vnet_buffer_opaque_t *fvnb = vnet_buffer (b0);
next0 = fvnb->ip.reass.error_next_index;
}
error0 = IP6_ERROR_REASS_LIMIT_REACHED;
@@ -1223,6 +1411,15 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
{
vnet_feature_next (&next0, b0);
}
+
+ /* Also increment the to-custom-app counter, as this fragment is
+ * going to the application as well */
+ if (is_custom_app)
+ {
+ vlib_node_increment_counter (
+ vm, node->node_index, IP6_ERROR_REASS_TO_CUSTOM_APP, 1);
+ }
+
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
}
@@ -1249,26 +1446,21 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
return frame->n_vectors;
}
-static char *ip6_full_reassembly_error_strings[] = {
-#define _(sym, string) string,
- foreach_ip6_error
-#undef _
-};
-
VLIB_NODE_FN (ip6_full_reass_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */ ,
- false /* is_custom_app */ );
+ return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */,
+ false /* is_custom_app */,
+ false /* is_local */);
}
VLIB_REGISTER_NODE (ip6_full_reass_node) = {
.name = "ip6-full-reassembly",
.vector_size = sizeof (u32),
.format_trace = format_ip6_full_reass_trace,
- .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
- .error_strings = ip6_full_reassembly_error_strings,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -1279,20 +1471,45 @@ VLIB_REGISTER_NODE (ip6_full_reass_node) = {
},
};
+VLIB_NODE_FN (ip6_local_full_reass_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */,
+ false /* is_custom_app */,
+ true /* is_local */);
+}
+
+VLIB_REGISTER_NODE (ip6_local_full_reass_node) = {
+ .name = "ip6-local-full-reassembly",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_full_reass_trace,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
+ .n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
+ .next_nodes =
+ {
+ [IP6_FULL_REASSEMBLY_NEXT_INPUT] = "ip6-input",
+ [IP6_FULL_REASSEMBLY_NEXT_DROP] = "ip6-drop",
+ [IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
+ [IP6_FULL_REASSEMBLY_NEXT_HANDOFF] = "ip6-local-full-reassembly-handoff",
+ },
+};
+
VLIB_NODE_FN (ip6_full_reass_node_feature) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_full_reassembly_inline (vm, node, frame, true /* is_feature */ ,
- false /* is_custom_app */ );
+ return ip6_full_reassembly_inline (vm, node, frame, true /* is_feature */,
+ false /* is_custom_app */,
+ false /* is_local */);
}
VLIB_REGISTER_NODE (ip6_full_reass_node_feature) = {
.name = "ip6-full-reassembly-feature",
.vector_size = sizeof (u32),
.format_trace = format_ip6_full_reass_trace,
- .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
- .error_strings = ip6_full_reassembly_error_strings,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -1311,6 +1528,30 @@ VNET_FEATURE_INIT (ip6_full_reassembly_feature, static) = {
.runs_after = 0,
};
+VLIB_NODE_FN (ip6_full_reass_node_custom)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */,
+ true /* is_custom_app */,
+ false /* is_local */);
+}
+
+VLIB_REGISTER_NODE (ip6_full_reass_node_custom) = {
+ .name = "ip6-full-reassembly-custom",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_full_reass_trace,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
+ .n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
+ .next_nodes =
+ {
+ [IP6_FULL_REASSEMBLY_NEXT_INPUT] = "ip6-input",
+ [IP6_FULL_REASSEMBLY_NEXT_DROP] = "ip6-drop",
+ [IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
+ [IP6_FULL_REASSEMBLY_NEXT_HANDOFF] = "ip6-full-reass-custom-hoff",
+ },
+};
+
#ifndef CLIB_MARCH_VARIANT
static u32
ip6_full_reass_get_nbuckets ()
@@ -1319,7 +1560,9 @@ ip6_full_reass_get_nbuckets ()
u32 nbuckets;
u8 i;
- nbuckets = (u32) (rm->max_reass_n / IP6_FULL_REASS_HT_LOAD_FACTOR);
+ /* need more mem with more workers */
+ nbuckets = (u32) (rm->max_reass_n * (vlib_num_workers () + 1) /
+ IP6_FULL_REASS_HT_LOAD_FACTOR);
for (i = 0; i < 31; i++)
if ((1 << i) >= nbuckets)
@@ -1446,9 +1689,6 @@ ip6_full_reass_init_function (vlib_main_t * vm)
clib_bihash_init_48_8 (&rm->hash, "ip6-full-reass", nbuckets,
nbuckets * 1024);
- node = vlib_get_node_by_name (vm, (u8 *) "ip6-drop");
- ASSERT (node);
- rm->ip6_drop_idx = node->index;
node = vlib_get_node_by_name (vm, (u8 *) "ip6-icmp-error");
ASSERT (node);
rm->ip6_icmp_error_idx = node->index;
@@ -1456,11 +1696,16 @@ ip6_full_reass_init_function (vlib_main_t * vm)
if ((error = vlib_call_init_function (vm, ip_main_init)))
return error;
ip6_register_protocol (IP_PROTOCOL_IPV6_FRAGMENTATION,
- ip6_full_reass_node.index);
+ ip6_local_full_reass_node.index);
+ rm->is_local_reass_enabled = 1;
rm->fq_index = vlib_frame_queue_main_init (ip6_full_reass_node.index, 0);
+ rm->fq_local_index =
+ vlib_frame_queue_main_init (ip6_local_full_reass_node.index, 0);
rm->fq_feature_index =
vlib_frame_queue_main_init (ip6_full_reass_node_feature.index, 0);
+ rm->fq_custom_index =
+ vlib_frame_queue_main_init (ip6_full_reass_node_custom.index, 0);
rm->feature_use_refcount_per_intf = NULL;
return error;
@@ -1504,26 +1749,53 @@ ip6_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
int index;
const uword nthreads = vlib_num_workers () + 1;
u32 *vec_icmp_bi = NULL;
+ u32 n_left_to_next, *to_next;
+
for (thread_index = 0; thread_index < nthreads; ++thread_index)
{
ip6_full_reass_per_thread_t *rt =
&rm->per_thread_data[thread_index];
+ u32 reass_timeout_cnt = 0;
clib_spinlock_lock (&rt->lock);
vec_reset_length (pool_indexes_to_free);
- pool_foreach_index (index, rt->pool) {
- reass = pool_elt_at_index (rt->pool, index);
- if (now > reass->last_heard + rm->timeout)
- {
- vec_add1 (pool_indexes_to_free, index);
- }
- }
+ /* Pace the number of timeouts handled per thread, to avoid barrier
+ * sync issues in real-world scenarios */
+
+ u32 beg = rt->last_id;
+ /* ensure we walk each context at least once per second */
+ u32 end = beg + (IP6_FULL_REASS_MAX_REASSEMBLIES_DEFAULT *
+ IP6_FULL_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS /
+ MSEC_PER_SEC +
+ 1);
+ if (end > vec_len (rt->pool))
+ {
+ end = vec_len (rt->pool);
+ rt->last_id = 0;
+ }
+ else
+ {
+ rt->last_id = end;
+ }
+
+ pool_foreach_stepping_index (index, beg, end, rt->pool)
+ {
+ reass = pool_elt_at_index (rt->pool, index);
+ if (now > reass->last_heard + rm->timeout)
+ {
+ vec_add1 (pool_indexes_to_free, index);
+ }
+ }
+
int *i;
vec_foreach (i, pool_indexes_to_free)
{
ip6_full_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
u32 icmp_bi = ~0;
- ip6_full_reass_on_timeout (vm, node, reass, &icmp_bi);
+
+ reass_timeout_cnt += reass->fragments_n;
+ ip6_full_reass_on_timeout (vm, node, reass, &icmp_bi,
+ &n_left_to_next, &to_next);
if (~0 != icmp_bi)
vec_add1 (vec_icmp_bi, icmp_bi);
@@ -1531,6 +1803,10 @@ ip6_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
}
clib_spinlock_unlock (&rt->lock);
+ if (reass_timeout_cnt)
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_TIMEOUT,
+ reass_timeout_cnt);
}
while (vec_len (vec_icmp_bi) > 0)
@@ -1546,7 +1822,6 @@ ip6_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
trace_frame = 1;
- b->error = node->errors[IP6_ERROR_REASS_TIMEOUT];
to_next[0] = bi;
++f->n_vectors;
to_next += 1;
@@ -1560,7 +1835,7 @@ ip6_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
vec_free (vec_icmp_bi);
if (event_data)
{
- _vec_len (event_data) = 0;
+ vec_set_len (event_data, 0);
}
}
@@ -1568,14 +1843,13 @@ ip6_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
}
VLIB_REGISTER_NODE (ip6_full_reass_expire_node) = {
- .function = ip6_full_reass_walk_expired,
- .format_trace = format_ip6_full_reass_trace,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "ip6-full-reassembly-expire-walk",
-
- .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
- .error_strings = ip6_full_reassembly_error_strings,
+ .function = ip6_full_reass_walk_expired,
+ .format_trace = format_ip6_full_reass_trace,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "ip6-full-reassembly-expire-walk",
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
};
static u8 *
@@ -1733,9 +2007,10 @@ format_ip6_full_reassembly_handoff_trace (u8 * s, va_list * args)
}
always_inline uword
-ip6_full_reassembly_handoff_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature)
+ip6_full_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame,
+ ip6_full_reass_node_type_t type,
+ bool is_local)
{
ip6_full_reass_main_t *rm = &ip6_full_reass_main;
@@ -1751,8 +2026,28 @@ ip6_full_reassembly_handoff_inline (vlib_main_t * vm,
b = bufs;
ti = thread_indices;
- fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index;
-
+ switch (type)
+ {
+ case NORMAL:
+ if (is_local)
+ {
+ fq_index = rm->fq_local_index;
+ }
+ else
+ {
+ fq_index = rm->fq_index;
+ }
+ break;
+ case FEATURE:
+ fq_index = rm->fq_feature_index;
+ break;
+ case CUSTOM:
+ fq_index = rm->fq_custom_index;
+ break;
+ default:
+ clib_warning ("Unexpected `type' (%d)!", type);
+ ASSERT (0);
+ }
while (n_left_from > 0)
{
ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index;
@@ -1784,8 +2079,8 @@ VLIB_NODE_FN (ip6_full_reassembly_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_full_reassembly_handoff_inline (vm, node, frame,
- false /* is_feature */ );
+ return ip6_full_reassembly_handoff_inline (vm, node, frame, NORMAL,
+ false /* is_local */);
}
VLIB_REGISTER_NODE (ip6_full_reassembly_handoff_node) = {
@@ -1802,14 +2097,34 @@ VLIB_REGISTER_NODE (ip6_full_reassembly_handoff_node) = {
},
};
+VLIB_NODE_FN (ip6_local_full_reassembly_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_full_reassembly_handoff_inline (vm, node, frame, NORMAL,
+ true /* is_local */);
+}
+
+VLIB_REGISTER_NODE (ip6_local_full_reassembly_handoff_node) = {
+ .name = "ip6-local-full-reassembly-handoff",
+ .vector_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(ip6_full_reassembly_handoff_error_strings),
+ .error_strings = ip6_full_reassembly_handoff_error_strings,
+ .format_trace = format_ip6_full_reassembly_handoff_trace,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
VLIB_NODE_FN (ip6_full_reassembly_feature_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * frame)
{
- return ip6_full_reassembly_handoff_inline (vm, node, frame, true /* is_feature */ );
+ return ip6_full_reassembly_handoff_inline (vm, node, frame, FEATURE,
+ false /* is_local */);
}
-
VLIB_REGISTER_NODE (ip6_full_reassembly_feature_handoff_node) = {
.name = "ip6-full-reass-feature-hoff",
.vector_size = sizeof (u32),
@@ -1824,6 +2139,27 @@ VLIB_REGISTER_NODE (ip6_full_reassembly_feature_handoff_node) = {
},
};
+VLIB_NODE_FN (ip6_full_reassembly_custom_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_full_reassembly_handoff_inline (vm, node, frame, CUSTOM,
+ false /* is_local */);
+}
+
+VLIB_REGISTER_NODE (ip6_full_reassembly_custom_handoff_node) = {
+ .name = "ip6-full-reass-custom-hoff",
+ .vector_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(ip6_full_reassembly_handoff_error_strings),
+ .error_strings = ip6_full_reassembly_handoff_error_strings,
+ .format_trace = format_ip6_full_reassembly_handoff_trace,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
#ifndef CLIB_MARCH_VARIANT
int
ip6_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
@@ -1849,8 +2185,37 @@ ip6_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
"ip6-full-reassembly-feature",
sw_if_index, 0, 0, 0);
}
- return -1;
+ return 0;
+}
+
+void
+ip6_local_full_reass_enable_disable (int enable)
+{
+ if (enable)
+ {
+ if (!ip6_full_reass_main.is_local_reass_enabled)
+ {
+ ip6_full_reass_main.is_local_reass_enabled = 1;
+ ip6_register_protocol (IP_PROTOCOL_IPV6_FRAGMENTATION,
+ ip6_local_full_reass_node.index);
+ }
+ }
+ else
+ {
+ if (ip6_full_reass_main.is_local_reass_enabled)
+ {
+ ip6_full_reass_main.is_local_reass_enabled = 0;
+ ip6_unregister_protocol (IP_PROTOCOL_IPV6_FRAGMENTATION);
+ }
+ }
+}
+
+int
+ip6_local_full_reass_enabled ()
+{
+ return ip6_full_reass_main.is_local_reass_enabled;
}
+
#endif
/*
diff --git a/src/vnet/ip/reass/ip6_full_reass.h b/src/vnet/ip/reass/ip6_full_reass.h
index 546075b04b4..f66cb67d796 100644
--- a/src/vnet/ip/reass/ip6_full_reass.h
+++ b/src/vnet/ip/reass/ip6_full_reass.h
@@ -46,6 +46,8 @@ vnet_api_error_t ip6_full_reass_enable_disable (u32 sw_if_index,
int ip6_full_reass_enable_disable_with_refcnt (u32 sw_if_index,
int is_enable);
+void ip6_local_full_reass_enable_disable (int enable);
+int ip6_local_full_reass_enabled ();
#endif /* __included_ip6_full_reass_h */
/*
diff --git a/src/vnet/ip/reass/ip6_sv_reass.c b/src/vnet/ip/reass/ip6_sv_reass.c
index 28941311f50..fe2ed05555c 100644
--- a/src/vnet/ip/reass/ip6_sv_reass.c
+++ b/src/vnet/ip/reass/ip6_sv_reass.c
@@ -26,6 +26,7 @@
#include <vnet/ip/ip6_to_ip4.h>
#include <vppinfra/bihash_48_8.h>
#include <vnet/ip/reass/ip6_sv_reass.h>
+#include <vnet/ip/ip6_inlines.h>
#define MSEC_PER_SEC 1000
#define IP6_SV_REASS_TIMEOUT_DEFAULT_MS 100
@@ -40,6 +41,7 @@ typedef enum
IP6_SV_REASS_RC_TOO_MANY_FRAGMENTS,
IP6_SV_REASS_RC_INTERNAL_ERROR,
IP6_SV_REASS_RC_UNSUPP_IP_PROTO,
+ IP6_SV_REASS_RC_INVALID_FRAG_LEN,
} ip6_sv_reass_rc_t;
typedef struct
@@ -50,7 +52,7 @@ typedef struct
{
ip6_address_t src;
ip6_address_t dst;
- u32 xx_id;
+ u32 fib_index;
u32 frag_id;
u8 unused[7];
u8 proto;
@@ -148,6 +150,7 @@ typedef struct
/** Worker handoff */
u32 fq_index;
u32 fq_feature_index;
+ u32 fq_custom_context_index;
// reference count for enabling/disabling feature - per interface
u32 *feature_use_refcount_per_intf;
@@ -214,7 +217,7 @@ format_ip6_sv_reass_trace (u8 * s, va_list * args)
clib_net_to_host_u16 (t->l4_dst_port));
break;
case REASS_PASSTHROUGH:
- s = format (s, "[not-fragmented]");
+ s = format (s, "[not fragmented or atomic fragment]");
break;
}
return s;
@@ -309,6 +312,8 @@ ip6_sv_reass_find_or_create (vlib_main_t *vm, ip6_sv_reass_main_t *rm,
ip6_sv_reass_t *reass = NULL;
f64 now = vlib_time_now (vm);
+again:
+
if (!clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv))
{
if (vm->thread_index != kv->v.thread_index)
@@ -368,10 +373,14 @@ ip6_sv_reass_find_or_create (vlib_main_t *vm, ip6_sv_reass_main_t *rm,
kv->v.thread_index = vm->thread_index;
reass->last_heard = now;
- if (clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 1))
+ int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2);
+ if (rv)
{
ip6_sv_reass_free (vm, rm, rt, reass);
reass = NULL;
+ // if other worker created a context already work with the other copy
+ if (-2 == rv)
+ goto again;
}
return reass;
@@ -399,6 +408,10 @@ ip6_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 fragment_length =
vlib_buffer_length_in_chain (vm, fb) -
(fvnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr));
+ if (0 == fragment_length)
+ {
+ return IP6_SV_REASS_RC_INVALID_FRAG_LEN;
+ }
u32 fragment_last = fvnb->ip.reass.fragment_last =
fragment_first + fragment_length - 1;
fvnb->ip.reass.range_first = fragment_first;
@@ -440,22 +453,18 @@ ip6_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
}
always_inline bool
-ip6_sv_reass_verify_upper_layer_present (vlib_node_runtime_t * node,
- vlib_buffer_t * b,
- ip6_frag_hdr_t * frag_hdr)
+ip6_sv_reass_verify_upper_layer_present (vlib_node_runtime_t *node,
+ vlib_buffer_t *b,
+ ip6_ext_hdr_chain_t *hc)
{
- ip6_ext_header_t *tmp = (ip6_ext_header_t *) frag_hdr;
- while (ip6_ext_hdr (tmp->next_hdr))
+ int nh = hc->eh[hc->length - 1].protocol;
+ /* Checking to see if it's a terminating header */
+ if (ip6_ext_hdr (nh))
{
- tmp = ip6_ext_next_header (tmp);
- }
- if (IP_PROTOCOL_IP6_NONXT == tmp->next_hdr)
- {
- icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem,
- ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain,
- 0);
+ icmp6_error_set_vnet_buffer (
+ b, ICMP6_parameter_problem,
+ ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain, 0);
b->error = node->errors[IP6_ERROR_REASS_MISSING_UPPER];
-
return false;
}
return true;
@@ -505,14 +514,18 @@ ip6_sv_reass_verify_packet_size_lt_64k (vlib_main_t * vm,
}
always_inline uword
-ip6_sv_reassembly_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature)
+ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_feature,
+ bool custom_next, bool custom_context)
{
u32 *from = vlib_frame_vector_args (frame);
- u32 n_left_from, n_left_to_next, *to_next, next_index;
+ u32 n_left_from, n_left_to_next, *to_next, *to_next_aux, next_index;
ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
ip6_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
+ u32 *context;
+ if (custom_context)
+ context = vlib_frame_aux_args (frame);
+
clib_spinlock_lock (&rt->lock);
n_left_from = frame->n_vectors;
@@ -520,7 +533,11 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
while (n_left_from > 0)
{
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ if (custom_context)
+ vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next,
+ to_next_aux, n_left_to_next);
+ else
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
while (n_left_from > 0 && n_left_to_next > 0)
{
@@ -528,23 +545,31 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
vlib_buffer_t *b0;
u32 next0 = IP6_SV_REASSEMBLY_NEXT_DROP;
u32 error0 = IP6_ERROR_NONE;
-
+ u8 forward_context = 0;
bi0 = from[0];
b0 = vlib_get_buffer (vm, bi0);
ip6_header_t *ip0 = vlib_buffer_get_current (b0);
- ip6_frag_hdr_t *frag_hdr = NULL;
- ip6_ext_header_t *prev_hdr;
- if (ip6_ext_hdr (ip0->protocol))
+ ip6_frag_hdr_t *frag_hdr;
+ ip6_ext_hdr_chain_t hdr_chain;
+ bool is_atomic_fragment = false;
+
+ int res = ip6_ext_header_walk (
+ b0, ip0, IP_PROTOCOL_IPV6_FRAGMENTATION, &hdr_chain);
+ if (res >= 0 &&
+ hdr_chain.eh[res].protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
{
frag_hdr =
- ip6_ext_header_find (vm, b0, ip0,
- IP_PROTOCOL_IPV6_FRAGMENTATION,
- &prev_hdr);
+ ip6_ext_next_header_offset (ip0, hdr_chain.eh[res].offset);
+ is_atomic_fragment = (0 == ip6_frag_hdr_offset (frag_hdr) &&
+ !ip6_frag_hdr_more (frag_hdr));
}
- if (!frag_hdr)
+
+ if (res < 0 ||
+ hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION ||
+ is_atomic_fragment)
{
- // this is a regular packet - no fragmentation
+ // this is a regular unfragmented packet or an atomic fragment
if (!ip6_get_port
(vm, b0, ip0, b0->current_length,
&(vnet_buffer (b0)->ip.reass.ip_proto),
@@ -560,7 +585,8 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
goto packet_enqueue;
}
vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
- next0 = IP6_SV_REASSEMBLY_NEXT_INPUT;
+ next0 = custom_next ? vnet_buffer (b0)->ip.reass.next_index :
+ IP6_SV_REASSEMBLY_NEXT_INPUT;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
ip6_sv_reass_add_trace (
@@ -571,13 +597,15 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
}
goto packet_enqueue;
}
+
vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset =
- (u8 *) frag_hdr - (u8 *) ip0;
+ hdr_chain.eh[res].offset;
+
if (0 == ip6_frag_hdr_offset (frag_hdr))
{
// first fragment - verify upper-layer is present
- if (!ip6_sv_reass_verify_upper_layer_present
- (node, b0, frag_hdr))
+ if (!ip6_sv_reass_verify_upper_layer_present (node, b0,
+ &hdr_chain))
{
next0 = IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR;
goto packet_enqueue;
@@ -597,10 +625,15 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
kv.k.as_u64[1] = ip0->src_address.as_u64[1];
kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
- kv.k.as_u64[4] =
- ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
- vnet_buffer (b0)->sw_if_index[VLIB_RX])) << 32 |
- (u64) frag_hdr->identification;
+ if (custom_context)
+ kv.k.as_u64[4] =
+ (u64) *context << 32 | (u64) frag_hdr->identification;
+ else
+ kv.k.as_u64[4] =
+ ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]))
+ << 32 |
+ (u64) frag_hdr->identification;
kv.k.as_u64[5] = ip0->protocol;
ip6_sv_reass_t *reass =
@@ -611,6 +644,8 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
next0 = IP6_SV_REASSEMBLY_NEXT_HANDOFF;
vnet_buffer (b0)->ip.reass.owner_thread_index =
kv.v.thread_index;
+ if (custom_context)
+ forward_context = 1;
goto packet_enqueue;
}
@@ -635,7 +670,8 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
reass->tcp_seq_number;
vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
- next0 = IP6_SV_REASSEMBLY_NEXT_INPUT;
+ next0 = custom_next ? vnet_buffer (b0)->ip.reass.next_index :
+ IP6_SV_REASSEMBLY_NEXT_INPUT;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
ip6_sv_reass_add_trace (
@@ -645,31 +681,30 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
goto packet_enqueue;
}
+ u32 counter = ~0;
switch (ip6_sv_reass_update (vm, node, rm, reass, bi0, frag_hdr))
{
case IP6_SV_REASS_RC_OK:
/* nothing to do here */
break;
case IP6_SV_REASS_RC_TOO_MANY_FRAGMENTS:
- vlib_node_increment_counter (vm, node->node_index,
- IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
- 1);
- ip6_sv_reass_free (vm, rm, rt, reass);
- goto next_packet;
+ counter = IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
break;
case IP6_SV_REASS_RC_UNSUPP_IP_PROTO:
- vlib_node_increment_counter (vm, node->node_index,
- IP6_ERROR_REASS_UNSUPP_IP_PROTO,
- 1);
- ip6_sv_reass_free (vm, rm, rt, reass);
- goto next_packet;
+ counter = IP6_ERROR_REASS_UNSUPP_IP_PROTO;
break;
case IP6_SV_REASS_RC_INTERNAL_ERROR:
- vlib_node_increment_counter (vm, node->node_index,
- IP6_ERROR_REASS_INTERNAL_ERROR, 1);
+ counter = IP6_ERROR_REASS_INTERNAL_ERROR;
+ break;
+ case IP6_SV_REASS_RC_INVALID_FRAG_LEN:
+ counter = IP6_ERROR_REASS_INVALID_FRAG_LEN;
+ break;
+ }
+ if (~0 != counter)
+ {
+ vlib_node_increment_counter (vm, node->node_index, counter, 1);
ip6_sv_reass_free (vm, rm, rt, reass);
goto next_packet;
- break;
}
if (reass->is_complete)
@@ -717,7 +752,8 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
to_next, n_left_to_next, bi0,
next0);
}
- _vec_len (reass->cached_buffers) = 0; // buffers are owned by frame now
+ vec_set_len (reass->cached_buffers,
+ 0); // buffers are owned by frame now
}
goto next_packet;
@@ -730,11 +766,25 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
b0 = vlib_get_buffer (vm, bi0);
vnet_feature_next (&next0, b0);
}
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
+ if (custom_context && forward_context)
+ {
+ if (to_next_aux)
+ {
+ to_next_aux[0] = *context;
+ to_next_aux += 1;
+ }
+ vlib_validate_buffer_enqueue_with_aux_x1 (
+ vm, node, next_index, to_next, to_next_aux, n_left_to_next,
+ bi0, *context, next0);
+ }
+ else
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
next_packet:
from += 1;
+ if (custom_context)
+ context += 1;
n_left_from -= 1;
}
@@ -745,26 +795,21 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
return frame->n_vectors;
}
-static char *ip6_sv_reassembly_error_strings[] = {
-#define _(sym, string) string,
- foreach_ip6_error
-#undef _
-};
-
VLIB_NODE_FN (ip6_sv_reass_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */ );
+ return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */,
+ false /* custom next */,
+ false /* custom context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reass_node) = {
.name = "ip6-sv-reassembly",
.vector_size = sizeof (u32),
.format_trace = format_ip6_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip6_sv_reassembly_error_strings),
- .error_strings = ip6_sv_reassembly_error_strings,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP6_SV_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -774,22 +819,22 @@ VLIB_REGISTER_NODE (ip6_sv_reass_node) = {
[IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reassembly-handoff",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_sv_reass_node_feature) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_sv_reassembly_inline (vm, node, frame, true /* is_feature */ );
+ return ip6_sv_reassembly_inline (vm, node, frame, true /* is_feature */,
+ false /* custom next */,
+ false /* custom context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reass_node_feature) = {
.name = "ip6-sv-reassembly-feature",
.vector_size = sizeof (u32),
.format_trace = format_ip6_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip6_sv_reassembly_error_strings),
- .error_strings = ip6_sv_reassembly_error_strings,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP6_SV_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -799,16 +844,38 @@ VLIB_REGISTER_NODE (ip6_sv_reass_node_feature) = {
[IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reass-feature-hoff",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip6_sv_reassembly_feature) = {
.arc_name = "ip6-unicast",
.node_name = "ip6-sv-reassembly-feature",
.runs_before = VNET_FEATURES ("ip6-lookup"),
.runs_after = 0,
};
-/* *INDENT-ON* */
+
+VLIB_NODE_FN (ip6_sv_reass_custom_context_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */,
+ true /* custom next */,
+ true /* custom context */);
+}
+
+VLIB_REGISTER_NODE (ip6_sv_reass_custom_context_node) = {
+ .name = "ip6-sv-reassembly-custom-context",
+ .vector_size = sizeof (u32),
+ .aux_size = sizeof (u32),
+ .format_trace = format_ip6_sv_reass_trace,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
+ .n_next_nodes = IP6_SV_REASSEMBLY_N_NEXT,
+ .next_nodes =
+ {
+ [IP6_SV_REASSEMBLY_NEXT_INPUT] = "ip6-input",
+ [IP6_SV_REASSEMBLY_NEXT_DROP] = "ip6-drop",
+ [IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
+ [IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reassembly-custom-context-handoff",
+ },
+};
#ifndef CLIB_MARCH_VARIANT
static u32
@@ -959,6 +1026,8 @@ ip6_sv_reass_init_function (vlib_main_t * vm)
rm->fq_index = vlib_frame_queue_main_init (ip6_sv_reass_node.index, 0);
rm->fq_feature_index =
vlib_frame_queue_main_init (ip6_sv_reass_node_feature.index, 0);
+ rm->fq_custom_context_index =
+ vlib_frame_queue_main_init (ip6_sv_reass_custom_context_node.index, 0);
rm->feature_use_refcount_per_intf = NULL;
@@ -1009,7 +1078,6 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm,
clib_spinlock_lock (&rt->lock);
vec_reset_length (pool_indexes_to_free);
- /* *INDENT-OFF* */
pool_foreach_index (index, rt->pool) {
reass = pool_elt_at_index (rt->pool, index);
if (now > reass->last_heard + rm->timeout)
@@ -1017,15 +1085,12 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm,
vec_add1 (pool_indexes_to_free, index);
}
}
- /* *INDENT-ON* */
int *i;
- /* *INDENT-OFF* */
vec_foreach (i, pool_indexes_to_free)
{
ip6_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
ip6_sv_reass_free (vm, rm, rt, reass);
}
- /* *INDENT-ON* */
clib_spinlock_unlock (&rt->lock);
}
@@ -1033,33 +1098,31 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm,
vec_free (pool_indexes_to_free);
if (event_data)
{
- _vec_len (event_data) = 0;
+ vec_set_len (event_data, 0);
}
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reass_expire_node) = {
- .function = ip6_sv_reass_walk_expired,
- .format_trace = format_ip6_sv_reass_trace,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "ip6-sv-reassembly-expire-walk",
-
- .n_errors = ARRAY_LEN (ip6_sv_reassembly_error_strings),
- .error_strings = ip6_sv_reassembly_error_strings,
+ .function = ip6_sv_reass_walk_expired,
+ .format_trace = format_ip6_sv_reass_trace,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "ip6-sv-reassembly-expire-walk",
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
};
-/* *INDENT-ON* */
static u8 *
format_ip6_sv_reass_key (u8 * s, va_list * args)
{
ip6_sv_reass_key_t *key = va_arg (*args, ip6_sv_reass_key_t *);
- s = format (s, "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
- key->xx_id, format_ip6_address, &key->src, format_ip6_address,
- &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
+ s =
+ format (s, "fib_index: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
+ key->fib_index, format_ip6_address, &key->src, format_ip6_address,
+ &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
return s;
}
@@ -1116,11 +1179,9 @@ show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input,
clib_spinlock_lock (&rt->lock);
if (details)
{
- /* *INDENT-OFF* */
pool_foreach (reass, rt->pool) {
vlib_cli_output (vm, "%U", format_ip6_sv_reass, vm, reass);
}
- /* *INDENT-ON* */
}
sum_reass_n += rt->reass_n;
clib_spinlock_unlock (&rt->lock);
@@ -1146,13 +1207,11 @@ show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip6_sv_reassembly_cmd, static) = {
.path = "show ip6-sv-reassembly",
.short_help = "show ip6-sv-reassembly [details]",
.function = show_ip6_sv_reass,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
vnet_api_error_t
@@ -1202,25 +1261,29 @@ format_ip6_sv_reassembly_handoff_trace (u8 * s, va_list * args)
}
always_inline uword
-ip6_sv_reassembly_handoff_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature)
+ip6_sv_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_feature,
+ bool custom_context)
{
ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
- u32 n_enq, n_left_from, *from;
+ u32 n_enq, n_left_from, *from, *context;
u16 thread_indices[VLIB_FRAME_SIZE], *ti;
u32 fq_index;
from = vlib_frame_vector_args (frame);
+ if (custom_context)
+ context = vlib_frame_aux_args (frame);
n_left_from = frame->n_vectors;
vlib_get_buffers (vm, from, bufs, n_left_from);
b = bufs;
ti = thread_indices;
- fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index;
+ fq_index = (is_feature) ?
+ rm->fq_feature_index :
+ (custom_context ? rm->fq_custom_context_index : rm->fq_index);
while (n_left_from > 0)
{
@@ -1239,8 +1302,12 @@ ip6_sv_reassembly_handoff_inline (vlib_main_t * vm,
ti += 1;
b += 1;
}
- n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from,
- thread_indices, frame->n_vectors, 1);
+ if (custom_context)
+ n_enq = vlib_buffer_enqueue_to_thread_with_aux (
+ vm, node, fq_index, from, context, thread_indices, frame->n_vectors, 1);
+ else
+ n_enq = vlib_buffer_enqueue_to_thread (
+ vm, node, fq_index, from, thread_indices, frame->n_vectors, 1);
if (n_enq < frame->n_vectors)
vlib_node_increment_counter (vm, node->node_index,
@@ -1253,11 +1320,10 @@ VLIB_NODE_FN (ip6_sv_reassembly_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_sv_reassembly_handoff_inline (vm, node, frame,
- false /* is_feature */ );
+ return ip6_sv_reassembly_handoff_inline (
+ vm, node, frame, false /* is_feature */, false /* custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reassembly_handoff_node) = {
.name = "ip6-sv-reassembly-handoff",
.vector_size = sizeof (u32),
@@ -1276,11 +1342,11 @@ VLIB_REGISTER_NODE (ip6_sv_reassembly_handoff_node) = {
VLIB_NODE_FN (ip6_sv_reassembly_feature_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * frame)
{
- return ip6_sv_reassembly_handoff_inline (vm, node, frame, true /* is_feature */ );
+ return ip6_sv_reassembly_handoff_inline (
+ vm, node, frame, true /* is_feature */, false /* custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reassembly_feature_handoff_node) = {
.name = "ip6-sv-reass-feature-hoff",
.vector_size = sizeof (u32),
@@ -1294,7 +1360,28 @@ VLIB_REGISTER_NODE (ip6_sv_reassembly_feature_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
+
+VLIB_NODE_FN (ip6_sv_reassembly_custom_context_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_sv_reassembly_handoff_inline (
+ vm, node, frame, false /* is_feature */, true /* custom_context */);
+}
+
+VLIB_REGISTER_NODE (ip6_sv_reassembly_custom_context_handoff_node) = {
+ .name = "ip6-sv-reassembly-custom-context-handoff",
+ .vector_size = sizeof (u32),
+ .aux_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(ip6_sv_reassembly_handoff_error_strings),
+ .error_strings = ip6_sv_reassembly_handoff_error_strings,
+ .format_trace = format_ip6_sv_reassembly_handoff_trace,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
#ifndef CLIB_MARCH_VARIANT
int
@@ -1323,6 +1410,14 @@ ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
}
return 0;
}
+
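+/* Add node_index as a next node of the ip6 sv-reassembly custom-context
+ * handoff node and return the resulting next index */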
+uword
+ip6_sv_reass_custom_context_register_next_node (uword node_index)
+{
+ return vlib_node_add_next (
+ vlib_get_main (), ip6_sv_reassembly_custom_context_handoff_node.index,
+ node_index);
+}
#endif
/*
diff --git a/src/vnet/ip/reass/ip6_sv_reass.h b/src/vnet/ip/reass/ip6_sv_reass.h
index 81ac2312bdf..7dc9df132dd 100644
--- a/src/vnet/ip/reass/ip6_sv_reass.h
+++ b/src/vnet/ip/reass/ip6_sv_reass.h
@@ -44,6 +44,7 @@ vnet_api_error_t ip6_sv_reass_enable_disable (u32 sw_if_index,
u8 enable_disable);
int ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable);
+uword ip6_sv_reass_custom_context_register_next_node (uword node_index);
#endif /* __included_ip6_sv_reass_h */
diff --git a/src/vnet/ip/reass/reassembly.rst b/src/vnet/ip/reass/reassembly.rst
new file mode 100644
index 00000000000..49e0a8de6e6
--- /dev/null
+++ b/src/vnet/ip/reass/reassembly.rst
@@ -0,0 +1,221 @@
+.. _reassembly:
+
+IP Reassembly
+=============
+
+Some VPP functions need access to the whole packet and/or to stream
+classification based on L4 headers. The reassembly functionality
+enables both.
+
+Full reassembly vs shallow (virtual) reassembly
+-----------------------------------------------
+
+There are two kinds of reassembly available in VPP:
+
+1. Full reassembly changes a stream of packet fragments into one
+packet containing all data reassembled, with fragment bits cleared
+and the fragment header stripped (in case of ip6). Note that the
+resulting packet may come out of reassembly as a buffer chain. Because
+it's impractical to parse headers which are split over multiple vnet
+buffers, vnet_buffer_chain_linearize() is called after reassembly so
+that L2/L3/L4 headers can be found in the first buffer. Full
+reassembly is costly and shouldn't be used unless necessary. Full
+reassembly is by default enabled for both ipv4 and ipv6 "for us"
+traffic - that is, packets aimed at VPP addresses. This can be
+disabled via API if desired, in which case "for us" fragments are
+dropped.
+
+2. Shallow (virtual) reassembly allows various classifying and/or
+translating features to work with fragments without having to
+understand fragmentation. It works by extracting L4 data and adding
+it to vnet_buffer for each packet/fragment passing through the SVR
+nodes. This operation is performed for both fragments and regular
+packets, allowing consuming code to treat all packets in the same
+way. SVR caches incoming packet fragments (buffers) until the first
+fragment is seen. Then it extracts L4 data from that first fragment,
+fills it in for any cached fragments and transmits them in the same
+order as they were received. From that point on, any other passing
+fragments get L4 data populated in vnet_buffer based on the
+reassembly context.
+
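+Downstream features can then read the extracted L4 data straight from
+the buffer metadata. A minimal sketch (assuming the ``ip.reass``
+metadata layout from ``vnet/buffer.h``; exact field names may differ
+between VPP versions):
+
+.. code-block:: c
+
+   /* after the ip4-sv-reassembly / ip6-sv-reassembly nodes have run */
+   u16 sport = vnet_buffer (b0)->ip.reass.l4_src_port;
+   u16 dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
+   if (vnet_buffer (b0)->ip.reass.is_non_first_fragment)
+     {
+       /* L4 data was inherited from the first fragment of this flow */
+     }
+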
+Multi-worker behaviour
+^^^^^^^^^^^^^^^^^^^^^^
+
+Both reassembly types deal with fragments arriving on different
+workers via a handoff mechanism. All reassembly contexts are stored in
+pools. A bihash mapping a 5-tuple key to a value containing the pool
+index and thread index is used for lookups. When a lookup finds an
+existing reassembly on a different thread, it hands off the fragment
+to that thread. If the lookup fails, a new reassembly context is
+created and the current worker becomes the owner of that context.
+Further fragments received on other worker threads are then handed off
+to the owner worker thread.
+
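+Conceptually, the bihash value packs both coordinates of a context.
+The sketch below illustrates the idea; the actual key/value types live
+in the reassembly implementation files:
+
+.. code-block:: c
+
+   typedef union
+   {
+     struct
+     {
+       u32 reass_index;  /* index into the owning worker's pool */
+       u32 thread_index; /* worker that owns the context */
+     };
+     u64 as_u64;         /* stored as the bihash value */
+   } reass_value_sketch_t;
+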
+Full reassembly also remembers the thread index where the first
+fragment (the fragment with fragment offset 0) was seen and uses the
+handoff mechanism to send the reassembled packet out on that thread
+even if the pool owner is a different thread. This then requires an
+additional handoff to free the reassembly context, as only the pool
+owner can do that in a thread-safe way.
+
+Limits
+^^^^^^
+
+Because reassembly could be an attack vector, there is a configurable
+limit on the number of concurrent reassemblies and also on the
+maximum number of fragments per packet.
+
+Custom applications
+^^^^^^^^^^^^^^^^^^^
+
+Both reassembly features can be used by custom applications which are
+not part of the VPP source tree. Be it patches or 3rd-party plugins,
+they can build their own graph paths by using the "-custom*" versions
+of the nodes. Reassembly then reads next_index and error_next_index
+for each buffer from vnet_buffer, allowing a custom application to
+steer both reassembled packets and any packets considered an error in
+whatever way it requires, as shown in the sketch below.
+
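+For example, a plugin could steer buffers through the custom nodes as
+follows (a sketch; ``my_node`` and ``my_error_next_index`` are
+illustrative, the registration helper is declared in
+``ip6_sv_reass.h``):
+
+.. code-block:: c
+
+   /* once, at init: register the node to visit after reassembly */
+   uword next_index =
+     ip6_sv_reass_custom_context_register_next_node (my_node.index);
+
+   /* per buffer, before handing it to the custom reassembly node */
+   vnet_buffer (b0)->ip.reass.next_index = next_index;
+   vnet_buffer (b0)->ip.reass.error_next_index = my_error_next_index;
+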
+Full reassembly
+---------------
+
+Configuration
+^^^^^^^^^^^^^
+
+Configuration is via API (``ip_reassembly_enable_disable``) or CLI:
+
+``set interface reassembly <interface-name> [on|off|ip4|ip6]``
+
+where ``on`` means both ip4 and ip6.
+
+A show command is provided to see reassembly contexts:
+
+For ip4:
+
+``show ip4-full-reassembly [details]``
+
+For ip6:
+
+``show ip6-full-reassembly [details]``
+
+Global full reassembly parameters can be modified using API
+``ip_reassembly_set`` and retrieved using ``ip_reassembly_get``.
+
+Defaults
+""""""""
+
+For default values, see the #defines in
+
+`ip4_full_reass.c <__REPOSITORY_URL__/src/vnet/ip/reass/ip4_full_reass.c>`_
+
+========================================= ==========================================
+#define                                   description
+----------------------------------------- ------------------------------------------
+IP4_REASS_TIMEOUT_DEFAULT_MS              timeout in milliseconds
+IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS interval between reaping expired sessions
+IP4_REASS_MAX_REASSEMBLIES_DEFAULT        maximum number of concurrent reassemblies
+IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT   maximum number of fragments per reassembly
+========================================= ==========================================
+
+and
+
+`ip6_full_reass.c <__REPOSITORY_URL__/src/vnet/ip/reass/ip6_full_reass.c>`_
+
+========================================= ==========================================
+#define                                   description
+----------------------------------------- ------------------------------------------
+IP6_REASS_TIMEOUT_DEFAULT_MS              timeout in milliseconds
+IP6_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS interval between reaping expired sessions
+IP6_REASS_MAX_REASSEMBLIES_DEFAULT        maximum number of concurrent reassemblies
+IP6_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT   maximum number of fragments per reassembly
+========================================= ==========================================
+
+Finished/expired contexts
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Reassembly contexts are freed either when reassembly is finished -
+when all data has been received - or in case of timeout. A process
+walks all reassemblies and frees any expired ones.
+
+Shallow (virtual) reassembly
+----------------------------
+
+Configuration
+^^^^^^^^^^^^^
+
+Configuration is via API (``ip_reassembly_enable_disable``) only, as
+there is no value in turning SVR on by hand without a feature
+consuming the buffer metadata. SVR is designed to be turned on
+programmatically by a feature requiring it.
+
+A show command is provided to see reassembly contexts:
+
+For ip4:
+
+``show ip4-sv-reassembly [details]``
+
+For ip6:
+
+``show ip6-sv-reassembly [details]``
+
+Global shallow reassembly parameters can be modified using API
+``ip_reassembly_set`` and retrieved using ``ip_reassembly_get``.
+
+Defaults
+""""""""
+
+For default values, see the #defines in
+
+`ip4_sv_reass.c <__REPOSITORY_URL__/src/vnet/ip/reass/ip4_sv_reass.c>`_
+
+============================================ ==========================================
+#define                                      description
+-------------------------------------------- ------------------------------------------
+IP4_SV_REASS_TIMEOUT_DEFAULT_MS              timeout in milliseconds
+IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS interval between reaping expired sessions
+IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT        maximum number of concurrent reassemblies
+IP4_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT   maximum number of fragments per reassembly
+============================================ ==========================================
+
+and
+
+`ip6_sv_reass.c <__REPOSITORY_URL__/src/vnet/ip/reass/ip6_sv_reass.c>`_
+
+============================================ ==========================================
+#define                                      description
+-------------------------------------------- ------------------------------------------
+IP6_SV_REASS_TIMEOUT_DEFAULT_MS              timeout in milliseconds
+IP6_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS interval between reaping expired sessions
+IP6_SV_REASS_MAX_REASSEMBLIES_DEFAULT        maximum number of concurrent reassemblies
+IP6_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT   maximum number of fragments per reassembly
+============================================ ==========================================
+
+Expiring contexts
+^^^^^^^^^^^^^^^^^
+
+There is no way of knowing when a reassembly is finished without
+performing an (almost) full reassembly, so contexts in SVR cannot be
+freed in the same way as in full reassembly. Instead, a different
+approach is taken: a least recently used (LRU) list is maintained in
+which reassembly contexts are ordered by last update. The oldest
+context is then freed whenever SVR hits the limit on the number of
+concurrent reassembly contexts. There is also a process reaping
+expired sessions, similar to full reassembly.
+
+Truncated packets
+^^^^^^^^^^^^^^^^^
+
+When SVR detects that a packet has been truncated such that L4
+headers are not available, it marks it as such in vnet_buffer,
+allowing downstream features to handle such packets as they deem fit.
+
+Fast path/slow path
+^^^^^^^^^^^^^^^^^^^
+
+SVR is implemented in a fast path/slow path fashion. By default, it
+assumes that passing traffic doesn't contain fragments and processes
+buffers in a dual-loop. If it sees a fragment, it jumps to
+single-loop processing.
+
+Feature enabled by other features/reference counting
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The SVR feature is enabled by other features, such as NAT, when those
+features themselves are enabled. For this to work, it implements a
+reference-counted API for enabling/disabling SVR.
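+
+A minimal sketch of such a consumer, using the reference-counted API
+declared in ``ip6_sv_reass.h``:
+
+.. code-block:: c
+
+   /* feature enable path; paired with a disable call on teardown */
+   int rv = ip6_sv_reass_enable_disable_with_refcnt (sw_if_index,
+                                                     1 /* is_enable */);
+   if (rv)
+     return clib_error_return (0, "SVR enable failed: %d", rv);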
diff --git a/src/vnet/ip/vtep.h b/src/vnet/ip/vtep.h
index 92e8002e55a..97e74429e88 100644
--- a/src/vnet/ip/vtep.h
+++ b/src/vnet/ip/vtep.h
@@ -29,7 +29,6 @@
* processing and go directly to the tunnel protocol handler node.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED
(struct {
union {
@@ -40,7 +39,6 @@ typedef CLIB_PACKED
u64 as_u64;
};
}) vtep4_key_t;
-/* *INDENT-ON* */
/**
* @brief Tunnel endpoint key (IPv6)
@@ -51,13 +49,11 @@ typedef CLIB_PACKED
* processing and go directly to the tunnel protocol handler node.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED
(struct {
ip6_address_t addr;
u32 fib_index;
}) vtep6_key_t;
-/* *INDENT-ON* */
typedef struct
{
@@ -111,13 +107,13 @@ vtep4_check (vtep_table_t * t, vlib_buffer_t * b0, ip4_header_t * ip40,
return VTEP_CHECK_PASS;
}
-#ifdef CLIB_HAVE_VEC512
typedef struct
{
vtep4_key_t vtep4_cache[8];
int idx;
} vtep4_cache_t;
+#ifdef CLIB_HAVE_VEC512
always_inline u8
vtep4_check_vector (vtep_table_t * t, vlib_buffer_t * b0, ip4_header_t * ip40,
vtep4_key_t * last_k4, vtep4_cache_t * vtep4_u512)
diff --git a/src/vnet/ip6-nd/ip6_mld.c b/src/vnet/ip6-nd/ip6_mld.c
index ea70bcc5d19..74428ec93c3 100644
--- a/src/vnet/ip6-nd/ip6_mld.c
+++ b/src/vnet/ip6-nd/ip6_mld.c
@@ -33,7 +33,6 @@
* adjacency tables and neighbor discovery logic.
*/
-/* *INDENT-OFF*/
/* multicast listener report packet format for ethernet. */
typedef CLIB_PACKED (struct
{
@@ -51,7 +50,6 @@ typedef CLIB_PACKED (struct
ip6_header_t ip;
icmp6_multicast_listener_report_header_t report_hdr;
}) icmp6_multicast_listener_report_packet_t;
-/* *INDENT-ON*/
typedef struct
{
@@ -224,12 +222,10 @@ ip6_mld_delegate_disable (index_t imdi)
imd = pool_elt_at_index (ip6_mld_pool, imdi);
/* clean MLD pools */
- /* *INDENT-OFF* */
pool_flush (m, imd->mldp_group_pool,
({
mhash_unset (&imd->address_to_mldp_index, &m->mcast_address, 0);
}));
- /* *INDENT-ON* */
pool_free (imd->mldp_group_pool);
@@ -326,7 +322,6 @@ ip6_neighbor_send_mldpv2_report (u32 sw_if_index)
rh0->icmp.checksum = 0;
- /* *INDENT-OFF* */
pool_foreach (m, imd->mldp_group_pool)
{
rr.type = m->type;
@@ -345,7 +340,6 @@ ip6_neighbor_send_mldpv2_report (u32 sw_if_index)
payload_length += sizeof( icmp6_multicast_address_record_t);
}
- /* *INDENT-ON* */
rh0->rsvd = 0;
rh0->num_addr_records = clib_host_to_net_u16 (num_addr_records);
@@ -388,7 +382,6 @@ ip6_mld_timer_event (vlib_main_t * vm,
ip6_mld_t *imd;
/* Interface ip6 radv info list */
- /* *INDENT-OFF* */
pool_foreach (imd, ip6_mld_pool)
{
if (!vnet_sw_interface_is_admin_up (vnm, imd->sw_if_index))
@@ -405,7 +398,6 @@ ip6_mld_timer_event (vlib_main_t * vm,
imd->all_routers_mcast = 1;
}
}
- /* *INDENT-ON* */
return 0;
}
@@ -433,13 +425,11 @@ ip6_mld_event_process (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_mld_event_process_node) = {
.function = ip6_mld_event_process,
.name = "ip6-mld-process",
.type = VLIB_NODE_TYPE_PROCESS,
};
-/* *INDENT-ON* */
static u8 *
format_ip6_mld (u8 * s, va_list * args)
@@ -453,7 +443,6 @@ format_ip6_mld (u8 * s, va_list * args)
s = format (s, "%UJoined group address(es):\n", format_white_space, indent);
- /* *INDENT-OFF* */
pool_foreach (m, imd->mldp_group_pool)
{
s = format (s, "%U%U\n",
@@ -461,7 +450,6 @@ format_ip6_mld (u8 * s, va_list * args)
format_ip6_address,
&m->mcast_address);
}
- /* *INDENT-ON* */
return (s);
}
@@ -526,12 +514,10 @@ ip6_mld_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip6_mld_init) =
{
.runs_after = VLIB_INITS("icmp6_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip6-nd/ip6_nd.api b/src/vnet/ip6-nd/ip6_nd.api
index 0a519c16f7f..3ddf25103c1 100644
--- a/src/vnet/ip6-nd/ip6_nd.api
+++ b/src/vnet/ip6-nd/ip6_nd.api
@@ -20,7 +20,7 @@
called through a shared memory interface.
*/
-option version = "1.0.0";
+option version = "1.1.0";
import "vnet/ip/ip_types.api";
import "vnet/interface_types.api";
@@ -106,6 +106,134 @@ autoreply define sw_interface_ip6nd_ra_prefix
u32 pref_lifetime;
};
+/** \brief IPv6 Router Advertisements prefix entry
+ @param prefix - prefix to advertise
+ @param onlink_flag - if true, the prefix can be used for on-link
+ determination
+ @param autonomous_flag - if true, the prefix can be used for stateless
+ address configuration
+ @param val_lifetime - valid lifetime in seconds (0xffffffff represents
+ infinity)
+ @param pref_lifetime - preferred lifetime in seconds (0xffffffff represents
+ infinity)
+ @param valid_lifetime_expires - number of seconds in which valid lifetime
+ expires (zero means never, negative value
+ means expired this number of seconds ago)
+ @param pref_lifetime_expires - number of seconds in which preferred
+ lifetime expires (zero means never, negative
+ value means expired this number of seconds
+ ago)
+ @param decrement_lifetime_flag - if true, decrement valid lifetime and
+ preferred lifetime
+ @param no_advertise - if true, the prefix will not be advertised
+*/
+typedef ip6nd_ra_prefix
+{
+ vl_api_prefix_t prefix;
+ bool onlink_flag;
+ bool autonomous_flag;
+ u32 val_lifetime;
+ u32 pref_lifetime;
+ f64 valid_lifetime_expires;
+ f64 pref_lifetime_expires;
+ bool decrement_lifetime_flag;
+ bool no_advertise;
+};
+
+/** \brief Dump IPv6 Router Advertisements details on a per-interface basis
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface index to use as a filter (0xffffffff
+ represents all interfaces)
+*/
+define sw_interface_ip6nd_ra_dump
+{
+ option in_progress;
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ option vat_help = "[(<if-name>|sw_if_index <if-idx>)]";
+};
+
+/** \brief Details on IPv6 Router Advertisements for a single interface
+ @param context - returned sender context, to match reply w/ request
+ @param sw_if_index - interface index the details belong to
+ @param cur_hop_limit - current hop limit
+ @param adv_managed_flag - if true, enable DHCP for address
+ @param adv_other_flag - if true, enable DHCP for other information
+ @param adv_router_lifetime - lifetime associated with the default router in
+ seconds (zero indicates that the router is not
+ a default router)
+ @param adv_neighbor_reachable_time - number of milliseconds within which a
+ neighbor is assumed to be reachable
+ (zero means unspecified)
+ @param adv_retransmit_interval - number of milliseconds between
+ retransmitted Neighbor Solicitation
+ messages (zero means unspecified)
+ @param adv_link_mtu - MTU that all the nodes on a link use
+ @param send_radv - if true, send periodic Router Advertisements
+ @param cease_radv - if true, cease to send periodic Router Advertisements
+ @param send_unicast - if true, destination address of a Router
+ Advertisement message will use the source address of
+ the Router Solicitation message (when available).
+ Otherwise, multicast address will be used
+ @param adv_link_layer_address - if true, add link layer address option
+ @param max_radv_interval - maximum time in seconds allowed between sending
+ unsolicited multicast Router Advertisements
+ @param min_radv_interval - minimum time in seconds allowed between sending
+ unsolicited multicast Router Advertisements
+ @param last_radv_time - number of seconds since the last time a solicited
+ Router Advertisement message was sent (zero means
+ never)
+ @param last_multicast_time - number of seconds since the last time a
+ multicast Router Advertisements message was
+ sent (zero means never)
+ @param next_multicast_time - number of seconds within which next time a
+ multicast Router Advertisement message will be
+ sent (zero means never)
+ @param initial_adverts_count - number of initial Router Advertisement
+ messages to send
+ @param initial_adverts_interval - number of seconds between initial Router
+ Advertisement messages
+ @param initial_adverts_sent - if true, all initial Router Advertisement
+ messages were sent
+ @param n_advertisements_sent - number of Router Advertisements sent
+ @param n_solicitations_rcvd - number of Router Solicitations received
+ @param n_solicitations_dropped - number of Router Solicitations dropped
+ @param n_prefixes - number of prefix entries
+ @param prefixes - array of prefix entries
+*/
+define sw_interface_ip6nd_ra_details
+{
+ option in_progress;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u8 cur_hop_limit;
+ bool adv_managed_flag;
+ bool adv_other_flag;
+ u16 adv_router_lifetime;
+ u32 adv_neighbor_reachable_time;
+ u32 adv_retransmit_interval;
+ u32 adv_link_mtu;
+ bool send_radv;
+ bool cease_radv;
+ bool send_unicast;
+ bool adv_link_layer_address;
+ f64 max_radv_interval;
+ f64 min_radv_interval;
+ f64 last_radv_time;
+ f64 last_multicast_time;
+ f64 next_multicast_time;
+ u32 initial_adverts_count;
+ f64 initial_adverts_interval;
+ bool initial_adverts_sent;
+ u32 n_advertisements_sent;
+ u32 n_solicitations_rcvd;
+ u32 n_solicitations_dropped;
+ u32 n_prefixes;
+ vl_api_ip6nd_ra_prefix_t prefixes[n_prefixes];
+};
+
/** \brief IPv6 ND (mirror) proxy
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/vnet/ip6-nd/ip6_nd.c b/src/vnet/ip6-nd/ip6_nd.c
index 772c811ae20..763aca290e6 100644
--- a/src/vnet/ip6-nd/ip6_nd.c
+++ b/src/vnet/ip6-nd/ip6_nd.c
@@ -90,6 +90,7 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm,
icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *o0;
u32 bi0, options_len0, sw_if_index0, next0, error0;
u32 ip6_sadd_link_local, ip6_sadd_unspecified;
+ ip_neighbor_counter_type_t c_type;
int is_rewrite0;
u32 ni0;
@@ -148,7 +149,6 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm,
if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 &&
!ip6_sadd_unspecified))
{
- /* *INDENT-OFF* */
ip_neighbor_learn_t learn = {
.sw_if_index = sw_if_index0,
.ip = {
@@ -158,7 +158,6 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm,
h0->target_address),
}
};
- /* *INDENT-ON* */
memcpy (&learn.mac, o0->ethernet_address, sizeof (learn.mac));
ip_neighbor_learn_dp (&learn);
}
@@ -230,16 +229,24 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm,
}
if (is_solicitation)
- next0 = (error0 != ICMP6_ERROR_NONE
- ? ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP
- : ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY);
+ {
+ next0 = (error0 != ICMP6_ERROR_NONE ?
+ ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP :
+ ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY);
+ c_type = IP_NEIGHBOR_CTR_REQUEST;
+ }
else
{
next0 = 0;
error0 = error0 == ICMP6_ERROR_NONE ?
ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_RX : error0;
+ c_type = IP_NEIGHBOR_CTR_REPLY;
}
+ vlib_increment_simple_counter (
+ &ip_neighbor_counters[AF_IP6].ipnc[VLIB_RX][c_type],
+ vm->thread_index, sw_if_index0, 1);
+
if (is_solicitation && error0 == ICMP6_ERROR_NONE)
{
icmp6_send_neighbor_advertisement (vm, p0, ip0, h0, o0,
@@ -334,7 +341,6 @@ icmp6_neighbor_advertisement (vlib_main_t * vm,
0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_icmp_neighbor_solicitation_node,static) =
{
.function = icmp6_neighbor_solicitation,
@@ -365,7 +371,6 @@ VLIB_REGISTER_NODE (ip6_icmp_neighbor_advertisement_node,static) =
[0] = "ip6-punt",
},
};
-/* *INDENT-ON* */
static u8 *
format_ip6_nd (u8 * s, va_list * args)
@@ -418,12 +423,10 @@ ip6_nd_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip6_nd_init) =
{
.runs_after = VLIB_INITS("icmp6_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip6-nd/ip6_nd_api.c b/src/vnet/ip6-nd/ip6_nd_api.c
index 6520a61f691..5555d8fea64 100644
--- a/src/vnet/ip6-nd/ip6_nd_api.c
+++ b/src/vnet/ip6-nd/ip6_nd_api.c
@@ -95,13 +95,11 @@ vl_api_ip6nd_proxy_dump_t_handler (vl_api_ip6nd_proxy_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach_index (fib_index, im6->fibs)
{
fib_table_walk (fib_index, FIB_PROTOCOL_IP6,
api_ip6nd_proxy_fib_table_walk, &ctx);
}
- /* *INDENT-ON* */
vec_sort_with_function (ctx.indices, fib_entry_cmp_for_sort);
@@ -222,6 +220,175 @@ static void
}
static void
+ip6_radv_prefix_encode (f64 now, const ip6_radv_prefix_t *in,
+ vl_api_ip6nd_ra_prefix_t *out)
+{
+ fib_prefix_t in_ip6_pfx = {
+ .fp_addr = {
+ .ip6 = in->prefix,
+ },
+ .fp_len = in->prefix_len,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ };
+
+ ip_prefix_encode (&in_ip6_pfx, &out->prefix);
+
+ out->onlink_flag = in->adv_on_link_flag;
+ out->autonomous_flag = in->adv_autonomous_flag;
+ out->val_lifetime = htonl (in->adv_valid_lifetime_in_secs);
+ out->pref_lifetime = htonl (in->adv_pref_lifetime_in_secs);
+
+ if (in->adv_valid_lifetime_in_secs != ~0)
+ {
+ out->valid_lifetime_expires =
+ clib_host_to_net_f64 (in->valid_lifetime_expires - now);
+ }
+
+ if (in->adv_pref_lifetime_in_secs != ~0)
+ {
+ out->pref_lifetime_expires =
+ clib_host_to_net_f64 (in->pref_lifetime_expires - now);
+ }
+
+ out->decrement_lifetime_flag = in->decrement_lifetime_flag;
+ out->no_advertise = (in->enabled == 0);
+}
+
+static void
+send_sw_interface_ip6nd_ra_details (vl_api_registration_t *reg, u32 context,
+ ip6_ra_t *radv_info)
+{
+ vl_api_sw_interface_ip6nd_ra_details_t *rmp = 0;
+ vl_api_ip6nd_ra_prefix_t *api_radv_pfx;
+ u32 n_prefixes = pool_elts (radv_info->adv_prefixes_pool);
+ ip6_radv_prefix_t *radv_pfx;
+ u32 msg_size = sizeof (*rmp) + n_prefixes * sizeof (*api_radv_pfx);
+ vlib_main_t *vm = vlib_get_main ();
+ f64 now = vlib_time_now (vm);
+
+ rmp = vl_msg_api_alloc (msg_size);
+ if (!rmp)
+ return;
+ clib_memset (rmp, 0, msg_size);
+ rmp->_vl_msg_id =
+ ntohs (VL_API_SW_INTERFACE_IP6ND_RA_DETAILS + REPLY_MSG_ID_BASE);
+ rmp->context = context;
+
+ rmp->sw_if_index = htonl (radv_info->sw_if_index);
+ rmp->cur_hop_limit = radv_info->curr_hop_limit;
+ rmp->adv_managed_flag = radv_info->adv_managed_flag;
+ rmp->adv_other_flag = radv_info->adv_other_flag;
+ rmp->adv_router_lifetime = htons (radv_info->adv_router_lifetime_in_sec);
+ rmp->adv_neighbor_reachable_time =
+ htonl (radv_info->adv_neighbor_reachable_time_in_msec);
+ rmp->adv_retransmit_interval = htonl (
+ radv_info->adv_time_in_msec_between_retransmitted_neighbor_solicitations);
+ rmp->adv_link_mtu = htonl (radv_info->adv_link_mtu);
+ rmp->send_radv = radv_info->send_radv;
+ rmp->cease_radv = radv_info->cease_radv;
+ rmp->send_unicast = radv_info->send_unicast;
+ rmp->adv_link_layer_address = radv_info->adv_link_layer_address;
+ rmp->max_radv_interval = clib_host_to_net_f64 (radv_info->max_radv_interval);
+ rmp->min_radv_interval = clib_host_to_net_f64 (radv_info->min_radv_interval);
+
+ if (radv_info->last_radv_time > 0.0)
+ {
+ rmp->last_radv_time =
+ clib_host_to_net_f64 (now - radv_info->last_radv_time);
+ }
+
+ if ((radv_info->next_multicast_time - radv_info->last_multicast_time) > 0.0)
+ {
+ rmp->last_multicast_time =
+ clib_host_to_net_f64 (now - radv_info->last_multicast_time);
+ rmp->next_multicast_time =
+ clib_host_to_net_f64 (radv_info->next_multicast_time - now);
+ }
+
+ rmp->initial_adverts_count = htonl (radv_info->initial_adverts_count);
+ rmp->initial_adverts_interval =
+ clib_host_to_net_f64 (radv_info->initial_adverts_interval);
+ rmp->initial_adverts_sent = (radv_info->initial_adverts_sent == 0);
+ rmp->n_advertisements_sent = htonl (radv_info->n_advertisements_sent);
+ rmp->n_solicitations_rcvd = htonl (radv_info->n_solicitations_rcvd);
+ rmp->n_solicitations_dropped = htonl (radv_info->n_solicitations_dropped);
+ rmp->n_prefixes = htonl (n_prefixes);
+
+ api_radv_pfx = rmp->prefixes;
+ pool_foreach (radv_pfx, radv_info->adv_prefixes_pool)
+ {
+ ip6_radv_prefix_encode (now, radv_pfx, api_radv_pfx);
+
+ api_radv_pfx++;
+ }
+
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+typedef struct
+{
+ u32 *sw_if_indices;
+} api_dump_ip6_ra_itf_walk_ctx_t;
+
+static walk_rc_t
+api_dump_ip6_ra_itf_walk_fn (u32 sw_if_index, void *arg)
+{
+ api_dump_ip6_ra_itf_walk_ctx_t *ctx = arg;
+
+ vec_add1 (ctx->sw_if_indices, sw_if_index);
+
+ return (WALK_CONTINUE);
+}
+
+static void
+vl_api_sw_interface_ip6nd_ra_dump_t_handler (
+ vl_api_sw_interface_ip6nd_ra_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ u32 sw_if_index;
+ ip6_ra_t *radv_info;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (sw_if_index == INDEX_INVALID)
+ {
+ /* dump all interfaces */
+
+ api_dump_ip6_ra_itf_walk_ctx_t ctx = {
+ .sw_if_indices = NULL,
+ };
+ u32 *sw_if_i;
+
+ ip6_ra_itf_walk (api_dump_ip6_ra_itf_walk_fn, &ctx);
+
+ vec_foreach (sw_if_i, ctx.sw_if_indices)
+ {
+ radv_info = ip6_ra_get_itf (*sw_if_i);
+ if (radv_info != NULL)
+ {
+ send_sw_interface_ip6nd_ra_details (reg, mp->context, radv_info);
+ }
+ }
+
+ vec_free (ctx.sw_if_indices);
+ }
+ else
+ {
+ /* dump a single interface */
+
+ radv_info = ip6_ra_get_itf (sw_if_index);
+ if (radv_info != NULL)
+ {
+ send_sw_interface_ip6nd_ra_details (reg, mp->context, radv_info);
+ }
+ }
+}
+
+static void
vl_api_ip6nd_send_router_solicitation_t_handler
(vl_api_ip6nd_send_router_solicitation_t * mp)
{
@@ -250,7 +417,6 @@ static void
static void
ip6_ra_handle_report (const ip6_ra_report_t * rap)
{
- /* *INDENT-OFF* */
vpe_client_registration_t *rp;
pool_foreach (rp, vpe_api_main.ip6_ra_events_registrations)
@@ -304,7 +470,6 @@ ip6_ra_handle_report (const ip6_ra_report_t * rap)
vl_api_send_msg (vl_reg, (u8 *) event);
}
}
- /* *INDENT-ON* */
}
static void
diff --git a/src/vnet/ip6-nd/ip6_nd_inline.h b/src/vnet/ip6-nd/ip6_nd_inline.h
index ad0c3a3a79b..c959c94ed1d 100644
--- a/src/vnet/ip6-nd/ip6_nd_inline.h
+++ b/src/vnet/ip6-nd/ip6_nd_inline.h
@@ -22,6 +22,8 @@
#include <vnet/ethernet/ethernet.h>
#include <vnet/ip/icmp46_packet.h>
#include <vnet/ip/ip6.h>
+#include <vnet/ip-neighbor/ip_neighbor_types.h>
+#include <vnet/ip6-nd/ip6_ra.h>
typedef enum
{
@@ -70,6 +72,13 @@ icmp6_send_neighbor_advertisement (
clib_host_to_net_u32 (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED |
ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
+ /* if sending RAs is enabled, the "router" flag should be set,
+ * otherwise, neighbors may believe we have changed from a router
+ * to a host - RFC 4861 section 4.4 */
+ if (ip6_ra_adv_enabled (sw_if_index0))
+ icmp6_nsa->advertisement_flags |=
+ clib_host_to_net_u32 (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_ROUTER);
+
icmp6_nsa->icmp.checksum = 0;
icmp6_nsa->icmp.checksum =
ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6_h, &bogus_length);
@@ -88,6 +97,10 @@ icmp6_send_neighbor_advertisement (
vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index0;
vnet_buffer (b)->sw_if_index[VLIB_RX] =
vnet_main.local_interface_sw_if_index;
+
+ vlib_increment_simple_counter (
+ &ip_neighbor_counters[AF_IP6].ipnc[VLIB_TX][IP_NEIGHBOR_CTR_REPLY],
+ vm->thread_index, sw_if_index0, 1);
}
#endif /* included_ip6_nd_inline_h */
diff --git a/src/vnet/ip6-nd/ip6_nd_proxy.c b/src/vnet/ip6-nd/ip6_nd_proxy.c
index 256b48581bb..f7f07cb59f6 100644
--- a/src/vnet/ip6-nd/ip6_nd_proxy.c
+++ b/src/vnet/ip6-nd/ip6_nd_proxy.c
@@ -23,7 +23,6 @@
static int
ip6_nd_proxy_add_del (u32 sw_if_index, const ip6_address_t * addr, u8 is_del)
{
- /* *INDENT-OFF* */
u32 fib_index;
fib_prefix_t pfx = {
.fp_len = 128,
@@ -35,7 +34,6 @@ ip6_nd_proxy_add_del (u32 sw_if_index, const ip6_address_t * addr, u8 is_del)
ip46_address_t nh = {
.ip6 = *addr,
};
- /* *INDENT-ON* */
fib_index = ip6_fib_table_get_index_for_sw_if_index (sw_if_index);
@@ -117,14 +115,12 @@ set_ip6_nd_proxy_cmd (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip6_nd_proxy_command, static) =
{
.path = "set ip6 nd proxy",
.short_help = "set ip6 nd proxy <interface> [del] <host-ip>",
.function = set_ip6_nd_proxy_cmd,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip6-nd/ip6_nd_test.c b/src/vnet/ip6-nd/ip6_nd_test.c
index 933029d7593..488ca591ba0 100644
--- a/src/vnet/ip6-nd/ip6_nd_test.c
+++ b/src/vnet/ip6-nd/ip6_nd_test.c
@@ -325,6 +325,63 @@ api_ip6nd_proxy_enable_disable (vat_main_t *vam)
return -1;
}
+static int
+api_sw_interface_ip6nd_ra_dump (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_ip6nd_ra_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u32 sw_if_index = ~0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %u", &sw_if_index))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_IP6ND_RA_DUMP, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+
+ /* Send it */
+ S (mp);
+
+ /* Use control ping for synchronization */
+ PING (&ip6_nd_test_main, mp_ping);
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+
+ return ret;
+}
+
+static void
+vl_api_sw_interface_ip6nd_ra_details_t_handler (
+ vl_api_sw_interface_ip6nd_ra_details_t *mp)
+{
+ vat_main_t *vam = ip6_nd_test_main.vat_main;
+ u32 sw_if_index;
+ u8 send_radv;
+
+ /* Read the message */
+ sw_if_index = ntohl (mp->sw_if_index);
+ send_radv = mp->send_radv;
+
+ /* Print it */
+ print (vam->ofp, "sw_if_index: %u, send_radv: %s", sw_if_index,
+ (send_radv ? "on" : "off"));
+}
+
#include <ip6-nd/ip6_nd.api_test.c>
/*
diff --git a/src/vnet/ip6-nd/ip6_ra.c b/src/vnet/ip6-nd/ip6_ra.c
index d3597706293..ffc02e813e2 100644
--- a/src/vnet/ip6-nd/ip6_ra.c
+++ b/src/vnet/ip6-nd/ip6_ra.c
@@ -30,7 +30,6 @@
* The files contains the API and CLI code for managing IPv6 RAs
*/
-/* *INDENT-OFF* */
/* Router solicitation packet format for ethernet. */
typedef CLIB_PACKED (struct
{
@@ -51,7 +50,6 @@ typedef CLIB_PACKED (struct
icmp6_neighbor_discovery_prefix_information_option_t
prefix[0];
}) icmp6_router_advertisement_packet_t;
-/* *INDENT-ON* */
#define DEF_MAX_RADV_INTERVAL 200
#define DEF_MIN_RADV_INTERVAL .75 * DEF_MAX_RADV_INTERVAL
@@ -65,95 +63,6 @@ typedef CLIB_PACKED (struct
#define MAX_DELAY_BETWEEN_RAS 1800 /* seconds */
#define MAX_RA_DELAY_TIME .5 /* seconds */
-/* advertised prefix option */
-typedef struct
-{
- /* basic advertised information */
- ip6_address_t prefix;
- u8 prefix_len;
- int adv_on_link_flag;
- int adv_autonomous_flag;
- u32 adv_valid_lifetime_in_secs;
- u32 adv_pref_lifetime_in_secs;
-
- /* advertised values are computed from these times if decrementing */
- f64 valid_lifetime_expires;
- f64 pref_lifetime_expires;
-
- /* local information */
- int enabled;
- int deprecated_prefix_flag;
- int decrement_lifetime_flag;
-
-#define MIN_ADV_VALID_LIFETIME 7203 /* seconds */
-#define DEF_ADV_VALID_LIFETIME 2592000
-#define DEF_ADV_PREF_LIFETIME 604800
-
- /* extensions are added here, mobile, DNS etc.. */
-} ip6_radv_prefix_t;
-
-typedef struct ip6_ra_t_
-{
- /* advertised config information, zero means unspecified */
- u8 curr_hop_limit;
- int adv_managed_flag;
- int adv_other_flag;
- u16 adv_router_lifetime_in_sec;
- u32 adv_neighbor_reachable_time_in_msec;
- u32 adv_time_in_msec_between_retransmitted_neighbor_solicitations;
-
- /* mtu option */
- u32 adv_link_mtu;
-
- /* local information */
- u32 sw_if_index;
- int send_radv; /* radv on/off on this interface - set by config */
- int cease_radv; /* we are ceasing to send - set byf config */
- int send_unicast;
- int adv_link_layer_address;
- int prefix_option;
- int failed_device_check;
- int ref_count;
-
- /* prefix option */
- ip6_radv_prefix_t *adv_prefixes_pool;
-
- /* Hash table mapping address to index in interface advertised prefix pool. */
- mhash_t address_to_prefix_index;
-
- f64 max_radv_interval;
- f64 min_radv_interval;
- f64 min_delay_between_radv;
- f64 max_delay_between_radv;
- f64 max_rtr_default_lifetime;
-
- f64 last_radv_time;
- f64 last_multicast_time;
- f64 next_multicast_time;
-
-
- u32 initial_adverts_count;
- f64 initial_adverts_interval;
- u32 initial_adverts_sent;
-
- /* stats */
- u32 n_advertisements_sent;
- u32 n_solicitations_rcvd;
- u32 n_solicitations_dropped;
-
- /* router solicitations sending state */
- u8 keep_sending_rs; /* when true then next fields are valid */
- icmp6_send_router_solicitation_params_t params;
- f64 sleep_interval;
- f64 due_time;
- u32 n_left;
- f64 start_time;
- vlib_buffer_t *buffer;
-
- u32 seed;
-
-} ip6_ra_t;
-
static ip6_link_delegate_id_t ip6_ra_delegate_id;
static ip6_ra_t *ip6_ra_pool;
@@ -191,7 +100,7 @@ ip6_ra_report_unregister (ip6_ra_report_notify_t fn)
}
}
-static inline ip6_ra_t *
+ip6_ra_t *
ip6_ra_get_itf (u32 sw_if_index)
{
index_t rai;
@@ -204,6 +113,28 @@ ip6_ra_get_itf (u32 sw_if_index)
return (NULL);
}
+u8
+ip6_ra_adv_enabled (u32 sw_if_index)
+{
+ ip6_ra_t *ra;
+
+ ra = ip6_ra_get_itf (sw_if_index);
+
+ return ((ra != NULL) && (ra->send_radv != 0));
+}
+
+void
+ip6_ra_itf_walk (ip6_ra_itf_walk_fn_t fn, void *ctx)
+{
+ ip6_ra_t *radv_info;
+
+ pool_foreach (radv_info, ip6_ra_pool)
+ {
+ if (WALK_STOP == fn (radv_info->sw_if_index, ctx))
+ break;
+ }
+}
+
/* for "syslogging" - use elog for now */
#define foreach_log_level \
_ (DEBUG, "DEBUG") \
@@ -372,7 +303,6 @@ icmp6_router_solicitation (vlib_main_t * vm,
if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 &&
!is_unspecified && !is_link_local))
{
- /* *INDENT-OFF* */
ip_neighbor_learn_t learn = {
.sw_if_index = sw_if_index0,
.ip = {
@@ -380,7 +310,6 @@ icmp6_router_solicitation (vlib_main_t * vm,
.version = AF_IP6,
},
};
- /* *INDENT-ON* */
memcpy (&learn.mac, o0->ethernet_address, sizeof (learn.mac));
ip_neighbor_learn_dp (&learn);
}
@@ -413,12 +342,9 @@ icmp6_router_solicitation (vlib_main_t * vm,
radv_info = ip6_ra_get_itf (sw_if_index0);
- error0 = ((!radv_info) ?
- ICMP6_ERROR_ROUTER_SOLICITATION_RADV_NOT_CONFIG :
- error0);
- error0 = radv_info->send_radv == 0 ?
- ICMP6_ERROR_ROUTER_SOLICITATION_RADV_NOT_CONFIG :
- error0;
+ error0 = ((!radv_info || 0 == radv_info->send_radv) ?
+ ICMP6_ERROR_ROUTER_SOLICITATION_RADV_NOT_CONFIG :
+ error0);
if (error0 == ICMP6_ERROR_NONE)
{
f64 now = vlib_time_now (vm);
@@ -530,7 +456,6 @@ icmp6_router_solicitation (vlib_main_t * vm,
/* add advertised prefix options */
ip6_radv_prefix_t *pr_info;
- /* *INDENT-OFF* */
pool_foreach (pr_info, radv_info->adv_prefixes_pool)
{
if(pr_info->enabled &&
@@ -596,7 +521,6 @@ icmp6_router_solicitation (vlib_main_t * vm,
}
}
- /* *INDENT-ON* */
/* add additional options before here */
@@ -640,6 +564,8 @@ icmp6_router_solicitation (vlib_main_t * vm,
/* Reuse current MAC header, copy SMAC to DMAC and
* interface MAC to SMAC */
vlib_buffer_reset (p0);
+ vlib_buffer_advance (
+ p0, vnet_buffer (p0)->l2_hdr_offset);
eth0 = vlib_buffer_get_current (p0);
clib_memcpy (eth0->dst_address, eth0->src_address,
6);
@@ -702,7 +628,6 @@ icmp6_router_solicitation (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_icmp_router_solicitation_node,static) =
{
.function = icmp6_router_solicitation,
@@ -719,7 +644,6 @@ VLIB_REGISTER_NODE (ip6_icmp_router_solicitation_node,static) =
[ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX] = "interface-output",
},
};
-/* *INDENT-ON* */
/* validate advertised info for consistancy (see RFC-4861 section 6.2.7) - log any inconsistencies, packet will always be dropped */
static_always_inline uword
@@ -1012,7 +936,6 @@ icmp6_router_advertisement (vlib_main_t * vm,
prefix->prefix.fp_proto = FIB_PROTOCOL_IP6;
/* look for matching prefix - if we our advertising it, it better be consistant */
- /* *INDENT-OFF* */
pool_foreach (pr_info, radv_info->adv_prefixes_pool)
{
@@ -1043,7 +966,6 @@ icmp6_router_advertisement (vlib_main_t * vm,
}
break;
}
- /* *INDENT-ON* */
break;
}
default:
@@ -1077,7 +999,6 @@ icmp6_router_advertisement (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_icmp_router_advertisement_node,static) =
{
.function = icmp6_router_advertisement,
@@ -1092,7 +1013,6 @@ VLIB_REGISTER_NODE (ip6_icmp_router_advertisement_node,static) =
[0] = "ip6-drop",
},
};
-/* *INDENT-ON* */
static inline f64
random_f64_from_to (f64 from, f64 to)
@@ -1282,14 +1202,12 @@ send_rs_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
do
{
due_time = current_time + 1e9;
- /* *INDENT-OFF* */
pool_foreach (radv_info, ip6_ra_pool)
{
if (check_send_rs (vm, radv_info, current_time, &dt)
&& (dt < due_time))
due_time = dt;
}
- /* *INDENT-ON* */
current_time = vlib_time_now (vm);
}
while (due_time < current_time);
@@ -1300,13 +1218,11 @@ send_rs_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_rs_process_node) = {
.function = send_rs_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "ip6-rs-process",
};
-/* *INDENT-ON* */
void
icmp6_send_router_solicitation (vlib_main_t * vm, u32 sw_if_index, u8 stop,
@@ -1394,9 +1310,6 @@ ip6_ra_link_enable (u32 sw_if_index)
radv_info->initial_adverts_sent = radv_info->initial_adverts_count - 1;
radv_info->initial_adverts_interval = MAX_INITIAL_RTR_ADVERT_INTERVAL;
- /* deafult is to send */
- radv_info->send_radv = 1;
-
/* fill in delegate for this interface that will be needed later */
radv_info->adv_link_mtu =
vnet_sw_interface_get_mtu (vnet_get_main (), sw_if_index, VNET_MTU_IP6);
@@ -1417,12 +1330,10 @@ ip6_ra_delegate_disable (index_t rai)
radv_info = pool_elt_at_index (ip6_ra_pool, rai);
/* clean up prefix and MDP pools */
- /* *INDENT-OFF* */
pool_flush(p, radv_info->adv_prefixes_pool,
({
mhash_unset (&radv_info->address_to_prefix_index, &p->prefix, 0);
}));
- /* *INDENT-ON* */
pool_free (radv_info->adv_prefixes_pool);
@@ -1444,12 +1355,10 @@ ip6_ra_update_secondary_radv_info (ip6_address_t * address, u8 prefix_len,
ip6_address_mask_from_width (&mask, prefix_len);
vec_reset_length (radv_indices);
- /* *INDENT-OFF* */
pool_foreach (radv_info, ip6_ra_pool)
{
vec_add1 (radv_indices, radv_info - ip6_ra_pool);
}
- /* *INDENT-ON* */
/*
* If we have another customer for this prefix,
@@ -1464,7 +1373,6 @@ ip6_ra_update_secondary_radv_info (ip6_address_t * address, u8 prefix_len,
if (radv_info->sw_if_index == primary_sw_if_index)
continue;
- /* *INDENT-OFF* */
pool_foreach (this_prefix, radv_info->adv_prefixes_pool)
{
if (this_prefix->prefix_len == prefix_len
@@ -1487,7 +1395,6 @@ ip6_ra_update_secondary_radv_info (ip6_address_t * address, u8 prefix_len,
clib_warning ("ip6_neighbor_ra_prefix returned %d", rv);
}
}
- /* *INDENT-ON*/
}
}
@@ -1508,7 +1415,6 @@ ip6_ra_process_timer_event (vlib_main_t * vm,
f64 now = vlib_time_now (vm);
/* Interface ip6 radv info list */
- /* *INDENT-OFF* */
pool_foreach (radv_info, ip6_ra_pool)
{
if( !vnet_sw_interface_is_admin_up (vnm, radv_info->sw_if_index))
@@ -1598,7 +1504,6 @@ ip6_ra_process_timer_event (vlib_main_t * vm,
}
}
}
- /* *INDENT-ON* */
if (f)
{
@@ -1655,14 +1560,12 @@ ip6_ra_event_process (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_ra_process_node) =
{
.function = ip6_ra_event_process,
.name = "ip6-ra-process",
.type = VLIB_NODE_TYPE_PROCESS,
};
-/* *INDENT-ON* */
static void
ip6_ra_signal_report (ip6_ra_report_t * r)
@@ -1704,6 +1607,9 @@ ip6_ra_config (vlib_main_t * vm, u32 sw_if_index,
if (!radv_info)
return (VNET_API_ERROR_IP6_NOT_ENABLED);
+ /* Start off believing that we're going to send radv's */
+ radv_info->send_radv = 1;
+
if ((max_interval != 0) && (min_interval == 0))
min_interval = .75 * max_interval;
@@ -2029,8 +1935,7 @@ ip6_ra_cmd (vlib_main_t * vm,
}
else
{
- error = unformat_parse_error (line_input);
- goto done;
+ break;
}
}
@@ -2122,14 +2027,12 @@ format_ip6_ra (u8 * s, va_list * args)
indent += 2;
- /* *INDENT-OFF* */
pool_foreach (p, radv_info->adv_prefixes_pool)
{
s = format (s, "%Uprefix %U, length %d\n",
format_white_space, indent+2,
format_ip6_address, &p->prefix, p->prefix_len);
}
- /* *INDENT-ON* */
s = format (s, "%UMTU is %d\n",
format_white_space, indent, radv_info->adv_link_mtu);
@@ -2305,14 +2208,12 @@ format_ip6_ra (u8 * s, va_list * args)
* Example of how to delete a prefix:
* @cliexcmd{ip6 nd GigabitEthernet2/0/0 no prefix fe80::fe:28ff:fe9c:75b3/64}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_nd_command, static) =
{
.path = "ip6 nd",
.short_help = "ip6 nd <interface> ...",
.function = ip6_ra_cmd,
};
-/* *INDENT-ON* */
/**
* VFT for registering as a delegate to an IP6 link
@@ -2338,12 +2239,10 @@ ip6_ra_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip6_ra_init) =
{
.runs_after = VLIB_INITS("icmp6_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip6-nd/ip6_ra.h b/src/vnet/ip6-nd/ip6_ra.h
index d09e8c0c975..958845b0a55 100644
--- a/src/vnet/ip6-nd/ip6_ra.h
+++ b/src/vnet/ip6-nd/ip6_ra.h
@@ -21,6 +21,105 @@
#include <vnet/fib/fib_types.h>
+/* advertised prefix option */
+typedef struct
+{
+ /* basic advertised information */
+ ip6_address_t prefix;
+ u8 prefix_len;
+ int adv_on_link_flag;
+ int adv_autonomous_flag;
+ u32 adv_valid_lifetime_in_secs;
+ u32 adv_pref_lifetime_in_secs;
+
+ /* advertised values are computed from these times if decrementing */
+ f64 valid_lifetime_expires;
+ f64 pref_lifetime_expires;
+
+ /* local information */
+ int enabled;
+ int deprecated_prefix_flag;
+ int decrement_lifetime_flag;
+
+#define MIN_ADV_VALID_LIFETIME 7203 /* seconds */
+#define DEF_ADV_VALID_LIFETIME 2592000
+#define DEF_ADV_PREF_LIFETIME 604800
+
+ /* extensions are added here, mobile, DNS etc.. */
+} ip6_radv_prefix_t;
+
+typedef struct
+{
+ u32 irt;
+ u32 mrt;
+ u32 mrc;
+ u32 mrd;
+} icmp6_send_router_solicitation_params_t;
+
+typedef struct ip6_ra_t_
+{
+ /* advertised config information, zero means unspecified */
+ u8 curr_hop_limit;
+ int adv_managed_flag;
+ int adv_other_flag;
+ u16 adv_router_lifetime_in_sec;
+ u32 adv_neighbor_reachable_time_in_msec;
+ u32 adv_time_in_msec_between_retransmitted_neighbor_solicitations;
+
+ /* mtu option */
+ u32 adv_link_mtu;
+
+ /* local information */
+ u32 sw_if_index;
+ int send_radv; /* radv on/off on this interface - set by config */
+ int cease_radv; /* we are ceasing to send - set by config */
+ int send_unicast;
+ int adv_link_layer_address;
+ int prefix_option;
+ int failed_device_check;
+ int ref_count;
+
+ /* prefix option */
+ ip6_radv_prefix_t *adv_prefixes_pool;
+
+ /* Hash table mapping address to index in interface advertised prefix pool.
+ */
+ mhash_t address_to_prefix_index;
+
+ f64 max_radv_interval;
+ f64 min_radv_interval;
+ f64 min_delay_between_radv;
+ f64 max_delay_between_radv;
+ f64 max_rtr_default_lifetime;
+
+ f64 last_radv_time;
+ f64 last_multicast_time;
+ f64 next_multicast_time;
+
+ u32 initial_adverts_count;
+ f64 initial_adverts_interval;
+ u32 initial_adverts_sent;
+
+ /* stats */
+ u32 n_advertisements_sent;
+ u32 n_solicitations_rcvd;
+ u32 n_solicitations_dropped;
+
+ /* router solicitations sending state */
+ u8 keep_sending_rs; /* when true then next fields are valid */
+ icmp6_send_router_solicitation_params_t params;
+ f64 sleep_interval;
+ f64 due_time;
+ u32 n_left;
+ f64 start_time;
+ vlib_buffer_t *buffer;
+
+ u32 seed;
+
+} ip6_ra_t;
+
+extern ip6_ra_t *ip6_ra_get_itf (u32 sw_if_index);
+
extern int ip6_ra_config (vlib_main_t * vm, u32 sw_if_index,
u8 suppress, u8 managed, u8 other,
u8 ll_option, u8 send_unicast, u8 cease,
@@ -35,13 +134,9 @@ extern int ip6_ra_prefix (vlib_main_t * vm, u32 sw_if_index,
u8 off_link, u8 no_autoconfig,
u8 no_onlink, u8 is_no);
-typedef struct
-{
- u32 irt;
- u32 mrt;
- u32 mrc;
- u32 mrd;
-} icmp6_send_router_solicitation_params_t;
+typedef walk_rc_t (*ip6_ra_itf_walk_fn_t) (u32 sw_if_index, void *ctx);
+
+extern void ip6_ra_itf_walk (ip6_ra_itf_walk_fn_t fn, void *ctx);
extern void icmp6_send_router_solicitation (vlib_main_t * vm,
u32 sw_if_index,
@@ -82,7 +177,7 @@ extern void ip6_ra_update_secondary_radv_info (ip6_address_t * address,
u32 primary_sw_if_index,
u32 valid_time,
u32 preferred_time);
-
+extern u8 ip6_ra_adv_enabled (u32 sw_if_index);
#endif /* included_ip6_neighbor_h */
/*
diff --git a/src/vnet/ip6-nd/rd_cp.c b/src/vnet/ip6-nd/rd_cp.c
index 13fd90db288..5d419286051 100644
--- a/src/vnet/ip6-nd/rd_cp.c
+++ b/src/vnet/ip6-nd/rd_cp.c
@@ -72,8 +72,6 @@ enum
RD_CP_EVENT_INTERRUPT,
};
-#define vl_api_ip6_nd_address_autoconfig_t_print vl_noop_handler
-
static void
router_solicitation_start_stop (u32 sw_if_index, u8 start)
{
@@ -262,7 +260,6 @@ ip6_ra_report_handler (const ip6_ra_report_t * r)
{
router_lifetime_in_sec = r->router_lifetime_in_sec;
u8 route_already_present = 0;
- /* *INDENT-OFF* */
pool_foreach (default_route, rm->default_route_pool)
{
if (default_route->sw_if_index != sw_if_index)
@@ -276,7 +273,6 @@ ip6_ra_report_handler (const ip6_ra_report_t * r)
goto default_route_pool_foreach_out;
}
}
- /* *INDENT-ON* */
default_route_pool_foreach_out:
if (!route_already_present)
@@ -333,7 +329,6 @@ ip6_ra_report_handler (const ip6_ra_report_t * r)
continue;
u8 address_already_present = 0;
- /* *INDENT-OFF* */
pool_foreach (slaac_address, rm->slaac_address_pool)
{
if (slaac_address->sw_if_index != sw_if_index)
@@ -349,7 +344,6 @@ ip6_ra_report_handler (const ip6_ra_report_t * r)
goto slaac_address_pool_foreach_out;
}
}
- /* *INDENT-ON* */
slaac_address_pool_foreach_out:
if (address_already_present)
@@ -414,7 +408,6 @@ rd_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
* we do not use pool_foreach() to iterate over pool elements here
* as we are removing elements inside the loop body
*/
- /* *INDENT-OFF* */
pool_foreach_index (index, rm->slaac_address_pool)
{
slaac_address = pool_elt_at_index(rm->slaac_address_pool, index);
@@ -442,7 +435,6 @@ rd_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
else
remove_default_route (vm, default_route);
}
- /* *INDENT-ON* */
current_time = vlib_time_now (vm);
}
while (due_time < current_time);
@@ -453,13 +445,11 @@ rd_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (rd_cp_process_node) = {
.function = rd_cp_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "rd-cp-process",
};
-/* *INDENT-ON* */
static void
interrupt_process (void)
@@ -514,21 +504,17 @@ rd_cp_set_address_autoconfig (u32 sw_if_index,
if (if_config->enabled && !enable)
{
- /* *INDENT-OFF* */
pool_foreach (slaac_address, rm->slaac_address_pool)
{
remove_slaac_address (vm, slaac_address);
}
- /* *INDENT-ON* */
}
if (if_config->install_default_routes && !install_default_routes)
{
- /* *INDENT-OFF* */
pool_foreach (default_route, rm->default_route_pool)
{
remove_default_route (vm, default_route);
}
- /* *INDENT-ON* */
}
if_config->enabled = enable;
@@ -588,13 +574,11 @@ ip6_nd_address_autoconfig (vlib_main_t * vm,
* @cliexcmd{ip6 nd address autoconfig GigabitEthernet2/0/0 disable}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_nd_address_autoconfig_command, static) = {
.path = "ip6 nd address autoconfig",
.short_help = "ip6 nd address autoconfig <interface> [default-route|disable]",
.function = ip6_nd_address_autoconfig,
};
-/* *INDENT-ON* */
static clib_error_t *
rd_cp_init (vlib_main_t * vm)
diff --git a/src/vnet/ip6-nd/rd_cp_api.c b/src/vnet/ip6-nd/rd_cp_api.c
index 1f0d8587970..3cd55a702e1 100644
--- a/src/vnet/ip6-nd/rd_cp_api.c
+++ b/src/vnet/ip6-nd/rd_cp_api.c
@@ -13,6 +13,7 @@
* limitations under the License.
*/
+#include <vnet/vnet.h>
#include <vnet/ip6-nd/rd_cp.h>
#include <vlibapi/api.h>
diff --git a/src/vnet/ipfix-export/flow_api.c b/src/vnet/ipfix-export/flow_api.c
index 75a656468db..0b287335bbf 100644
--- a/src/vnet/ipfix-export/flow_api.c
+++ b/src/vnet/ipfix-export/flow_api.c
@@ -36,39 +36,96 @@
#define REPLY_MSG_ID_BASE frm->msg_id_base
#include <vlibapi/api_helper_macros.h>
-static void
-vl_api_set_ipfix_exporter_t_handler (vl_api_set_ipfix_exporter_t * mp)
+ipfix_exporter_t *
+vnet_ipfix_exporter_lookup (const ip_address_t *ipfix_collector)
+{
+ flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp;
+
+ pool_foreach (exp, frm->exporters)
+ {
+ if (ip_address_cmp (&exp->ipfix_collector, ipfix_collector) == 0)
+ return exp;
+ }
+
+ return NULL;
+}
+
+/*
+ * For backwards compatibility reasons index 0 in the set of exporters
+ * is always used for the exporter created via the set_ipfix_exporter
+ * API.
+ */
+#define USE_INDEX_0 true
+#define USE_ANY_INDEX false
+
+static int
+vl_api_set_ipfix_exporter_t_internal (
+ u32 client_index, vl_api_address_t *mp_collector_address,
+ u16 mp_collector_port, vl_api_address_t *mp_src_address, u32 mp_vrf_id,
+ u32 mp_path_mtu, u32 mp_template_interval, bool mp_udp_checksum,
+ bool use_index_0, bool is_create)
{
vlib_main_t *vm = vlib_get_main ();
flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp;
vl_api_registration_t *reg;
- vl_api_set_ipfix_exporter_reply_t *rmp;
- ip4_address_t collector, src;
+ ip_address_t collector, src;
u16 collector_port = UDP_DST_PORT_ipfix;
u32 path_mtu;
u32 template_interval;
u8 udp_checksum;
u32 fib_id;
u32 fib_index = ~0;
- int rv = 0;
+ u32 ip_header_size;
- reg = vl_api_client_index_to_registration (mp->client_index);
+ reg = vl_api_client_index_to_registration (client_index);
if (!reg)
- return;
+ return VNET_API_ERROR_UNIMPLEMENTED;
- if (mp->src_address.af == ADDRESS_IP6
- || mp->collector_address.af == ADDRESS_IP6)
+ if (use_index_0)
{
- rv = VNET_API_ERROR_UNIMPLEMENTED;
- goto out;
+ /*
+ * In this case we update the existing exporter. There is no delete
+ * for exp[0]
+ */
+ exp = &frm->exporters[0];
+
+ /* Collector address must be IPv4 for exp[0] */
+ collector.version = AF_IP4;
+ ip4_address_decode (mp_collector_address->un.ip4, &collector.ip.ip4);
+ }
+ else
+ {
+ ip_address_decode2 (mp_collector_address, &collector);
+ if (is_create)
+ {
+ exp = vnet_ipfix_exporter_lookup (&collector);
+ if (!exp)
+ {
+ /* Create a new exporter instead of updating an existing one */
+ if (pool_elts (frm->exporters) >= IPFIX_EXPORTERS_MAX)
+ return VNET_API_ERROR_INVALID_VALUE;
+ pool_get (frm->exporters, exp);
+ }
+ }
+ else
+ {
+ /* Delete the exporter */
+ exp = vnet_ipfix_exporter_lookup (&collector);
+ if (!exp)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ pool_put (frm->exporters, exp);
+ return 0;
+ }
}
- ip4_address_decode (mp->collector_address.un.ip4, &collector);
- collector_port = ntohs (mp->collector_port);
+ collector_port = ntohs (mp_collector_port);
if (collector_port == (u16) ~ 0)
collector_port = UDP_DST_PORT_ipfix;
- ip4_address_decode (mp->src_address.un.ip4, &src);
- fib_id = ntohl (mp->vrf_id);
+ ip_address_decode2 (mp_src_address, &src);
+ fib_id = ntohl (mp_vrf_id);
ip4_main_t *im = &ip4_main;
if (fib_id == ~0)
@@ -79,69 +136,97 @@ vl_api_set_ipfix_exporter_t_handler (vl_api_set_ipfix_exporter_t * mp)
{
uword *p = hash_get (im->fib_index_by_table_id, fib_id);
if (!p)
- {
- rv = VNET_API_ERROR_NO_SUCH_FIB;
- goto out;
- }
+ return VNET_API_ERROR_NO_SUCH_FIB;
fib_index = p[0];
}
- path_mtu = ntohl (mp->path_mtu);
+ path_mtu = ntohl (mp_path_mtu);
if (path_mtu == ~0)
path_mtu = 512; // RFC 7011 section 10.3.3.
- template_interval = ntohl (mp->template_interval);
+ template_interval = ntohl (mp_template_interval);
if (template_interval == ~0)
template_interval = 20;
- udp_checksum = mp->udp_checksum;
+ udp_checksum = mp_udp_checksum;
- if (collector.as_u32 != 0 && src.as_u32 == 0)
- {
- rv = VNET_API_ERROR_INVALID_VALUE;
- goto out;
- }
+ /*
+ * If the collector address is set then the src must be too.
+ * Collector address can be set to 0 to disable exporter
+ */
+ if (!ip_address_is_zero (&collector) && ip_address_is_zero (&src))
+ return VNET_API_ERROR_INVALID_VALUE;
+ if (collector.version != src.version)
+ return VNET_API_ERROR_INVALID_VALUE;
if (path_mtu > 1450 /* vpp does not support fragmentation */ )
- {
- rv = VNET_API_ERROR_INVALID_VALUE;
- goto out;
- }
+ return VNET_API_ERROR_INVALID_VALUE;
if (path_mtu < 68)
- {
- rv = VNET_API_ERROR_INVALID_VALUE;
- goto out;
- }
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ /* Calculate how much header data we need. */
+ if (collector.version == AF_IP4)
+ ip_header_size = sizeof (ip4_header_t);
+ else
+ ip_header_size = sizeof (ip6_header_t);
+ exp->all_headers_size = ip_header_size + sizeof (udp_header_t) +
+ sizeof (ipfix_message_header_t) +
+ sizeof (ipfix_set_header_t);
/* Reset report streams if we are reconfiguring IP addresses */
- if (frm->ipfix_collector.as_u32 != collector.as_u32 ||
- frm->src_address.as_u32 != src.as_u32 ||
- frm->collector_port != collector_port)
- vnet_flow_reports_reset (frm);
-
- frm->ipfix_collector.as_u32 = collector.as_u32;
- frm->collector_port = collector_port;
- frm->src_address.as_u32 = src.as_u32;
- frm->fib_index = fib_index;
- frm->path_mtu = path_mtu;
- frm->template_interval = template_interval;
- frm->udp_checksum = udp_checksum;
+ if (ip_address_cmp (&exp->ipfix_collector, &collector) ||
+ ip_address_cmp (&exp->src_address, &src) ||
+ exp->collector_port != collector_port)
+ vnet_flow_reports_reset (exp);
+
+ exp->ipfix_collector = collector;
+ exp->collector_port = collector_port;
+ exp->src_address = src;
+ exp->fib_index = fib_index;
+ exp->path_mtu = path_mtu;
+ exp->template_interval = template_interval;
+ exp->udp_checksum = udp_checksum;
/* Turn on the flow reporting process */
vlib_process_signal_event (vm, flow_report_process_node.index, 1, 0);
-out:
+ return 0;
+}
+
+static void
+vl_api_set_ipfix_exporter_t_handler (vl_api_set_ipfix_exporter_t *mp)
+{
+ vl_api_set_ipfix_exporter_reply_t *rmp;
+ flow_report_main_t *frm = &flow_report_main;
+ int rv = vl_api_set_ipfix_exporter_t_internal (
+ mp->client_index, &mp->collector_address, mp->collector_port,
+ &mp->src_address, mp->vrf_id, mp->path_mtu, mp->template_interval,
+ mp->udp_checksum, USE_INDEX_0, 0);
+
REPLY_MACRO (VL_API_SET_IPFIX_EXPORTER_REPLY);
}
static void
+vl_api_ipfix_exporter_create_delete_t_handler (
+ vl_api_ipfix_exporter_create_delete_t *mp)
+{
+ vl_api_ipfix_exporter_create_delete_reply_t *rmp;
+ flow_report_main_t *frm = &flow_report_main;
+ int rv = vl_api_set_ipfix_exporter_t_internal (
+ mp->client_index, &mp->collector_address, mp->collector_port,
+ &mp->src_address, mp->vrf_id, mp->path_mtu, mp->template_interval,
+ mp->udp_checksum, USE_ANY_INDEX, mp->is_create);
+
+ REPLY_MACRO (VL_API_IPFIX_EXPORTER_CREATE_DELETE_REPLY);
+}
+
+static void
vl_api_ipfix_exporter_dump_t_handler (vl_api_ipfix_exporter_dump_t * mp)
{
flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = pool_elt_at_index (flow_report_main.exporters, 0);
vl_api_registration_t *reg;
vl_api_ipfix_exporter_details_t *rmp;
ip4_main_t *im = &ip4_main;
- ip46_address_t collector = {.as_u64[0] = 0,.as_u64[1] = 0 };
- ip46_address_t src = {.as_u64[0] = 0,.as_u64[1] = 0 };
u32 vrf_id;
reg = vl_api_client_index_to_registration (mp->client_index);
@@ -150,27 +235,69 @@ vl_api_ipfix_exporter_dump_t_handler (vl_api_ipfix_exporter_dump_t * mp)
rmp = vl_msg_api_alloc (sizeof (*rmp));
clib_memset (rmp, 0, sizeof (*rmp));
- rmp->_vl_msg_id = ntohs (VL_API_IPFIX_EXPORTER_DETAILS);
+ rmp->_vl_msg_id =
+ ntohs ((REPLY_MSG_ID_BASE) + VL_API_IPFIX_EXPORTER_DETAILS);
rmp->context = mp->context;
- memcpy (&collector.ip4, &frm->ipfix_collector, sizeof (ip4_address_t));
- ip_address_encode (&collector, IP46_TYPE_IP4, &rmp->collector_address);
+ ip_address_encode2 (&exp->ipfix_collector, &rmp->collector_address);
+ rmp->collector_port = htons (exp->collector_port);
+ ip_address_encode2 (&exp->src_address, &rmp->src_address);
- rmp->collector_port = htons (frm->collector_port);
+ if (exp->fib_index == ~0)
+ vrf_id = ~0;
+ else
+ vrf_id = im->fibs[exp->fib_index].ft_table_id;
+ rmp->vrf_id = htonl (vrf_id);
+ rmp->path_mtu = htonl (exp->path_mtu);
+ rmp->template_interval = htonl (exp->template_interval);
+ rmp->udp_checksum = (exp->udp_checksum != 0);
- memcpy (&src.ip4, &frm->src_address, sizeof (ip4_address_t));
- ip_address_encode (&src, IP46_TYPE_IP4, &rmp->src_address);
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
- if (frm->fib_index == ~0)
+static void
+ipfix_all_fill_details (vl_api_ipfix_all_exporter_details_t *rmp,
+ ipfix_exporter_t *exp)
+{
+ ip4_main_t *im = &ip4_main;
+ u32 vrf_id;
+
+ ip_address_encode2 (&exp->ipfix_collector, &rmp->collector_address);
+ rmp->collector_port = htons (exp->collector_port);
+ ip_address_encode2 (&exp->src_address, &rmp->src_address);
+
+ if (exp->fib_index == ~0)
vrf_id = ~0;
else
- vrf_id = im->fibs[frm->fib_index].ft_table_id;
+ vrf_id = im->fibs[exp->fib_index].ft_table_id;
rmp->vrf_id = htonl (vrf_id);
- rmp->path_mtu = htonl (frm->path_mtu);
- rmp->template_interval = htonl (frm->template_interval);
- rmp->udp_checksum = (frm->udp_checksum != 0);
+ rmp->path_mtu = htonl (exp->path_mtu);
+ rmp->template_interval = htonl (exp->template_interval);
+ rmp->udp_checksum = (exp->udp_checksum != 0);
+}
- vl_api_send_msg (reg, (u8 *) rmp);
+static void
+ipfix_all_exporter_details (flow_report_main_t *frm, u32 index,
+ vl_api_registration_t *rp, u32 context)
+{
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, index);
+
+ vl_api_ipfix_all_exporter_details_t *rmp;
+
+ REPLY_MACRO_DETAILS4 (VL_API_IPFIX_ALL_EXPORTER_DETAILS, rp, context,
+ ({ ipfix_all_fill_details (rmp, exp); }));
+}
+
+static void
+vl_api_ipfix_all_exporter_get_t_handler (vl_api_ipfix_all_exporter_get_t *mp)
+{
+ flow_report_main_t *frm = &flow_report_main;
+ vl_api_ipfix_all_exporter_get_reply_t *rmp;
+ int rv = 0;
+
+ REPLY_AND_DETAILS_MACRO (
+ VL_API_IPFIX_ALL_EXPORTER_GET_REPLY, frm->exporters,
+ ({ ipfix_all_exporter_details (frm, cursor, rp, mp->context); }));
}
static void
@@ -180,6 +307,7 @@ static void
vl_api_set_ipfix_classify_stream_reply_t *rmp;
flow_report_classify_main_t *fcm = &flow_report_classify_main;
flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = &frm->exporters[0];
u32 domain_id = 0;
u32 src_port = UDP_DST_PORT_ipfix;
int rv = 0;
@@ -190,7 +318,7 @@ static void
if (fcm->src_port != 0 &&
(fcm->domain_id != domain_id || fcm->src_port != (u16) src_port))
{
- int rv = vnet_stream_change (frm, fcm->domain_id, fcm->src_port,
+ int rv = vnet_stream_change (exp, fcm->domain_id, fcm->src_port,
domain_id, (u16) src_port);
ASSERT (rv == 0);
}
@@ -231,6 +359,7 @@ static void
vl_api_registration_t *reg;
flow_report_classify_main_t *fcm = &flow_report_classify_main;
flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = &frm->exporters[0];
vnet_flow_report_add_del_args_t args;
ipfix_classify_table_t *table;
int is_add;
@@ -296,7 +425,7 @@ static void
args.domain_id = fcm->domain_id;
args.src_port = fcm->src_port;
- rv = vnet_flow_report_add_del (frm, &args, NULL);
+ rv = vnet_flow_report_add_del (exp, &args, NULL);
/* If deleting, or add failed */
if (is_add == 0 || (rv && is_add))
diff --git a/src/vnet/ipfix-export/flow_report.c b/src/vnet/ipfix-export/flow_report.c
index 760de5f8c66..4eb93520ed8 100644
--- a/src/vnet/ipfix-export/flow_report.c
+++ b/src/vnet/ipfix-export/flow_report.c
@@ -15,6 +15,7 @@
/*
* flow_report.c
*/
+#include <vppinfra/atomics.h>
#include <vnet/ipfix-export/flow_report.h>
#include <vnet/api_errno.h>
#include <vnet/udp/udp.h>
@@ -22,45 +23,40 @@
flow_report_main_t flow_report_main;
static_always_inline u8
-stream_index_valid (u32 index)
+stream_index_valid (ipfix_exporter_t *exp, u32 index)
{
- flow_report_main_t *frm = &flow_report_main;
- return index < vec_len (frm->streams) &&
- frm->streams[index].domain_id != ~0;
+ return index < vec_len (exp->streams) && exp->streams[index].domain_id != ~0;
}
static_always_inline flow_report_stream_t *
-add_stream (void)
+add_stream (ipfix_exporter_t *exp)
{
- flow_report_main_t *frm = &flow_report_main;
u32 i;
- for (i = 0; i < vec_len (frm->streams); i++)
- if (!stream_index_valid (i))
- return &frm->streams[i];
- u32 index = vec_len (frm->streams);
- vec_validate (frm->streams, index);
- return &frm->streams[index];
+ for (i = 0; i < vec_len (exp->streams); i++)
+ if (!stream_index_valid (exp, i))
+ return &exp->streams[i];
+ u32 index = vec_len (exp->streams);
+ vec_validate (exp->streams, index);
+ return &exp->streams[index];
}
static_always_inline void
-delete_stream (u32 index)
+delete_stream (ipfix_exporter_t *exp, u32 index)
{
- flow_report_main_t *frm = &flow_report_main;
- ASSERT (index < vec_len (frm->streams));
- ASSERT (frm->streams[index].domain_id != ~0);
- frm->streams[index].domain_id = ~0;
+ ASSERT (index < vec_len (exp->streams));
+ ASSERT (exp->streams[index].domain_id != ~0);
+ exp->streams[index].domain_id = ~0;
}
static i32
-find_stream (u32 domain_id, u16 src_port)
+find_stream (ipfix_exporter_t *exp, u32 domain_id, u16 src_port)
{
- flow_report_main_t *frm = &flow_report_main;
flow_report_stream_t *stream;
u32 i;
- for (i = 0; i < vec_len (frm->streams); i++)
- if (stream_index_valid (i))
+ for (i = 0; i < vec_len (exp->streams); i++)
+ if (stream_index_valid (exp, i))
{
- stream = &frm->streams[i];
+ stream = &exp->streams[i];
if (domain_id == stream->domain_id)
{
if (src_port != stream->src_port)
@@ -76,14 +72,17 @@ find_stream (u32 domain_id, u16 src_port)
}
int
-send_template_packet (flow_report_main_t * frm,
- flow_report_t * fr, u32 * buffer_indexp)
+send_template_packet (flow_report_main_t *frm, ipfix_exporter_t *exp,
+ flow_report_t *fr, u32 *buffer_indexp)
{
u32 bi0;
vlib_buffer_t *b0;
- ip4_ipfix_template_packet_t *tp;
+ ip4_ipfix_template_packet_t *tp4;
+ ip6_ipfix_template_packet_t *tp6;
ipfix_message_header_t *h;
- ip4_header_t *ip;
+ ip4_header_t *ip4;
+ ip6_header_t *ip6;
+ void *ip;
udp_header_t *udp;
vlib_main_t *vm = frm->vlib_main;
flow_report_stream_t *stream;
@@ -92,7 +91,8 @@ send_template_packet (flow_report_main_t * frm,
if (fr->update_rewrite || fr->rewrite == 0)
{
- if (frm->ipfix_collector.as_u32 == 0 || frm->src_address.as_u32 == 0)
+ if (ip_address_is_zero (&exp->ipfix_collector) ||
+ ip_address_is_zero (&exp->src_address))
{
vlib_node_set_state (frm->vlib_main, flow_report_process_node.index,
VLIB_NODE_STATE_DISABLED);
@@ -104,13 +104,9 @@ send_template_packet (flow_report_main_t * frm,
if (fr->update_rewrite)
{
- fr->rewrite = fr->rewrite_callback (frm, fr,
- &frm->ipfix_collector,
- &frm->src_address,
- frm->collector_port,
- fr->report_elements,
- fr->n_report_elements,
- fr->stream_indexp);
+ fr->rewrite = fr->rewrite_callback (
+ exp, fr, exp->collector_port, fr->report_elements,
+ fr->n_report_elements, fr->stream_indexp);
fr->update_rewrite = 0;
}
@@ -126,11 +122,22 @@ send_template_packet (flow_report_main_t * frm,
b0->current_length = vec_len (fr->rewrite);
b0->flags |= (VLIB_BUFFER_TOTAL_LENGTH_VALID | VNET_BUFFER_F_FLOW_REPORT);
vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = frm->fib_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = exp->fib_index;
- tp = vlib_buffer_get_current (b0);
- ip = (ip4_header_t *) & tp->ip4;
- udp = (udp_header_t *) (ip + 1);
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ {
+ tp4 = vlib_buffer_get_current (b0);
+ ip4 = (ip4_header_t *) &tp4->ip4;
+ ip = ip4;
+ udp = (udp_header_t *) (ip4 + 1);
+ }
+ else
+ {
+ tp6 = vlib_buffer_get_current (b0);
+ ip6 = (ip6_header_t *) &tp6->ip6;
+ ip = ip6;
+ udp = (udp_header_t *) (ip6 + 1);
+ }
h = (ipfix_message_header_t *) (udp + 1);
/* FIXUP: message header export_time */
@@ -139,18 +146,30 @@ send_template_packet (flow_report_main_t * frm,
(vlib_time_now (frm->vlib_main) - frm->vlib_time_0));
h->export_time = clib_host_to_net_u32 (h->export_time);
- stream = &frm->streams[fr->stream_index];
+ stream = &exp->streams[fr->stream_index];
/* FIXUP: message header sequence_number. Templates do not increase it */
h->sequence_number = clib_host_to_net_u32 (stream->sequence_number);
/* FIXUP: udp length */
- udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip4));
+ else
+ udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip6));
- if (frm->udp_checksum)
+ if (exp->udp_checksum || ip_addr_version (&exp->ipfix_collector) == AF_IP6)
{
/* RFC 7011 section 10.3.2. */
- udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
+
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
+ else
+ {
+ int bogus = 0;
+ udp->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip, &bogus);
+ }
+
if (udp->checksum == 0)
udp->checksum = 0xffff;
}
@@ -162,16 +181,58 @@ send_template_packet (flow_report_main_t * frm,
return 0;
}
+u32 always_inline
+ipfix_write_headers (ipfix_exporter_t *exp, void *data, void **ip,
+ udp_header_t **udp, u32 len)
+{
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ {
+ ip4_ipfix_template_packet_t *tp4;
+ ip4_header_t *ip4;
+
+ tp4 = (ip4_ipfix_template_packet_t *) data;
+ ip4 = (ip4_header_t *) &tp4->ip4;
+ ip4->ip_version_and_header_length = 0x45;
+ ip4->ttl = 254;
+ ip4->protocol = IP_PROTOCOL_UDP;
+ ip4->flags_and_fragment_offset = 0;
+ ip4->src_address.as_u32 = exp->src_address.ip.ip4.as_u32;
+ ip4->dst_address.as_u32 = exp->ipfix_collector.ip.ip4.as_u32;
+ *ip = ip4;
+ *udp = (udp_header_t *) (ip4 + 1);
+
+ (*udp)->length = clib_host_to_net_u16 (len - sizeof (*ip4));
+ return sizeof (*ip4);
+ }
+ else
+ {
+ ip6_ipfix_template_packet_t *tp6;
+ ip6_header_t *ip6;
+
+ tp6 = (ip6_ipfix_template_packet_t *) data;
+ ip6 = (ip6_header_t *) &tp6->ip6;
+ ip6->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (6 << 28);
+ ip6->hop_limit = 254;
+ ip6->protocol = IP_PROTOCOL_UDP;
+ ip6->src_address = exp->src_address.ip.ip6;
+ ip6->dst_address = exp->ipfix_collector.ip.ip6;
+ *ip = ip6;
+ *udp = (udp_header_t *) (ip6 + 1);
+ (*udp)->length = clib_host_to_net_u16 (len - sizeof (*ip6));
+ return sizeof (*ip6);
+ }
+}
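
ipfix_write_headers centralizes the IPv4/IPv6 split: it writes the L3 and UDP headers at the front of a rewrite or packet buffer and returns the L3 header size so callers can compute payload offsets. A minimal calling sketch (the 256-byte rewrite length here is an arbitrary assumption):

  u8 *rewrite = 0;
  void *ip;
  udp_header_t *udp;
  u32 l3_size;

  vec_validate_aligned (rewrite, 256 - 1, CLIB_CACHE_LINE_BYTES);
  l3_size = ipfix_write_headers (exp, rewrite, &ip, &udp, vec_len (rewrite));
  /* ip points at an ip4_header_t or ip6_header_t depending on the
     collector address family; udp follows it; l3_size is 20 or 40. */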
+
u8 *
-vnet_flow_rewrite_generic_callback (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+vnet_flow_rewrite_generic_callback (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t * report_elts,
- u32 n_elts, u32 * stream_indexp)
+ ipfix_report_element_t *report_elts,
+ u32 n_elts, u32 *stream_indexp)
{
- ip4_header_t *ip;
+ ip4_header_t *ip4;
+ ip6_header_t *ip6;
+ void *ip;
udp_header_t *udp;
ipfix_message_header_t *h;
ipfix_set_header_t *s;
@@ -179,41 +240,36 @@ vnet_flow_rewrite_generic_callback (flow_report_main_t * frm,
ipfix_field_specifier_t *f;
ipfix_field_specifier_t *first_field;
u8 *rewrite = 0;
- ip4_ipfix_template_packet_t *tp;
flow_report_stream_t *stream;
int i;
ipfix_report_element_t *ep;
+ u32 size;
ASSERT (stream_indexp);
ASSERT (n_elts);
ASSERT (report_elts);
- stream = &frm->streams[fr->stream_index];
+ stream = &exp->streams[fr->stream_index];
*stream_indexp = fr->stream_index;
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ size = sizeof (ip4_ipfix_template_packet_t);
+ else
+ size = sizeof (ip6_ipfix_template_packet_t);
/* allocate rewrite space */
vec_validate_aligned (rewrite,
- sizeof (ip4_ipfix_template_packet_t)
- + n_elts * sizeof (ipfix_field_specifier_t) - 1,
+ size + n_elts * sizeof (ipfix_field_specifier_t) - 1,
CLIB_CACHE_LINE_BYTES);
/* create the packet rewrite string */
- tp = (ip4_ipfix_template_packet_t *) rewrite;
- ip = (ip4_header_t *) & tp->ip4;
- udp = (udp_header_t *) (ip + 1);
+ ipfix_write_headers (exp, rewrite, &ip, &udp, vec_len (rewrite));
+
h = (ipfix_message_header_t *) (udp + 1);
s = (ipfix_set_header_t *) (h + 1);
t = (ipfix_template_header_t *) (s + 1);
first_field = f = (ipfix_field_specifier_t *) (t + 1);
-
- ip->ip_version_and_header_length = 0x45;
- ip->ttl = 254;
- ip->protocol = IP_PROTOCOL_UDP;
- ip->src_address.as_u32 = src_address->as_u32;
- ip->dst_address.as_u32 = collector_address->as_u32;
udp->src_port = clib_host_to_net_u16 (stream->src_port);
udp->dst_port = clib_host_to_net_u16 (collector_port);
- udp->length = clib_host_to_net_u16 (vec_len (rewrite) - sizeof (*ip));
/* FIXUP LATER: message header export_time */
h->domain_id = clib_host_to_net_u32 (stream->domain_id);
@@ -227,10 +283,6 @@ vnet_flow_rewrite_generic_callback (flow_report_main_t * frm,
ep++;
}
- /* Back to the template packet... */
- ip = (ip4_header_t *) & tp->ip4;
- udp = (udp_header_t *) (ip + 1);
-
ASSERT (f - first_field);
/* Field count in this template */
t->id_count = ipfix_id_count (fr->template_id, f - first_field);
@@ -242,12 +294,201 @@ vnet_flow_rewrite_generic_callback (flow_report_main_t * frm,
/* message length in octets */
h->version_length = version_length ((u8 *) f - (u8 *) h);
- ip->length = clib_host_to_net_u16 ((u8 *) f - (u8 *) ip);
- ip->checksum = ip4_header_checksum (ip);
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ {
+ ip4 = (ip4_header_t *) ip;
+ ip4->length = clib_host_to_net_u16 ((u8 *) f - (u8 *) ip4);
+ ip4->checksum = ip4_header_checksum (ip4);
+ }
+ else
+ {
+ ip6 = (ip6_header_t *) ip;
+ /* IPv6 payload length does not include the IPv6 header */
+ ip6->payload_length = clib_host_to_net_u16 ((u8 *) f - (u8 *) udp);
+ }
return rewrite;
}
+vlib_buffer_t *
+vnet_ipfix_exp_get_buffer (vlib_main_t *vm, ipfix_exporter_t *exp,
+ flow_report_t *fr, u32 thread_index)
+{
+ u32 bi0;
+ vlib_buffer_t *b0;
+
+ if (fr->per_thread_data[thread_index].buffer)
+ return fr->per_thread_data[thread_index].buffer;
+
+ if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+ return NULL;
+
+ /* Initialize the buffer */
+ b0 = fr->per_thread_data[thread_index].buffer = vlib_get_buffer (vm, bi0);
+
+ b0->current_data = 0;
+ b0->current_length = exp->all_headers_size;
+ b0->flags |= (VLIB_BUFFER_TOTAL_LENGTH_VALID | VNET_BUFFER_F_FLOW_REPORT);
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = exp->fib_index;
+ fr->per_thread_data[thread_index].next_data_offset = b0->current_length;
+
+ return b0;
+}
+
+/*
+ * Send a buffer that is mostly populated: it already holds flow records
+ * but still needs some header fields updated.
+ */
+void
+vnet_ipfix_exp_send_buffer (vlib_main_t *vm, ipfix_exporter_t *exp,
+ flow_report_t *fr, flow_report_stream_t *stream,
+ u32 thread_index, vlib_buffer_t *b0)
+{
+ flow_report_main_t *frm = &flow_report_main;
+ vlib_frame_t *f;
+ ipfix_set_header_t *s;
+ ipfix_message_header_t *h;
+ ip4_header_t *ip4 = 0;
+ ip6_header_t *ip6 = 0;
+ void *ip;
+ udp_header_t *udp;
+ int ip_len;
+
+ /* nothing to send */
+ if (fr->per_thread_data[thread_index].next_data_offset <=
+ exp->all_headers_size)
+ return;
+
+ ip_len = ipfix_write_headers (exp, (void *) vlib_buffer_get_current (b0),
+ &ip, &udp, b0->current_length);
+
+ h = (ipfix_message_header_t *) (udp + 1);
+ s = (ipfix_set_header_t *) (h + 1);
+
+ udp->src_port = clib_host_to_net_u16 (stream->src_port);
+ udp->dst_port = clib_host_to_net_u16 (exp->collector_port);
+ udp->checksum = 0;
+
+ /* FIXUP: message header export_time */
+ h->export_time =
+ (u32) (((f64) frm->unix_time_0) + (vlib_time_now (vm) - frm->vlib_time_0));
+ h->export_time = clib_host_to_net_u32 (h->export_time);
+ h->domain_id = clib_host_to_net_u32 (stream->domain_id);
+
+ /*
+ * RFC 7011: Section 3.2
+ *
+ * Incremental sequence counter modulo 2^32 of all IPFIX Data Records
+ * sent in the current stream from the current Observation Domain by
+ * the Exporting Process
+ */
+ h->sequence_number =
+ clib_atomic_fetch_add (&stream->sequence_number,
+ fr->per_thread_data[thread_index].n_data_records);
+ h->sequence_number = clib_host_to_net_u32 (h->sequence_number);
+
+ /*
+ * For data records we use the template ID as the set ID.
+ * RFC 7011: 3.4.3
+ */
+ s->set_id_length = ipfix_set_id_length (
+ fr->template_id,
+ b0->current_length - (ip_len + sizeof (*udp) + sizeof (*h)));
+ h->version_length =
+ version_length (b0->current_length - (ip_len + sizeof (*udp)));
+
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ {
+ ip4 = (ip4_header_t *) ip;
+ ip4->length = clib_host_to_net_u16 (b0->current_length);
+ ip4->checksum = ip4_header_checksum (ip4);
+ udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip4));
+ ASSERT (ip4_header_checksum_is_valid (ip4));
+ }
+ else
+ {
+ ip6 = (ip6_header_t *) ip;
+ /* IPv6 payload length does not include the IPv6 header */
+ ip6->payload_length =
+ clib_host_to_net_u16 (b0->current_length - sizeof (*ip6));
+ udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip6));
+ }
+
+ if (exp->udp_checksum || ip_addr_version (&exp->ipfix_collector) == AF_IP6)
+ {
+ /* RFC 7011 section 10.3.2. */
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip4);
+ else
+ {
+ int bogus = 0;
+ udp->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip6, &bogus);
+ }
+ if (udp->checksum == 0)
+ udp->checksum = 0xffff;
+ }
+
+ /* Find or allocate a frame */
+ f = fr->per_thread_data[thread_index].frame;
+ if (PREDICT_FALSE (f == 0))
+ {
+ u32 *to_next;
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
+ else
+ f = vlib_get_frame_to_node (vm, ip6_lookup_node.index);
+ fr->per_thread_data[thread_index].frame = f;
+ u32 bi0 = vlib_get_buffer_index (vm, b0);
+
+ /* Enqueue the buffer */
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi0;
+ f->n_vectors = 1;
+ }
+
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
+ else
+ vlib_put_frame_to_node (vm, ip6_lookup_node.index, f);
+
+ fr->per_thread_data[thread_index].frame = NULL;
+ fr->per_thread_data[thread_index].buffer = NULL;
+ fr->per_thread_data[thread_index].next_data_offset = 0;
+}
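
The clib_atomic_fetch_add above returns the pre-increment value, so each packet is stamped with the number of data records exported before it, which is exactly what RFC 7011 section 3.2 asks for. A worked illustration with assumed values:

  /* stream->sequence_number == 10; thread A flushes 3 records while
     thread B flushes 5. If A's fetch_add runs first it returns 10 and
     leaves 13, then B's returns 13 and leaves 18 (symmetrically 10/15
     in the other order). Both packets carry correct counts and the
     counter ends at 18 either way. */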
+
+static void
+flow_report_process_send (vlib_main_t *vm, flow_report_main_t *frm,
+ ipfix_exporter_t *exp, flow_report_t *fr,
+ u32 next_node, u32 template_bi)
+{
+ vlib_frame_t *nf = 0;
+ u32 *to_next;
+
+ nf = vlib_get_frame_to_node (vm, next_node);
+ nf->n_vectors = 0;
+ to_next = vlib_frame_vector_args (nf);
+
+ if (template_bi != ~0)
+ {
+ to_next[0] = template_bi;
+ to_next++;
+ nf->n_vectors++;
+ }
+
+ nf = fr->flow_data_callback (frm, exp, fr, nf, to_next, next_node);
+ if (nf)
+ {
+ if (nf->n_vectors)
+ vlib_put_frame_to_node (vm, next_node, nf);
+ else
+ {
+ vlib_frame_free (vm, nf);
+ }
+ }
+}
+
static uword
flow_report_process (vlib_main_t * vm,
vlib_node_runtime_t * rt, vlib_frame_t * f)
@@ -256,9 +497,9 @@ flow_report_process (vlib_main_t * vm,
flow_report_t *fr;
u32 ip4_lookup_node_index;
vlib_node_t *ip4_lookup_node;
- vlib_frame_t *nf = 0;
+ u32 ip6_lookup_node_index;
+ vlib_node_t *ip6_lookup_node;
u32 template_bi;
- u32 *to_next;
int send_template;
f64 now, wait_time;
f64 def_wait_time = 5.0;
@@ -277,6 +518,10 @@ flow_report_process (vlib_main_t * vm,
ip4_lookup_node = vlib_get_node_by_name (vm, (u8 *) "ip4-lookup");
ip4_lookup_node_index = ip4_lookup_node->index;
+ /* Enqueue pkts to ip6-lookup */
+ ip6_lookup_node = vlib_get_node_by_name (vm, (u8 *) "ip6-lookup");
+ ip6_lookup_node_index = ip6_lookup_node->index;
+
wait_time = def_wait_time;
while (1)
@@ -284,82 +529,85 @@ flow_report_process (vlib_main_t * vm,
vlib_process_wait_for_event_or_clock (vm, wait_time);
event_type = vlib_process_get_events (vm, &event_data);
vec_reset_length (event_data);
+ ipfix_exporter_t *exp;
+ pool_foreach (exp, frm->exporters)
+ {
- /* 5s delay by default, possibly reduced by template intervals */
- wait_time = def_wait_time;
-
- vec_foreach (fr, frm->reports)
- {
- f64 next_template;
- now = vlib_time_now (vm);
-
- /* Need to send a template packet? */
- send_template =
- now > (fr->last_template_sent + frm->template_interval);
- send_template += fr->last_template_sent == 0;
- template_bi = ~0;
- rv = 0;
-
- if (send_template)
- rv = send_template_packet (frm, fr, &template_bi);
-
- if (rv < 0)
- continue;
-
- /* decide if template should be sent sooner than current wait time */
- next_template =
- (fr->last_template_sent + frm->template_interval) - now;
- wait_time = clib_min (wait_time, next_template);
-
- nf = vlib_get_frame_to_node (vm, ip4_lookup_node_index);
- nf->n_vectors = 0;
- to_next = vlib_frame_vector_args (nf);
-
- if (template_bi != ~0)
- {
- to_next[0] = template_bi;
- to_next++;
- nf->n_vectors++;
- }
-
- nf = fr->flow_data_callback (frm, fr,
- nf, to_next, ip4_lookup_node_index);
- if (nf)
- vlib_put_frame_to_node (vm, ip4_lookup_node_index, nf);
- }
+ /* 5s delay by default, possibly reduced by template intervals */
+ wait_time = def_wait_time;
+
+ vec_foreach (fr, exp->reports)
+ {
+ f64 next_template;
+ now = vlib_time_now (vm);
+
+ /* Need to send a template packet? */
+ send_template =
+ now > (fr->last_template_sent + exp->template_interval);
+ send_template += fr->last_template_sent == 0;
+ template_bi = ~0;
+ rv = 0;
+
+ if (send_template)
+ rv = send_template_packet (frm, exp, fr, &template_bi);
+
+ if (rv < 0)
+ continue;
+
+ /*
+ * decide if template should be sent sooner than current wait
+ * time
+ */
+ next_template =
+ (fr->last_template_sent + exp->template_interval) - now;
+ wait_time = clib_min (wait_time, next_template);
+
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ {
+ flow_report_process_send (
+ vm, frm, exp, fr, ip4_lookup_node_index, template_bi);
+ }
+ else
+ {
+ flow_report_process_send (
+ vm, frm, exp, fr, ip6_lookup_node_index, template_bi);
+ }
+ }
+ }
}
return 0; /* not so much */
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (flow_report_process_node) = {
.function = flow_report_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "flow-report-process",
};
-/* *INDENT-ON* */
int
-vnet_flow_report_add_del (flow_report_main_t * frm,
- vnet_flow_report_add_del_args_t * a,
- u16 * template_id)
+vnet_flow_report_add_del (ipfix_exporter_t *exp,
+ vnet_flow_report_add_del_args_t *a, u16 *template_id)
{
int i;
int found_index = ~0;
flow_report_t *fr;
flow_report_stream_t *stream;
u32 si;
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ flow_report_main_t *frm = &flow_report_main;
+ vlib_main_t *vm = frm->vlib_main;
+ int size;
- si = find_stream (a->domain_id, a->src_port);
+ si = find_stream (exp, a->domain_id, a->src_port);
if (si == -2)
return VNET_API_ERROR_INVALID_VALUE;
if (si == -1 && a->is_add == 0)
return VNET_API_ERROR_NO_SUCH_ENTRY;
- for (i = 0; i < vec_len (frm->reports); i++)
+ for (i = 0; i < vec_len (exp->reports); i++)
{
- fr = vec_elt_at_index (frm->reports, i);
+ fr = vec_elt_at_index (exp->reports, i);
if (fr->opaque.as_uword == a->opaque.as_uword
&& fr->rewrite_callback == a->rewrite_callback
&& fr->flow_data_callback == a->flow_data_callback)
@@ -375,11 +623,24 @@ vnet_flow_report_add_del (flow_report_main_t * frm,
{
if (found_index != ~0)
{
- vec_delete (frm->reports, 1, found_index);
- stream = &frm->streams[si];
+ for (int i = 0;
+ i < vec_len (exp->reports[found_index].per_thread_data); i++)
+ {
+ u32 bi;
+ if (exp->reports[found_index].per_thread_data[i].buffer)
+ {
+ bi = vlib_get_buffer_index (
+ vm, exp->reports[found_index].per_thread_data[i].buffer);
+ vlib_buffer_free (vm, &bi, 1);
+ }
+ }
+ vec_free (exp->reports[found_index].per_thread_data);
+
+ vec_delete (exp->reports, 1, found_index);
+ stream = &exp->streams[si];
stream->n_reports--;
if (stream->n_reports == 0)
- delete_stream (si);
+ delete_stream (exp, si);
return 0;
}
return VNET_API_ERROR_NO_SUCH_ENTRY;
@@ -390,19 +651,19 @@ vnet_flow_report_add_del (flow_report_main_t * frm,
if (si == -1)
{
- stream = add_stream ();
+ stream = add_stream (exp);
stream->domain_id = a->domain_id;
stream->src_port = a->src_port;
stream->sequence_number = 0;
stream->n_reports = 0;
- si = stream - frm->streams;
+ si = stream - exp->streams;
}
else
- stream = &frm->streams[si];
+ stream = &exp->streams[si];
stream->n_reports++;
- vec_add2 (frm->reports, fr, 1);
+ vec_add2 (exp->reports, fr, 1);
fr->stream_index = si;
fr->template_id = 256 + stream->next_template_no;
@@ -414,6 +675,14 @@ vnet_flow_report_add_del (flow_report_main_t * frm,
fr->report_elements = a->report_elements;
fr->n_report_elements = a->n_report_elements;
fr->stream_indexp = a->stream_indexp;
+ vec_validate (fr->per_thread_data, tm->n_threads);
+ /* Store the flow_report index back in the args struct */
+ a->flow_report_index = fr - exp->reports;
+
+ size = 0;
+ for (int i = 0; i < fr->n_report_elements; i++)
+ size += fr->report_elements[i].size;
+ fr->data_record_size = size;
if (template_id)
*template_id = fr->template_id;
@@ -442,50 +711,50 @@ flow_report_add_del_error_to_clib_error (int error)
}
void
-vnet_flow_reports_reset (flow_report_main_t * frm)
+vnet_flow_reports_reset (ipfix_exporter_t *exp)
{
flow_report_t *fr;
u32 i;
- for (i = 0; i < vec_len (frm->streams); i++)
- if (stream_index_valid (i))
- frm->streams[i].sequence_number = 0;
+ for (i = 0; i < vec_len (exp->streams); i++)
+ if (stream_index_valid (exp, i))
+ exp->streams[i].sequence_number = 0;
- vec_foreach (fr, frm->reports)
- {
- fr->update_rewrite = 1;
- fr->last_template_sent = 0;
- }
+ vec_foreach (fr, exp->reports)
+ {
+ fr->update_rewrite = 1;
+ fr->last_template_sent = 0;
+ }
}
void
-vnet_stream_reset (flow_report_main_t * frm, u32 stream_index)
+vnet_stream_reset (ipfix_exporter_t *exp, u32 stream_index)
{
flow_report_t *fr;
- frm->streams[stream_index].sequence_number = 0;
+ exp->streams[stream_index].sequence_number = 0;
- vec_foreach (fr, frm->reports)
- if (frm->reports->stream_index == stream_index)
- {
- fr->update_rewrite = 1;
- fr->last_template_sent = 0;
- }
+ vec_foreach (fr, exp->reports)
+ if (exp->reports->stream_index == stream_index)
+ {
+ fr->update_rewrite = 1;
+ fr->last_template_sent = 0;
+ }
}
int
-vnet_stream_change (flow_report_main_t * frm,
- u32 old_domain_id, u16 old_src_port,
+vnet_stream_change (ipfix_exporter_t *exp, u32 old_domain_id, u16 old_src_port,
u32 new_domain_id, u16 new_src_port)
{
- i32 stream_index = find_stream (old_domain_id, old_src_port);
+ i32 stream_index = find_stream (exp, old_domain_id, old_src_port);
+
if (stream_index < 0)
return 1;
- flow_report_stream_t *stream = &frm->streams[stream_index];
+ flow_report_stream_t *stream = &exp->streams[stream_index];
stream->domain_id = new_domain_id;
stream->src_port = new_src_port;
if (old_domain_id != new_domain_id || old_src_port != new_src_port)
- vnet_stream_reset (frm, stream_index);
+ vnet_stream_reset (exp, stream_index);
return 0;
}
@@ -495,25 +764,26 @@ set_ipfix_exporter_command_fn (vlib_main_t * vm,
vlib_cli_command_t * cmd)
{
flow_report_main_t *frm = &flow_report_main;
- ip4_address_t collector, src;
+ ip_address_t collector = IP_ADDRESS_V4_ALL_0S, src = IP_ADDRESS_V4_ALL_0S;
u16 collector_port = UDP_DST_PORT_ipfix;
u32 fib_id;
u32 fib_index = ~0;
- collector.as_u32 = 0;
- src.as_u32 = 0;
u32 path_mtu = 512; // RFC 7011 section 10.3.3.
u32 template_interval = 20;
u8 udp_checksum = 0;
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
+ u32 ip_header_size;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (input, "collector %U", unformat_ip4_address, &collector))
+ if (unformat (input, "collector %U", unformat_ip4_address,
+ &collector.ip.ip4))
;
else if (unformat (input, "port %U", unformat_udp_port,
&collector_port))
;
- else if (unformat (input, "src %U", unformat_ip4_address, &src))
+ else if (unformat (input, "src %U", unformat_ip4_address, &src.ip.ip4))
;
else if (unformat (input, "fib-id %u", &fib_id))
{
@@ -533,8 +803,15 @@ set_ipfix_exporter_command_fn (vlib_main_t * vm,
break;
}
- if (collector.as_u32 != 0 && src.as_u32 == 0)
+ /*
+ * If the collector address is set then the src must be too.
+ * The collector address can be set to 0 to disable the exporter.
+ */
+ if (!ip_address_is_zero (&collector) && ip_address_is_zero (&src))
return clib_error_return (0, "src address required");
+ if (collector.version != src.version)
+ return clib_error_return (
+ 0, "src address and dest address must use same IP version");
if (path_mtu > 1450 /* vpp does not support fragmentation */ )
return clib_error_return (0, "too big path-mtu value, maximum is 1450");
@@ -542,28 +819,38 @@ set_ipfix_exporter_command_fn (vlib_main_t * vm,
if (path_mtu < 68)
return clib_error_return (0, "too small path-mtu value, minimum is 68");
+ /* Calculate how much header data we need. */
+ if (collector.version == AF_IP4)
+ ip_header_size = sizeof (ip4_header_t);
+ else
+ ip_header_size = sizeof (ip6_header_t);
+ exp->all_headers_size = ip_header_size + sizeof (udp_header_t) +
+ sizeof (ipfix_message_header_t) +
+ sizeof (ipfix_set_header_t);
+
/* Reset report streams if we are reconfiguring IP addresses */
- if (frm->ipfix_collector.as_u32 != collector.as_u32 ||
- frm->src_address.as_u32 != src.as_u32 ||
- frm->collector_port != collector_port)
- vnet_flow_reports_reset (frm);
-
- frm->ipfix_collector.as_u32 = collector.as_u32;
- frm->collector_port = collector_port;
- frm->src_address.as_u32 = src.as_u32;
- frm->fib_index = fib_index;
- frm->path_mtu = path_mtu;
- frm->template_interval = template_interval;
- frm->udp_checksum = udp_checksum;
-
- if (collector.as_u32)
- vlib_cli_output (vm, "Collector %U, src address %U, "
+ if (ip_address_cmp (&exp->ipfix_collector, &collector) ||
+ ip_address_cmp (&exp->src_address, &src) ||
+ exp->collector_port != collector_port)
+ vnet_flow_reports_reset (exp);
+
+ exp->ipfix_collector = collector;
+ exp->collector_port = collector_port;
+ exp->src_address = src;
+ exp->fib_index = fib_index;
+ exp->path_mtu = path_mtu;
+ exp->template_interval = template_interval;
+ exp->udp_checksum = udp_checksum;
+
+ if (collector.ip.ip4.as_u32)
+ vlib_cli_output (vm,
+ "Collector %U, src address %U, "
"fib index %d, path MTU %u, "
"template resend interval %us, "
"udp checksum %s",
- format_ip4_address, &frm->ipfix_collector,
- format_ip4_address, &frm->src_address,
- fib_index, path_mtu, template_interval,
+ format_ip4_address, &exp->ipfix_collector.ip.ip4,
+ format_ip4_address, &exp->src_address.ip.ip4, fib_index,
+ path_mtu, template_interval,
udp_checksum ? "enabled" : "disabled");
else
vlib_cli_output (vm, "IPFIX Collector is disabled");
@@ -573,7 +860,6 @@ set_ipfix_exporter_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ipfix_exporter_command, static) = {
.path = "set ipfix exporter",
.short_help = "set ipfix exporter "
@@ -584,7 +870,6 @@ VLIB_CLI_COMMAND (set_ipfix_exporter_command, static) = {
"[udp-checksum]",
.function = set_ipfix_exporter_command_fn,
};
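
An example invocation of this command (addresses and values here are illustrative):

  set ipfix exporter collector 203.0.113.10 port 4739 src 203.0.113.1 path-mtu 1450 template-interval 20 udp-checksum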
-/* *INDENT-ON* */
static clib_error_t *
@@ -596,25 +881,31 @@ ipfix_flush_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipfix_flush_command, static) = {
.path = "ipfix flush",
.short_help = "flush the current ipfix data [for make test]",
.function = ipfix_flush_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
flow_report_init (vlib_main_t * vm)
{
flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp;
frm->vlib_main = vm;
frm->vnet_main = vnet_get_main ();
frm->unix_time_0 = time (0);
frm->vlib_time_0 = vlib_time_now (frm->vlib_main);
- frm->fib_index = ~0;
-
+ /*
+ * Make sure that we can always access the first exporter for
+ * backwards compatibility reasons.
+ */
+ pool_alloc (frm->exporters, IPFIX_EXPORTERS_MAX);
+ pool_get (frm->exporters, exp);
+ /* Verify that this is at index 0 */
+ ASSERT (frm->exporters == exp);
+ exp->fib_index = ~0;
return 0;
}
diff --git a/src/vnet/ipfix-export/flow_report.h b/src/vnet/ipfix-export/flow_report.h
index f40015879c4..cd0cafb6158 100644
--- a/src/vnet/ipfix-export/flow_report.h
+++ b/src/vnet/ipfix-export/flow_report.h
@@ -20,6 +20,7 @@
#include <vnet/ethernet/ethernet.h>
#include <vnet/ethernet/packet.h>
#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip_types.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
#include <vnet/udp/udp_packet.h>
@@ -45,27 +46,31 @@ typedef struct
ipfix_template_packet_t ipfix;
} ip4_ipfix_template_packet_t;
+/* Used to build the rewrite */
+typedef struct
+{
+ ip6_header_t ip6;
+ udp_header_t udp;
+ ipfix_template_packet_t ipfix;
+} ip6_ipfix_template_packet_t;
+
struct flow_report_main;
struct flow_report;
+struct ipfix_exporter;
-typedef vlib_frame_t *(vnet_flow_data_callback_t) (struct flow_report_main *,
- struct flow_report *,
- vlib_frame_t *, u32 *,
- u32);
+typedef vlib_frame_t *(vnet_flow_data_callback_t) (
+ struct flow_report_main *frm, struct ipfix_exporter *exp,
+ struct flow_report *, vlib_frame_t *, u32 *, u32);
-typedef u8 *(vnet_flow_rewrite_callback_t) (struct flow_report_main *,
+typedef u8 *(vnet_flow_rewrite_callback_t) (struct ipfix_exporter *exp,
struct flow_report *,
- ip4_address_t *,
- ip4_address_t *, u16,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index);
-
-u8 *vnet_flow_rewrite_generic_callback (struct flow_report_main *,
- struct flow_report *,
- ip4_address_t *,
- ip4_address_t *, u16,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index);
+ u16, ipfix_report_element_t *elts,
+ u32 n_elts, u32 *stream_index);
+
+u8 *vnet_flow_rewrite_generic_callback (struct ipfix_exporter *exp,
+ struct flow_report *, u16,
+ ipfix_report_element_t *elts,
+ u32 n_elts, u32 *stream_index);
typedef union
{
@@ -73,6 +78,16 @@ typedef union
uword as_uword;
} opaque_t;
+/*
+ * A stream represents an IPFIX session to a destination. We can have
+ * multiple streams to the same destination, but each one has its own
+ * domain and source port. A stream has a sequence number for that
+ * session. A stream may contain multiple templates (i.e. multiple
+ * flow reports) and each stream also has its own template space.
+ *
+ * A stream has per-thread state so that data packets can be built
+ * and sent on multiple threads at the same time.
+ */
typedef struct
{
u32 domain_id;
@@ -82,11 +97,37 @@ typedef struct
u16 next_template_no;
} flow_report_stream_t;
+/*
+ * For each flow_report we want to be able to build buffers/frames per thread.
+ */
+typedef struct
+{
+ vlib_buffer_t *buffer;
+ vlib_frame_t *frame;
+ u16 next_data_offset;
+ /*
+ * We need this per thread as the IPFIX sequence number is the count of
+ * data records sent, not the count of packets with data records sent.
+ * See RFC 7011, Sec 3.1.
+ */
+ u8 n_data_records;
+} flow_report_per_thread_t;
+
+/*
+ * A flow report represents a group of fields that are to be exported.
+ * Each flow_report has an associated template that is generated when
+ * the flow_report is added. Each flow_report is associated with a
+ * stream, and multiple flow_reports can use the same stream. When
+ * adding a flow_report the keys for the stream are the domain_id
+ * and the source_port.
+ */
typedef struct flow_report
{
/* ipfix rewrite, set by callback */
u8 *rewrite;
u16 template_id;
+ int data_record_size;
+ flow_report_per_thread_t *per_thread_data;
u32 stream_index;
f64 last_template_sent;
int update_rewrite;
@@ -107,15 +148,24 @@ typedef struct flow_report
vnet_flow_data_callback_t *flow_data_callback;
} flow_report_t;
-typedef struct flow_report_main
+/*
+ * The maximum number of ipfix exporters we can have at once
+ */
+#define IPFIX_EXPORTERS_MAX 5
+
+/*
+ * We support multiple exporters. Each one has its own configured
+ * destination, and its own set of reports and streams.
+ */
+typedef struct ipfix_exporter
{
flow_report_t *reports;
flow_report_stream_t *streams;
/* ipfix collector ip address, port, our ip address, fib index */
- ip4_address_t ipfix_collector;
+ ip_address_t ipfix_collector;
u16 collector_port;
- ip4_address_t src_address;
+ ip_address_t src_address;
u32 fib_index;
/* Path MTU */
@@ -127,6 +177,23 @@ typedef struct flow_report_main
/* UDP checksum calculation enable flag */
u8 udp_checksum;
+ /*
+ * The amount of data needed for all the headers, prior to the first
+ * flowset (template or data or ...). This is mostly dependent on the
+ * L3 and L4 protocols in use.
+ */
+ u32 all_headers_size;
+} ipfix_exporter_t;
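
With the standard header sizes, the all_headers_size field above works out as follows (the IPFIX message header is 16 octets and the set header 4 octets per RFC 7011):

  /* IPv4 collector: 20 (ip4) + 8 (udp) + 16 (message) + 4 (set) = 48
     IPv6 collector: 40 (ip6) + 8 (udp) + 16 (message) + 4 (set) = 68 */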
+
+typedef struct flow_report_main
+{
+ /*
+ * A pool of the exporters. Entry 0 is always there for backwards
+ * compatibility reasons. Entries 1 and above have to be created by
+ * the users.
+ */
+ ipfix_exporter_t *exporters;
+
/* time scale transform. Joy. */
u32 unix_time_0;
f64 vlib_time_0;
@@ -142,8 +209,6 @@ extern flow_report_main_t flow_report_main;
extern vlib_node_registration_t flow_report_process_node;
-int vnet_flow_report_enable_disable (u32 sw_if_index, u32 table_index,
- int enable_disable);
typedef struct
{
vnet_flow_data_callback_t *flow_data_callback;
@@ -155,21 +220,52 @@ typedef struct
u32 domain_id;
u16 src_port;
u32 *stream_indexp;
+ /*
+ * When adding a flow report, the index of the flow report is stored
+ * here on success.
+ */
+ u32 flow_report_index;
} vnet_flow_report_add_del_args_t;
-int vnet_flow_report_add_del (flow_report_main_t * frm,
- vnet_flow_report_add_del_args_t * a,
- u16 * template_id);
+int vnet_flow_report_add_del (ipfix_exporter_t *exp,
+ vnet_flow_report_add_del_args_t *a,
+ u16 *template_id);
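
A sketch of registering a report through this entry point; the flow-data callback and element array are hypothetical placeholders, and the generic rewrite callback is the one declared above:

  vnet_flow_report_add_del_args_t args = {
    .rewrite_callback = vnet_flow_rewrite_generic_callback,
    .flow_data_callback = my_flow_data_callback, /* hypothetical */
    .report_elements = my_elements,              /* hypothetical */
    .n_report_elements = ARRAY_LEN (my_elements),
    .domain_id = 42,
    .src_port = 4739,
    .is_add = 1,
  };
  u16 template_id;
  int rv = vnet_flow_report_add_del (exp, &args, &template_id);
  /* On success, args.flow_report_index identifies the new report. */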
clib_error_t *flow_report_add_del_error_to_clib_error (int error);
-void vnet_flow_reports_reset (flow_report_main_t * frm);
+void vnet_flow_reports_reset (ipfix_exporter_t *exp);
-void vnet_stream_reset (flow_report_main_t * frm, u32 stream_index);
+void vnet_stream_reset (ipfix_exporter_t *exp, u32 stream_index);
-int vnet_stream_change (flow_report_main_t * frm,
- u32 old_domain_id, u16 old_src_port,
- u32 new_domain_id, u16 new_src_port);
+int vnet_stream_change (ipfix_exporter_t *exp, u32 old_domain_id,
+ u16 old_src_port, u32 new_domain_id, u16 new_src_port);
+
+/*
+ * Search all the exporters for one that has a matching destination address.
+ */
+ipfix_exporter_t *
+vnet_ipfix_exporter_lookup (const ip_address_t *ipfix_collector);
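
A short usage sketch, assuming an exporter targeting 203.0.113.10 was created earlier:

  ip_address_t coll = { .version = AF_IP4 };
  coll.ip.ip4.as_u32 = clib_host_to_net_u32 (0xcb00710a); /* 203.0.113.10 */
  ipfix_exporter_t *exp = vnet_ipfix_exporter_lookup (&coll);
  if (exp)
    ; /* found: exp->streams, exp->path_mtu, ... are usable here */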
+
+/*
+ * Get the buffer currently in use for the given flow report on the given
+ * thread. If there is no current buffer then allocate a new one and return
+ * that. This is the buffer that data records should be written into. The
+ * offset currently in use is stored in the flow report's per-thread data
+ * and should be updated as new records are written.
+ */
+vlib_buffer_t *vnet_ipfix_exp_get_buffer (vlib_main_t *vm,
+ ipfix_exporter_t *exp,
+ flow_report_t *fr, u32 thread_index);
+
+/*
+ * Send the provided buffer. At this stage the buffer should be populated
+ * with data records, with the offset in use stored in the flow report's
+ * per-thread data. This function will fix up all the headers and then
+ * send the buffer.
+ */
+void vnet_ipfix_exp_send_buffer (vlib_main_t *vm, ipfix_exporter_t *exp,
+ flow_report_t *fr,
+ flow_report_stream_t *stream,
+ u32 thread_index, vlib_buffer_t *b0);
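
Taken together, the intended per-thread cycle is get, fill, send. A sketch under the assumptions that fr, exp and stream are valid and that record/record_len come from a hypothetical source:

  u32 thread_index = vm->thread_index;
  flow_report_per_thread_t *ptd = &fr->per_thread_data[thread_index];
  vlib_buffer_t *b0 = vnet_ipfix_exp_get_buffer (vm, exp, fr, thread_index);

  if (b0)
    {
      /* Append one data record at the current offset. */
      clib_memcpy_fast (b0->data + ptd->next_data_offset, record, record_len);
      ptd->next_data_offset += record_len;
      ptd->n_data_records++;
      b0->current_length = ptd->next_data_offset;

      /* Flush before the next record would overrun the path MTU. */
      if (ptd->next_data_offset + record_len > exp->path_mtu)
        vnet_ipfix_exp_send_buffer (vm, exp, fr, stream, thread_index, b0);
    }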
#endif /* __included_vnet_flow_report_h__ */
diff --git a/src/vnet/ipfix-export/flow_report_classify.c b/src/vnet/ipfix-export/flow_report_classify.c
index 21b6411a292..9e1b99f252d 100644
--- a/src/vnet/ipfix-export/flow_report_classify.c
+++ b/src/vnet/ipfix-export/flow_report_classify.c
@@ -29,13 +29,10 @@ typedef struct
flow_report_classify_main_t flow_report_classify_main;
u8 *
-ipfix_classify_template_rewrite (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+ipfix_classify_template_rewrite (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index)
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
flow_report_classify_main_t *fcm = &flow_report_classify_main;
vnet_classify_table_t *tblp;
@@ -61,7 +58,7 @@ ipfix_classify_template_rewrite (flow_report_main_t * frm,
u8 *virt_mask;
u8 *real_mask;
- stream = &frm->streams[fr->stream_index];
+ stream = &exp->streams[fr->stream_index];
ipfix_classify_table_t *table = &fcm->tables[flow_table_index];
@@ -109,8 +106,8 @@ ipfix_classify_template_rewrite (flow_report_main_t * frm,
ip->ip_version_and_header_length = 0x45;
ip->ttl = 254;
ip->protocol = IP_PROTOCOL_UDP;
- ip->src_address.as_u32 = src_address->as_u32;
- ip->dst_address.as_u32 = collector_address->as_u32;
+ ip->src_address.as_u32 = exp->src_address.ip.ip4.as_u32;
+ ip->dst_address.as_u32 = exp->ipfix_collector.ip.ip4.as_u32;
udp->src_port = clib_host_to_net_u16 (stream->src_port);
udp->dst_port = clib_host_to_net_u16 (collector_port);
udp->length = clib_host_to_net_u16 (vec_len (rewrite) - sizeof (*ip));
@@ -158,9 +155,9 @@ ipfix_classify_template_rewrite (flow_report_main_t * frm,
}
vlib_frame_t *
-ipfix_classify_send_flows (flow_report_main_t * frm,
- flow_report_t * fr,
- vlib_frame_t * f, u32 * to_next, u32 node_index)
+ipfix_classify_send_flows (flow_report_main_t *frm, ipfix_exporter_t *exp,
+ flow_report_t *fr, vlib_frame_t *f, u32 *to_next,
+ u32 node_index)
{
flow_report_classify_main_t *fcm = &flow_report_classify_main;
vnet_classify_main_t *vcm = &vnet_classify_main;
@@ -182,7 +179,6 @@ ipfix_classify_send_flows (flow_report_main_t * frm,
tcpudp_header_t *tcpudp;
udp_header_t *udp;
int field_index;
- u32 records_this_buffer;
u16 new_l0, old_l0;
ip_csum_t sum0;
vlib_main_t *vm = frm->vlib_main;
@@ -191,7 +187,7 @@ ipfix_classify_send_flows (flow_report_main_t * frm,
u8 transport_protocol;
u8 *virt_key;
- stream = &frm->streams[fr->stream_index];
+ stream = &exp->streams[fr->stream_index];
ipfix_classify_table_t *table = &fcm->tables[flow_table_index];
@@ -233,7 +229,7 @@ ipfix_classify_send_flows (flow_report_main_t * frm,
b0->current_length = copy_len;
b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = frm->fib_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = exp->fib_index;
tp = vlib_buffer_get_current (b0);
ip = (ip4_header_t *) & tp->ip4;
@@ -254,7 +250,6 @@ ipfix_classify_send_flows (flow_report_main_t * frm,
next_offset = (u32) (((u8 *) (s + 1)) - (u8 *) tp);
record_offset = next_offset;
- records_this_buffer = 0;
}
field_index = 0;
@@ -278,14 +273,13 @@ ipfix_classify_send_flows (flow_report_main_t * frm,
sizeof (packets));
next_offset += sizeof (packets);
}
- records_this_buffer++;
stream->sequence_number++;
/* Next record will have the same size as this record */
u32 next_record_size = next_offset - record_offset;
record_offset = next_offset;
- if (next_offset + next_record_size > frm->path_mtu)
+ if (next_offset + next_record_size > exp->path_mtu)
{
s->set_id_length = ipfix_set_id_length (fr->template_id,
next_offset -
@@ -314,7 +308,7 @@ ipfix_classify_send_flows (flow_report_main_t * frm,
udp->length =
clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
- if (frm->udp_checksum)
+ if (exp->udp_checksum)
{
/* RFC 7011 section 10.3.2. */
udp->checksum =
@@ -370,7 +364,7 @@ flush:
ip->length = new_l0;
udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
- if (frm->udp_checksum)
+ if (exp->udp_checksum)
{
/* RFC 7011 section 10.3.2. */
udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
@@ -397,7 +391,7 @@ ipfix_classify_table_add_del_command_fn (vlib_main_t * vm,
vlib_cli_command_t * cmd)
{
flow_report_classify_main_t *fcm = &flow_report_classify_main;
- flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = &flow_report_main.exporters[0];
vnet_flow_report_add_del_args_t args;
ipfix_classify_table_t *table;
int rv;
@@ -475,7 +469,7 @@ ipfix_classify_table_add_del_command_fn (vlib_main_t * vm,
args.domain_id = fcm->domain_id;
args.src_port = fcm->src_port;
- rv = vnet_flow_report_add_del (frm, &args, NULL);
+ rv = vnet_flow_report_add_del (exp, &args, NULL);
error = flow_report_add_del_error_to_clib_error (rv);
@@ -486,13 +480,11 @@ ipfix_classify_table_add_del_command_fn (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipfix_classify_table_add_del_command, static) = {
.path = "ipfix classify table",
.short_help = "ipfix classify table add|del <table-index>",
.function = ipfix_classify_table_add_del_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
set_ipfix_classify_stream_command_fn (vlib_main_t * vm,
@@ -500,7 +492,7 @@ set_ipfix_classify_stream_command_fn (vlib_main_t * vm,
vlib_cli_command_t * cmd)
{
flow_report_classify_main_t *fcm = &flow_report_classify_main;
- flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = &flow_report_main.exporters[0];
u32 domain_id = 1;
u32 src_port = UDP_DST_PORT_ipfix;
@@ -518,7 +510,7 @@ set_ipfix_classify_stream_command_fn (vlib_main_t * vm,
if (fcm->src_port != 0 &&
(fcm->domain_id != domain_id || fcm->src_port != (u16) src_port))
{
- int rv = vnet_stream_change (frm, fcm->domain_id, fcm->src_port,
+ int rv = vnet_stream_change (exp, fcm->domain_id, fcm->src_port,
domain_id, (u16) src_port);
ASSERT (rv == 0);
}
@@ -529,14 +521,12 @@ set_ipfix_classify_stream_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ipfix_classify_stream_command, static) = {
.path = "set ipfix classify stream",
.short_help = "set ipfix classify stream"
"[domain <domain-id>] [src-port <src-port>]",
.function = set_ipfix_classify_stream_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
flow_report_classify_init (vlib_main_t * vm)
diff --git a/src/vnet/ipfix-export/flow_report_classify.h b/src/vnet/ipfix-export/flow_report_classify.h
index a923f36714a..8ca40688599 100644
--- a/src/vnet/ipfix-export/flow_report_classify.h
+++ b/src/vnet/ipfix-export/flow_report_classify.h
@@ -112,18 +112,15 @@ ipfix_classify_delete_table (u32 index)
fcm->tables[index].classify_table_index = ~0;
}
-u8 *ipfix_classify_template_rewrite (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+u8 *ipfix_classify_template_rewrite (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index);
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index);
-vlib_frame_t *ipfix_classify_send_flows (flow_report_main_t * frm,
- flow_report_t * fr,
- vlib_frame_t * f,
- u32 * to_next, u32 node_index);
+vlib_frame_t *ipfix_classify_send_flows (flow_report_main_t *frm,
+ ipfix_exporter_t *exp,
+ flow_report_t *fr, vlib_frame_t *f,
+ u32 *to_next, u32 node_index);
#endif /* __included_flow_report_classify_h__ */
diff --git a/src/vnet/ipfix-export/ipfix_export.api b/src/vnet/ipfix-export/ipfix_export.api
index a70b72bee39..8a9d5b13124 100644
--- a/src/vnet/ipfix-export/ipfix_export.api
+++ b/src/vnet/ipfix-export/ipfix_export.api
@@ -73,6 +73,80 @@ define ipfix_exporter_details
bool udp_checksum;
};
+/** Configure IPFIX exporter within the exporting process.
+ The exporting process can contain multiple independent exporters,
+ each of which have their own state. The collector_address is the key
+ field that identifies a unique exporter. The already existing API
+ 'set_ipfix_exporter' is used to modify a single exporter (which will
+ always have stat index 0). If more than one exporter is required then
+ they can be created and deleted using this API.
+
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_create - True for create, False for delete
+ @param collector_address - address of IPFIX collector
+ @param collector_port - port of IPFIX collector
+ @param src_address - address of IPFIX exporter
+ @param vrf_id - VRF / fib table ID
+ @param path_mtu - Path MTU between exporter and collector
+ @param template_interval - number of seconds after which to resend template
+ @param udp_checksum - UDP checksum calculation enable flag
+*/
+
+define ipfix_exporter_create_delete {
+ u32 client_index;
+ u32 context;
+ bool is_create;
+ vl_api_address_t collector_address;
+ u16 collector_port;
+ vl_api_address_t src_address;
+ u32 vrf_id;
+ u32 path_mtu;
+ u32 template_interval;
+ bool udp_checksum;
+};
+
+define ipfix_exporter_create_delete_reply {
+ u32 context;
+ i32 retval;
+ u32 stat_index;
+};
+
+service {
+ rpc ipfix_all_exporter_get returns ipfix_all_exporter_get_reply
+ stream ipfix_all_exporter_details;
+};
+
+define ipfix_all_exporter_get
+{
+ u32 client_index;
+ u32 context;
+ u32 cursor;
+};
+
+define ipfix_all_exporter_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 cursor;
+};
+
+/** \brief IPFIX exporter details in response to ipfix_all_exporter_get
+ @param context - sender context, to match reply w/ request
+ @param collector_address - address of IPFIX collector
+ @param collector_port - port of IPFIX collector
+ @param src_address - address of IPFIX exporter
+ @param vrf_id - VRF / fib table ID
+ @param path_mtu, template_interval, udp_checksum - as in set_ipfix_exporter
+*/
+define ipfix_all_exporter_details
+{
+ u32 context;
+ vl_api_address_t collector_address;
+ u16 collector_port;
+ vl_api_address_t src_address;
+ u32 vrf_id;
+ u32 path_mtu;
+ u32 template_interval;
+ bool udp_checksum;
+};
+
/** \brief IPFIX classify stream configure request
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/vnet/ipip/ipip.c b/src/vnet/ipip/ipip.c
index 5dbe85a1c5b..aaf21468d1e 100644
--- a/src/vnet/ipip/ipip.c
+++ b/src/vnet/ipip/ipip.c
@@ -148,7 +148,14 @@ ipip64_fixup (vlib_main_t * vm, const ip_adjacency_t * adj, vlib_buffer_t * b,
ip4->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
tunnel_encap_fixup_6o4 (flags, ((ip6_header_t *) (ip4 + 1)), ip4);
- ip4->checksum = ip4_header_checksum (ip4);
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+ }
+ else
+ ip4->checksum = ip4_header_checksum (ip4);
}
static void
@@ -164,7 +171,14 @@ ipip44_fixup (vlib_main_t * vm, const ip_adjacency_t * adj, vlib_buffer_t * b,
ip4->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
tunnel_encap_fixup_4o4 (flags, ip4 + 1, ip4);
- ip4->checksum = ip4_header_checksum (ip4);
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+ }
+ else
+ ip4->checksum = ip4_header_checksum (ip4);
}
static void
@@ -185,6 +199,12 @@ ipip46_fixup (vlib_main_t * vm, const ip_adjacency_t * adj, vlib_buffer_t * b,
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) -
sizeof (*ip6));
tunnel_encap_fixup_4o6 (flags, b, ((ip4_header_t *) (ip6 + 1)), ip6);
+
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip6 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+ }
}
static void
@@ -205,6 +225,12 @@ ipip66_fixup (vlib_main_t * vm,
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) -
sizeof (*ip6));
tunnel_encap_fixup_6o6 (flags, ip6 + 1, ip6);
+
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip6 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+ }
}
static void
@@ -226,6 +252,12 @@ ipipm6_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b,
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) - sizeof (*ip6));
tunnel_encap_fixup_mplso6 (flags, b, (mpls_unicast_header_t *) (ip6 + 1),
ip6);
+
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip6 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+ }
}
static void
@@ -245,7 +277,15 @@ ipipm4_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b,
ip4->length =
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) - sizeof (*ip4));
tunnel_encap_fixup_mplso4 (flags, (mpls_unicast_header_t *) (ip4 + 1), ip4);
- ip4->checksum = ip4_header_checksum (ip4);
+
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+ }
+ else
+ ip4->checksum = ip4_header_checksum (ip4);
}
static void
@@ -269,7 +309,6 @@ ipip_tunnel_stack (adj_index_t ai)
}
else
{
- /* *INDENT-OFF* */
fib_prefix_t dst = {
.fp_len = t->transport == IPIP_TRANSPORT_IP6 ? 128 : 32,
.fp_proto = (t->transport == IPIP_TRANSPORT_IP6 ?
@@ -277,7 +316,6 @@ ipip_tunnel_stack (adj_index_t ai)
FIB_PROTOCOL_IP4),
.fp_addr = t->tunnel_dst
};
- /* *INDENT-ON* */
adj_midchain_delegate_stack (ai, t->fib_index, &dst);
}
@@ -348,9 +386,6 @@ ipip_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
if (!(t->flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_INNER_HASH))
af |= ADJ_FLAG_MIDCHAIN_IP_STACK;
- if (VNET_LINK_ETHERNET == adj_get_link_type (ai))
- af |= ADJ_FLAG_MIDCHAIN_NO_COUNT;
-
fixup = ipip_get_fixup (t, adj_get_link_type (ai), &af);
adj_nbr_midchain_update_rewrite
(ai, fixup,
@@ -515,7 +550,6 @@ ipip_tunnel_desc (u32 sw_if_index,
return (0);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS(ipip_device_class) = {
.name = "IPIP tunnel device",
.format_device_name = format_ipip_tunnel_name,
@@ -545,7 +579,6 @@ VNET_HW_INTERFACE_CLASS(mipip_hw_interface_class) = {
.update_adjacency = mipip_update_adj,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA,
};
-/* *INDENT-ON* */
ipip_tunnel_t *
ipip_tunnel_db_find (const ipip_tunnel_key_t * key)
@@ -785,16 +818,16 @@ ipip_add_tunnel (ipip_transport_t transport,
gm->tunnel_index_by_sw_if_index[sw_if_index] = t_idx;
if (t->transport == IPIP_TRANSPORT_IP4)
- {
- hi->min_packet_bytes = 64 + sizeof (ip4_header_t);
- }
+ hi->frame_overhead = sizeof (ip4_header_t);
else
- {
- hi->min_packet_bytes = 64 + sizeof (ip6_header_t);
- }
+ hi->frame_overhead = sizeof (ip6_header_t);
+
+ hi->min_frame_size = hi->frame_overhead + 64;
/* Standard default ipip MTU. */
vnet_sw_interface_set_mtu (vnm, sw_if_index, 9000);
+ vnet_set_interface_l3_output_node (gm->vlib_main, sw_if_index,
+ (u8 *) "tunnel-output");
t->tunnel_src = *src;
t->tunnel_dst = *dst;
@@ -840,6 +873,7 @@ ipip_del_tunnel (u32 sw_if_index)
teib_walk_itf (t->sw_if_index, ipip_tunnel_delete_teib_walk, t);
vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ );
+ vnet_reset_interface_l3_output_node (gm->vlib_main, t->sw_if_index);
gm->tunnel_index_by_sw_if_index[sw_if_index] = ~0;
vnet_delete_hw_interface (vnm, t->hw_if_index);
hash_unset (gm->instance_used, t->user_instance);
diff --git a/src/vnet/ipip/ipip_api.c b/src/vnet/ipip/ipip_api.c
index 50b6731af44..2cb7bdf8dae 100644
--- a/src/vnet/ipip/ipip_api.c
+++ b/src/vnet/ipip/ipip_api.c
@@ -86,12 +86,10 @@ vl_api_ipip_add_tunnel_t_handler (vl_api_ipip_add_tunnel_t * mp)
}
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_IPIP_ADD_TUNNEL_REPLY,
({
rmp->sw_if_index = ntohl(sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -105,29 +103,45 @@ vl_api_ipip_del_tunnel_t_handler (vl_api_ipip_del_tunnel_t * mp)
REPLY_MACRO (VL_API_IPIP_DEL_TUNNEL_REPLY);
}
+static vl_api_tunnel_mode_t
+ipip_tunnel_mode_encode (ipip_mode_t mode)
+{
+ switch (mode)
+ {
+ case IPIP_MODE_P2P:
+ return TUNNEL_API_MODE_P2P;
+ case IPIP_MODE_P2MP:
+ return TUNNEL_API_MODE_MP;
+ case IPIP_MODE_6RD:
+ return TUNNEL_API_MODE_P2P;
+ default:
+ return TUNNEL_API_MODE_P2P;
+ }
+}
+
static void
send_ipip_tunnel_details (ipip_tunnel_t * t, vl_api_ipip_tunnel_dump_t * mp)
{
ipip_main_t *im = &ipip_main;
vl_api_ipip_tunnel_details_t *rmp;
bool is_ipv6 = t->transport == IPIP_TRANSPORT_IP6 ? true : false;
+ ip46_type_t ip_type = is_ipv6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4;
fib_table_t *ft;
- ft = fib_table_get (t->fib_index, (is_ipv6 ? FIB_PROTOCOL_IP6 :
- FIB_PROTOCOL_IP4));
-
- /* *INDENT-OFF* */
- REPLY_MACRO_DETAILS2(VL_API_IPIP_TUNNEL_DETAILS,
- ({
- ip_address_encode (&t->tunnel_src, IP46_TYPE_ANY, &rmp->tunnel.src);
- ip_address_encode (&t->tunnel_dst, IP46_TYPE_ANY, &rmp->tunnel.dst);
- rmp->tunnel.table_id = htonl (ft->ft_table_id);
- rmp->tunnel.instance = htonl (t->user_instance);
- rmp->tunnel.sw_if_index = htonl (t->sw_if_index);
- rmp->tunnel.dscp = ip_dscp_encode(t->dscp);
- rmp->tunnel.flags = tunnel_encap_decap_flags_encode(t->flags);
- }));
- /* *INDENT-ON* */
+ ft = fib_table_get (t->fib_index,
+ (is_ipv6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4));
+
+ REPLY_MACRO_DETAILS2 (
+ VL_API_IPIP_TUNNEL_DETAILS, ({
+ ip_address_encode (&t->tunnel_src, ip_type, &rmp->tunnel.src);
+ ip_address_encode (&t->tunnel_dst, ip_type, &rmp->tunnel.dst);
+ rmp->tunnel.table_id = htonl (ft->ft_table_id);
+ rmp->tunnel.instance = htonl (t->user_instance);
+ rmp->tunnel.sw_if_index = htonl (t->sw_if_index);
+ rmp->tunnel.dscp = ip_dscp_encode (t->dscp);
+ rmp->tunnel.flags = tunnel_encap_decap_flags_encode (t->flags);
+ rmp->tunnel.mode = ipip_tunnel_mode_encode (t->mode);
+ }));
}
static void
@@ -141,12 +155,10 @@ vl_api_ipip_tunnel_dump_t_handler (vl_api_ipip_tunnel_dump_t * mp)
if (sw_if_index == ~0)
{
- /* *INDENT-OFF* */
pool_foreach (t, im->tunnels)
{
send_ipip_tunnel_details(t, mp);
}
- /* *INDENT-ON* */
}
else
{
@@ -185,12 +197,10 @@ vl_api_ipip_6rd_add_tunnel_t_handler (vl_api_ipip_6rd_add_tunnel_t * mp)
&sixrd_tunnel_index);
}
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IPIP_6RD_ADD_TUNNEL_REPLY,
({
rmp->sw_if_index = htonl (sixrd_tunnel_index);
}));
- /* *INDENT-ON* */
}
static void
diff --git a/src/vnet/ipip/ipip_cli.c b/src/vnet/ipip/ipip_cli.c
index 1a8e8896965..606a1f53f9a 100644
--- a/src/vnet/ipip/ipip_cli.c
+++ b/src/vnet/ipip/ipip_cli.c
@@ -197,7 +197,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(create_ipip_tunnel_command, static) = {
.path = "create ipip tunnel",
.short_help = "create ipip tunnel src <addr> dst <addr> [instance <n>] "
@@ -209,7 +208,6 @@ VLIB_CLI_COMMAND(delete_ipip_tunnel_command, static) = {
.short_help = "delete ipip tunnel sw_if_index <sw_if_index>",
.function = delete_ipip_tunnel_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_ipip_tunnel (u8 * s, va_list * args)
@@ -274,10 +272,8 @@ show_ipip_tunnel_command_fn (vlib_main_t * vm,
if (ti == ~0)
{
- /* *INDENT-OFF* */
pool_foreach (t, gm->tunnels)
{vlib_cli_output(vm, "%U", format_ipip_tunnel, t); }
- /* *INDENT-ON* */
}
else
{
@@ -290,12 +286,10 @@ show_ipip_tunnel_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(show_ipip_tunnel_command, static) = {
.path = "show ipip tunnel",
.function = show_ipip_tunnel_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_ipip_tunnel_key (u8 * s, va_list * args)
@@ -318,12 +312,10 @@ ipip_tunnel_hash_show (vlib_main_t * vm,
ipip_tunnel_key_t *key;
u32 index;
- /* *INDENT-OFF* */
hash_foreach(key, index, im->tunnel_by_key,
({
vlib_cli_output (vm, " %U -> %d", format_ipip_tunnel_key, key, index);
}));
- /* *INDENT-ON* */
return NULL;
}
@@ -331,14 +323,12 @@ ipip_tunnel_hash_show (vlib_main_t * vm,
/**
* show IPSEC tunnel protection hash tables
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipip_tunnel_hash_show_node, static) =
{
.path = "show ipip tunnel-hash",
.function = ipip_tunnel_hash_show,
.short_help = "show ipip tunnel-hash",
};
-/* *INDENT-ON* */
static clib_error_t *
create_sixrd_tunnel_command_fn (vlib_main_t * vm,
@@ -464,7 +454,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(create_sixrd_tunnel_command, static) = {
.path = "create 6rd tunnel",
.short_help = "create 6rd tunnel ip6-pfx <ip6-pfx> ip4-pfx <ip4-pfx> "
@@ -477,7 +466,6 @@ VLIB_CLI_COMMAND(delete_sixrd_tunnel_command, static) = {
.short_help = "delete 6rd tunnel sw_if_index <sw_if_index>",
.function = delete_sixrd_tunnel_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipip/node.c b/src/vnet/ipip/node.c
index b008a21a20f..a289cc885df 100644
--- a/src/vnet/ipip/node.c
+++ b/src/vnet/ipip/node.c
@@ -260,7 +260,6 @@ static char *ipip_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ipip4_input_node) = {
.name = "ipip4-input",
/* Takes a vector of packets. */
@@ -293,7 +292,6 @@ VLIB_REGISTER_NODE(ipip6_input_node) = {
.format_trace = format_ipip_rx_trace,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipip/sixrd.c b/src/vnet/ipip/sixrd.c
index 492b4f83260..6e0bfb042cc 100644
--- a/src/vnet/ipip/sixrd.c
+++ b/src/vnet/ipip/sixrd.c
@@ -250,7 +250,6 @@ sixrd_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
return /* no error */ 0;
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS(sixrd_hw_interface_class) = {
.name = "ip6ip-6rd",
.build_rewrite = sixrd_build_rewrite,
@@ -265,7 +264,6 @@ VNET_DEVICE_CLASS(sixrd_device_class) = {
#endif
}
;
-/* *INDENT-ON* */
int
sixrd_add_tunnel (ip6_address_t * ip6_prefix, u8 ip6_prefix_len,
@@ -325,6 +323,8 @@ sixrd_add_tunnel (ip6_address_t * ip6_prefix, u8 ip6_prefix_len,
t->user_instance = t_idx;
vnet_sw_interface_set_mtu (vnet_get_main (), t->sw_if_index, 1480);
+ vnet_set_interface_l3_output_node (gm->vlib_main, hi->sw_if_index,
+ (u8 *) "tunnel-output");
ipip_tunnel_db_add (t, &key);
@@ -339,7 +339,6 @@ sixrd_add_tunnel (ip6_address_t * ip6_prefix, u8 ip6_prefix_len,
ip6_sw_interface_enable_disable (t->sw_if_index, true);
/* Create IPv6 route/adjacency */
- /* *INDENT-OFF* */
fib_prefix_t pfx6 = {
.fp_proto = FIB_PROTOCOL_IP6,
.fp_len = t->sixrd.ip6_prefix_len,
@@ -347,7 +346,6 @@ sixrd_add_tunnel (ip6_address_t * ip6_prefix, u8 ip6_prefix_len,
.ip6 = t->sixrd.ip6_prefix,
},
};
- /* *INDENT-ON* */
fib_table_lock (ip6_fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_6RD);
fib_table_entry_update_one_path (ip6_fib_index, &pfx6, FIB_SOURCE_6RD,
@@ -384,7 +382,6 @@ sixrd_del_tunnel (u32 sw_if_index)
return -1;
}
- /* *INDENT-OFF* */
fib_prefix_t pfx6 = {
.fp_proto = FIB_PROTOCOL_IP6,
.fp_len = t->sixrd.ip6_prefix_len,
@@ -392,7 +389,6 @@ sixrd_del_tunnel (u32 sw_if_index)
.ip6 = t->sixrd.ip6_prefix,
},
};
- /* *INDENT-ON* */
fib_table_entry_path_remove (t->sixrd.ip6_fib_index, &pfx6,
FIB_SOURCE_6RD,
@@ -403,6 +399,7 @@ sixrd_del_tunnel (u32 sw_if_index)
vnet_sw_interface_set_flags (vnet_get_main (), t->sw_if_index,
0 /* down */ );
+ vnet_reset_interface_l3_output_node (gm->vlib_main, t->sw_if_index);
ip6_sw_interface_enable_disable (t->sw_if_index, false);
gm->tunnel_index_by_sw_if_index[t->sw_if_index] = ~0;
@@ -502,7 +499,8 @@ sixrd_init (vlib_main_t * vm)
sixrd_adj_delegate_type =
adj_delegate_register_new_type (&sixrd_adj_delegate_vft);
- sixrd_fib_node_type = fib_node_register_new_type (&sixrd_fib_node_vft);
+ sixrd_fib_node_type =
+ fib_node_register_new_type ("sixrd", &sixrd_fib_node_vft);
return error;
}
diff --git a/src/vnet/ipsec/ah.h b/src/vnet/ipsec/ah.h
index d0b4c21a4bc..450c9cfd6dc 100644
--- a/src/vnet/ipsec/ah.h
+++ b/src/vnet/ipsec/ah.h
@@ -17,6 +17,7 @@
#include <vnet/ip/ip.h>
#include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/ipsec.api_enum.h>
typedef struct
{
@@ -29,19 +30,67 @@ typedef struct
} ah_header_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip4_header_t ip4;
ah_header_t ah;
}) ip4_and_ah_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip6_header_t ip6;
ah_header_t ah;
}) ip6_and_ah_header_t;
-/* *INDENT-ON* */
+
+always_inline u32
+ah_encrypt_err_to_sa_err (u32 err)
+{
+ switch (err)
+ {
+ case AH_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR:
+ return IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR;
+ case AH_ENCRYPT_ERROR_SEQ_CYCLED:
+ return IPSEC_SA_ERROR_SEQ_CYCLED;
+ }
+ return ~0;
+}
+
+always_inline u32
+ah_decrypt_err_to_sa_err (u32 err)
+{
+ switch (err)
+ {
+ case AH_DECRYPT_ERROR_DECRYPTION_FAILED:
+ return IPSEC_SA_ERROR_DECRYPTION_FAILED;
+ case AH_DECRYPT_ERROR_INTEG_ERROR:
+ return IPSEC_SA_ERROR_INTEG_ERROR;
+ case AH_DECRYPT_ERROR_NO_TAIL_SPACE:
+ return IPSEC_SA_ERROR_NO_TAIL_SPACE;
+ case AH_DECRYPT_ERROR_DROP_FRAGMENTS:
+ return IPSEC_SA_ERROR_DROP_FRAGMENTS;
+ case AH_DECRYPT_ERROR_REPLAY:
+ return IPSEC_SA_ERROR_REPLAY;
+ }
+ return ~0;
+}
+
+always_inline void
+ah_encrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
+ u32 thread_index, u32 err, u16 index, u16 *nexts,
+ u16 drop_next, u32 sa_index)
+{
+ ipsec_set_next_index (b, node, thread_index, err,
+ ah_encrypt_err_to_sa_err (err), index, nexts,
+ drop_next, sa_index);
+}
+
+always_inline void
+ah_decrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
+ u32 thread_index, u32 err, u16 index, u16 *nexts,
+ u16 drop_next, u32 sa_index)
+{
+ ipsec_set_next_index (b, node, thread_index, err,
+ ah_decrypt_err_to_sa_err (err), index, nexts,
+ drop_next, sa_index);
+}
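
The two wrappers above funnel every AH drop through ipsec_set_next_index (), recording the failure both on the node's error counter and on the per-SA error counter chosen by the err-to-sa-err translation. A minimal standalone sketch of that translation pattern, with hypothetical enum values standing in for the generated ipsec.api_enum.h constants:

#include <stdio.h>

/* hypothetical stand-ins for the generated node and per-SA error enums */
enum toy_node_err { TOY_NODE_ERR_INTEG, TOY_NODE_ERR_REPLAY, TOY_NODE_ERR_RX };
enum toy_sa_err { TOY_SA_ERR_INTEG, TOY_SA_ERR_REPLAY };

static unsigned
toy_err_to_sa_err (enum toy_node_err err)
{
  switch (err)
    {
    case TOY_NODE_ERR_INTEG:
      return TOY_SA_ERR_INTEG;
    case TOY_NODE_ERR_REPLAY:
      return TOY_SA_ERR_REPLAY;
    default:
      return ~0u; /* ~0 marks "no per-SA counter for this error" */
    }
}

int
main (void)
{
  printf ("replay -> SA error %u\n", toy_err_to_sa_err (TOY_NODE_ERR_REPLAY));
  return 0;
}
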
always_inline u8
ah_calc_icv_padding_len (u8 icv_size, int is_ipv6)
diff --git a/src/vnet/ipsec/ah_decrypt.c b/src/vnet/ipsec/ah_decrypt.c
index 1ad372a7de0..918ebf03f67 100644
--- a/src/vnet/ipsec/ah_decrypt.c
+++ b/src/vnet/ipsec/ah_decrypt.c
@@ -38,28 +38,6 @@ typedef enum
AH_DECRYPT_N_NEXT,
} ah_decrypt_next_t;
-#define foreach_ah_decrypt_error \
- _ (RX_PKTS, "AH pkts received") \
- _ (DECRYPTION_FAILED, "AH decryption failed") \
- _ (INTEG_ERROR, "Integrity check failed") \
- _ (NO_TAIL_SPACE, "not enough buffer tail space (dropped)") \
- _ (DROP_FRAGMENTS, "IP fragments drop") \
- _ (REPLAY, "SA replayed packet")
-
-typedef enum
-{
-#define _(sym,str) AH_DECRYPT_ERROR_##sym,
- foreach_ah_decrypt_error
-#undef _
- AH_DECRYPT_N_ERROR,
-} ah_decrypt_error_t;
-
-static char *ah_decrypt_error_strings[] = {
-#define _(sym,string) string,
- foreach_ah_decrypt_error
-#undef _
-};
-
typedef struct
{
ipsec_integ_alg_t integ_alg;
@@ -125,8 +103,9 @@ ah_process_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
{
u32 bi = op->user_data;
- b[bi]->error = node->errors[AH_DECRYPT_ERROR_INTEG_ERROR];
- nexts[bi] = AH_DECRYPT_NEXT_DROP;
+ ah_decrypt_set_next_index (
+ b[bi], node, vm->thread_index, AH_DECRYPT_ERROR_INTEG_ERROR, bi,
+ nexts, AH_DECRYPT_NEXT_DROP, vnet_buffer (b[bi])->ipsec.sad_index);
n_fail--;
}
op++;
@@ -149,6 +128,7 @@ ah_decrypt_inline (vlib_main_t * vm,
from = vlib_frame_vector_args (from_frame);
n_left = from_frame->n_vectors;
ipsec_sa_t *sa0 = 0;
+ bool anti_replay_result;
u32 current_sa_index = ~0, current_sa_bytes = 0, current_sa_pkts = 0;
clib_memset (pkt_data, 0, VLIB_FRAME_SIZE * sizeof (pkt_data[0]));
@@ -166,8 +146,7 @@ ah_decrypt_inline (vlib_main_t * vm,
{
if (current_sa_index != ~0)
vlib_increment_combined_counter (&ipsec_sa_counters, thread_index,
- current_sa_index,
- current_sa_pkts,
+ current_sa_index, current_sa_pkts,
current_sa_bytes);
current_sa_index = vnet_buffer (b[0])->ipsec.sad_index;
sa0 = ipsec_sa_get (current_sa_index);
@@ -177,7 +156,7 @@ ah_decrypt_inline (vlib_main_t * vm,
thread_index, current_sa_index);
}
- if (PREDICT_FALSE (~0 == sa0->thread_index))
+ if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index))
{
/* this is the first packet to use this SA, claim the SA
* for this thread. this could happen simultaneously on
@@ -211,8 +190,9 @@ ah_decrypt_inline (vlib_main_t * vm,
{
if (ip4_is_fragment (ih4))
{
- b[0]->error = node->errors[AH_DECRYPT_ERROR_DROP_FRAGMENTS];
- next[0] = AH_DECRYPT_NEXT_DROP;
+ ah_decrypt_set_next_index (
+ b[0], node, vm->thread_index, AH_DECRYPT_ERROR_DROP_FRAGMENTS,
+ 0, next, AH_DECRYPT_NEXT_DROP, current_sa_index);
goto next;
}
pd->ip_hdr_size = ip4_header_bytes (ih4);
@@ -222,11 +202,21 @@ ah_decrypt_inline (vlib_main_t * vm,
pd->seq = clib_host_to_net_u32 (ah0->seq_no);
/* anti-replay check */
- if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, ~0, false,
- &pd->seq_hi))
+ if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0)))
{
- b[0]->error = node->errors[AH_DECRYPT_ERROR_REPLAY];
- next[0] = AH_DECRYPT_NEXT_DROP;
+ anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
+ sa0, pd->seq, ~0, false, &pd->seq_hi, true);
+ }
+ else
+ {
+ anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
+ sa0, pd->seq, ~0, false, &pd->seq_hi, false);
+ }
+ if (anti_replay_result)
+ {
+ ah_decrypt_set_next_index (b[0], node, vm->thread_index,
+ AH_DECRYPT_ERROR_REPLAY, 0, next,
+ AH_DECRYPT_NEXT_DROP, current_sa_index);
goto next;
}
@@ -241,8 +231,9 @@ ah_decrypt_inline (vlib_main_t * vm,
pd->current_data + b[0]->current_length
+ sizeof (u32) > buffer_data_size))
{
- b[0]->error = node->errors[AH_DECRYPT_ERROR_NO_TAIL_SPACE];
- next[0] = AH_DECRYPT_NEXT_DROP;
+ ah_decrypt_set_next_index (
+ b[0], node, vm->thread_index, AH_DECRYPT_ERROR_NO_TAIL_SPACE,
+ 0, next, AH_DECRYPT_NEXT_DROP, current_sa_index);
goto next;
}
@@ -325,23 +316,43 @@ ah_decrypt_inline (vlib_main_t * vm,
if (PREDICT_TRUE (sa0->integ_alg != IPSEC_INTEG_ALG_NONE))
{
/* redo the anti-replay check. see esp_decrypt for details */
- if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi,
- true, NULL))
+ if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0)))
{
- b[0]->error = node->errors[AH_DECRYPT_ERROR_REPLAY];
- next[0] = AH_DECRYPT_NEXT_DROP;
- goto trace;
+ if (ipsec_sa_anti_replay_and_sn_advance (
+ sa0, pd->seq, pd->seq_hi, true, NULL, true))
+ {
+ ah_decrypt_set_next_index (
+ b[0], node, vm->thread_index, AH_DECRYPT_ERROR_REPLAY, 0,
+ next, AH_DECRYPT_NEXT_DROP, pd->sa_index);
+ goto trace;
+ }
+ n_lost = ipsec_sa_anti_replay_advance (
+ sa0, thread_index, pd->seq, pd->seq_hi, true);
+ }
+ else
+ {
+ if (ipsec_sa_anti_replay_and_sn_advance (
+ sa0, pd->seq, pd->seq_hi, true, NULL, false))
+ {
+ ah_decrypt_set_next_index (
+ b[0], node, vm->thread_index, AH_DECRYPT_ERROR_REPLAY, 0,
+ next, AH_DECRYPT_NEXT_DROP, pd->sa_index);
+ goto trace;
+ }
+ n_lost = ipsec_sa_anti_replay_advance (
+ sa0, thread_index, pd->seq, pd->seq_hi, false);
}
- n_lost = ipsec_sa_anti_replay_advance (sa0, thread_index, pd->seq,
- pd->seq_hi);
- vlib_prefetch_simple_counter (&ipsec_sa_lost_counters, thread_index,
- pd->sa_index);
+ vlib_prefetch_simple_counter (
+ &ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST], thread_index,
+ pd->sa_index);
}
u16 ah_hdr_len = sizeof (ah_header_t) + pd->icv_size
+ pd->icv_padding_len;
vlib_buffer_advance (b[0], pd->ip_hdr_size + ah_hdr_len);
b[0]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ b[0]->flags &= ~(VNET_BUFFER_F_L4_CHECKSUM_COMPUTED |
+ VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
if (PREDICT_TRUE (ipsec_sa_is_set_IS_TUNNEL (sa0)))
{ /* tunnel mode */
@@ -351,8 +362,10 @@ ah_decrypt_inline (vlib_main_t * vm,
next[0] = AH_DECRYPT_NEXT_IP6_INPUT;
else
{
- b[0]->error = node->errors[AH_DECRYPT_ERROR_DECRYPTION_FAILED];
- next[0] = AH_DECRYPT_NEXT_DROP;
+ ah_decrypt_set_next_index (b[0], node, vm->thread_index,
+ AH_DECRYPT_ERROR_DECRYPTION_FAILED, 0,
+ next, AH_DECRYPT_NEXT_DROP,
+ pd->sa_index);
goto trace;
}
}
@@ -403,8 +416,9 @@ ah_decrypt_inline (vlib_main_t * vm,
}
if (PREDICT_FALSE (n_lost))
- vlib_increment_simple_counter (&ipsec_sa_lost_counters, thread_index,
- pd->sa_index, n_lost);
+ vlib_increment_simple_counter (
+ &ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST], thread_index,
+ pd->sa_index, n_lost);
vnet_buffer (b[0])->sw_if_index[VLIB_TX] = (u32) ~ 0;
trace:
@@ -436,15 +450,14 @@ VLIB_NODE_FN (ah4_decrypt_node) (vlib_main_t * vm,
return ah_decrypt_inline (vm, node, from_frame, 0 /* is_ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ah4_decrypt_node) = {
.name = "ah4-decrypt",
.vector_size = sizeof (u32),
.format_trace = format_ah_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(ah_decrypt_error_strings),
- .error_strings = ah_decrypt_error_strings,
+ .n_errors = AH_DECRYPT_N_ERROR,
+ .error_counters = ah_decrypt_error_counters,
.n_next_nodes = AH_DECRYPT_N_NEXT,
.next_nodes = {
@@ -454,7 +467,6 @@ VLIB_REGISTER_NODE (ah4_decrypt_node) = {
[AH_DECRYPT_NEXT_HANDOFF] = "ah4-decrypt-handoff",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ah6_decrypt_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -463,15 +475,14 @@ VLIB_NODE_FN (ah6_decrypt_node) (vlib_main_t * vm,
return ah_decrypt_inline (vm, node, from_frame, 1 /* is_ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ah6_decrypt_node) = {
.name = "ah6-decrypt",
.vector_size = sizeof (u32),
.format_trace = format_ah_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(ah_decrypt_error_strings),
- .error_strings = ah_decrypt_error_strings,
+ .n_errors = AH_DECRYPT_N_ERROR,
+ .error_counters = ah_decrypt_error_counters,
.n_next_nodes = AH_DECRYPT_N_NEXT,
.next_nodes = {
@@ -481,7 +492,6 @@ VLIB_REGISTER_NODE (ah6_decrypt_node) = {
[AH_DECRYPT_NEXT_HANDOFF] = "ah6-decrypt-handoff",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vnet/ipsec/ah_encrypt.c b/src/vnet/ipsec/ah_encrypt.c
index bb971e40811..960327f071d 100644
--- a/src/vnet/ipsec/ah_encrypt.c
+++ b/src/vnet/ipsec/ah_encrypt.c
@@ -22,6 +22,7 @@
#include <vnet/ipsec/ipsec.h>
#include <vnet/ipsec/esp.h>
#include <vnet/ipsec/ah.h>
+#include <vnet/ipsec/ipsec.api_enum.h>
#include <vnet/tunnel/tunnel_dp.h>
#define foreach_ah_encrypt_next \
@@ -38,25 +39,6 @@ typedef enum
AH_ENCRYPT_N_NEXT,
} ah_encrypt_next_t;
-#define foreach_ah_encrypt_error \
- _ (RX_PKTS, "AH pkts received") \
- _ (CRYPTO_ENGINE_ERROR, "crypto engine error (packet dropped)") \
- _ (SEQ_CYCLED, "sequence number cycled (packet dropped)")
-
-typedef enum
-{
-#define _(sym,str) AH_ENCRYPT_ERROR_##sym,
- foreach_ah_encrypt_error
-#undef _
- AH_ENCRYPT_N_ERROR,
-} ah_encrypt_error_t;
-
-static char *ah_encrypt_error_strings[] = {
-#define _(sym,string) string,
- foreach_ah_encrypt_error
-#undef _
-};
-
typedef struct
{
u32 sa_index;
@@ -99,8 +81,10 @@ ah_process_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
{
u32 bi = op->user_data;
- b[bi]->error = node->errors[AH_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR];
- nexts[bi] = AH_ENCRYPT_NEXT_DROP;
+ ah_encrypt_set_next_index (b[bi], node, vm->thread_index,
+ AH_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR, bi,
+ nexts, AH_ENCRYPT_NEXT_DROP,
+ vnet_buffer (b[bi])->ipsec.sad_index);
n_fail--;
}
op++;
@@ -171,19 +155,20 @@ ah_encrypt_inline (vlib_main_t * vm,
{
if (current_sa_index != ~0)
vlib_increment_combined_counter (&ipsec_sa_counters, thread_index,
- current_sa_index,
- current_sa_pkts,
+ current_sa_index, current_sa_pkts,
current_sa_bytes);
current_sa_index = vnet_buffer (b[0])->ipsec.sad_index;
sa0 = ipsec_sa_get (current_sa_index);
current_sa_bytes = current_sa_pkts = 0;
+ vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index,
+ current_sa_index);
}
pd->sa_index = current_sa_index;
next[0] = AH_ENCRYPT_NEXT_DROP;
- if (PREDICT_FALSE (~0 == sa0->thread_index))
+ if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index))
{
/* this is the first packet to use this SA, claim the SA
* for this thread. this could happen simultaneously on
@@ -201,7 +186,9 @@ ah_encrypt_inline (vlib_main_t * vm,
if (PREDICT_FALSE (esp_seq_advance (sa0)))
{
- b[0]->error = node->errors[AH_ENCRYPT_ERROR_SEQ_CYCLED];
+ ah_encrypt_set_next_index (b[0], node, vm->thread_index,
+ AH_ENCRYPT_ERROR_SEQ_CYCLED, 0, next,
+ AH_ENCRYPT_NEXT_DROP, current_sa_index);
pd->skip = 1;
goto next;
}
@@ -455,15 +442,14 @@ VLIB_NODE_FN (ah4_encrypt_node) (vlib_main_t * vm,
return ah_encrypt_inline (vm, node, from_frame, 0 /* is_ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ah4_encrypt_node) = {
.name = "ah4-encrypt",
.vector_size = sizeof (u32),
.format_trace = format_ah_encrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(ah_encrypt_error_strings),
- .error_strings = ah_encrypt_error_strings,
+ .n_errors = AH_ENCRYPT_N_ERROR,
+ .error_counters = ah_encrypt_error_counters,
.n_next_nodes = AH_ENCRYPT_N_NEXT,
.next_nodes = {
@@ -472,7 +458,6 @@ VLIB_REGISTER_NODE (ah4_encrypt_node) = {
[AH_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "interface-output",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ah6_encrypt_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -481,15 +466,14 @@ VLIB_NODE_FN (ah6_encrypt_node) (vlib_main_t * vm,
return ah_encrypt_inline (vm, node, from_frame, 1 /* is_ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ah6_encrypt_node) = {
.name = "ah6-encrypt",
.vector_size = sizeof (u32),
.format_trace = format_ah_encrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(ah_encrypt_error_strings),
- .error_strings = ah_encrypt_error_strings,
+ .n_errors = AH_ENCRYPT_N_ERROR,
+ .error_counters = ah_encrypt_error_counters,
.n_next_nodes = AH_ENCRYPT_N_NEXT,
.next_nodes = {
@@ -498,7 +482,6 @@ VLIB_REGISTER_NODE (ah6_encrypt_node) = {
[AH_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "interface-output",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vnet/ipsec/esp.h b/src/vnet/ipsec/esp.h
index d179233df49..1c3ce776ad2 100644
--- a/src/vnet/ipsec/esp.h
+++ b/src/vnet/ipsec/esp.h
@@ -18,6 +18,7 @@
#include <vnet/ip/ip.h>
#include <vnet/crypto/crypto.h>
#include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/ipsec.api_enum.h>
typedef struct
{
@@ -36,27 +37,21 @@ typedef struct
u8 next_header;
} esp_footer_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip4_header_t ip4;
esp_header_t esp;
}) ip4_and_esp_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip4_header_t ip4;
udp_header_t udp;
esp_header_t esp;
}) ip4_and_udp_and_esp_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip6_header_t ip6;
esp_header_t esp;
}) ip6_and_esp_header_t;
-/* *INDENT-ON* */
/**
* AES counter mode nonce
@@ -85,9 +80,6 @@ typedef struct esp_aead_t_
} __clib_packed esp_aead_t;
#define ESP_SEQ_MAX (4294967295UL)
-#define ESP_MAX_BLOCK_SIZE (16)
-#define ESP_MAX_IV_SIZE (16)
-#define ESP_MAX_ICV_SIZE (32)
u8 *format_esp_header (u8 * s, va_list * args);
@@ -141,39 +133,76 @@ esp_aad_fill (u8 *data, const esp_header_t *esp, const ipsec_sa_t *sa,
}
}
-/* Special case to drop or hand off packets for sync/async modes.
- *
- * Different than sync mode, async mode only enqueue drop or hand-off packets
- * to next nodes.
- */
-always_inline void
-esp_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node, u32 err,
- u16 index, u16 *nexts, u16 drop_next)
+always_inline u32
+esp_encrypt_err_to_sa_err (u32 err)
{
- nexts[index] = drop_next;
- b->error = node->errors[err];
+ switch (err)
+ {
+ case ESP_ENCRYPT_ERROR_HANDOFF:
+ return IPSEC_SA_ERROR_HANDOFF;
+ case ESP_ENCRYPT_ERROR_SEQ_CYCLED:
+ return IPSEC_SA_ERROR_SEQ_CYCLED;
+ case ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR:
+ return IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR;
+ case ESP_ENCRYPT_ERROR_CRYPTO_QUEUE_FULL:
+ return IPSEC_SA_ERROR_CRYPTO_QUEUE_FULL;
+ case ESP_ENCRYPT_ERROR_NO_BUFFERS:
+ return IPSEC_SA_ERROR_NO_BUFFERS;
+ case ESP_ENCRYPT_ERROR_NO_ENCRYPTION:
+ return IPSEC_SA_ERROR_NO_ENCRYPTION;
+ }
+ return ~0;
}
-/* when submitting a frame is failed, drop all buffers in the frame */
always_inline u32
-esp_async_recycle_failed_submit (vlib_main_t *vm, vnet_crypto_async_frame_t *f,
- vlib_node_runtime_t *node, u32 err, u16 index,
- u32 *from, u16 *nexts, u16 drop_next_index)
+esp_decrypt_err_to_sa_err (u32 err)
{
- u32 n_drop = f->n_elts;
- u32 *bi = f->buffer_indices;
-
- while (n_drop--)
+ switch (err)
{
- from[index] = bi[0];
- esp_set_next_index (vlib_get_buffer (vm, bi[0]), node, err, index, nexts,
- drop_next_index);
- bi++;
- index++;
+ case ESP_DECRYPT_ERROR_HANDOFF:
+ return IPSEC_SA_ERROR_HANDOFF;
+ case ESP_DECRYPT_ERROR_DECRYPTION_FAILED:
+ return IPSEC_SA_ERROR_DECRYPTION_FAILED;
+ case ESP_DECRYPT_ERROR_INTEG_ERROR:
+ return IPSEC_SA_ERROR_INTEG_ERROR;
+ case ESP_DECRYPT_ERROR_CRYPTO_ENGINE_ERROR:
+ return IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR;
+ case ESP_DECRYPT_ERROR_REPLAY:
+ return IPSEC_SA_ERROR_REPLAY;
+ case ESP_DECRYPT_ERROR_RUNT:
+ return IPSEC_SA_ERROR_RUNT;
+ case ESP_DECRYPT_ERROR_NO_BUFFERS:
+ return IPSEC_SA_ERROR_NO_BUFFERS;
+ case ESP_DECRYPT_ERROR_OVERSIZED_HEADER:
+ return IPSEC_SA_ERROR_OVERSIZED_HEADER;
+ case ESP_DECRYPT_ERROR_NO_TAIL_SPACE:
+ return IPSEC_SA_ERROR_NO_TAIL_SPACE;
+ case ESP_DECRYPT_ERROR_TUN_NO_PROTO:
+ return IPSEC_SA_ERROR_TUN_NO_PROTO;
+ case ESP_DECRYPT_ERROR_UNSUP_PAYLOAD:
+ return IPSEC_SA_ERROR_UNSUP_PAYLOAD;
}
- vnet_crypto_async_reset_frame (f);
+ return ~0;
+}
- return (f->n_elts);
+always_inline void
+esp_encrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
+ u32 thread_index, u32 err, u16 index, u16 *nexts,
+ u16 drop_next, u32 sa_index)
+{
+ ipsec_set_next_index (b, node, thread_index, err,
+ esp_encrypt_err_to_sa_err (err), index, nexts,
+ drop_next, sa_index);
+}
+
+always_inline void
+esp_decrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
+ u32 thread_index, u32 err, u16 index, u16 *nexts,
+ u16 drop_next, u32 sa_index)
+{
+ ipsec_set_next_index (b, node, thread_index, err,
+ esp_decrypt_err_to_sa_err (err), index, nexts,
+ drop_next, sa_index);
}
/**
@@ -250,6 +279,43 @@ typedef struct
extern esp_async_post_next_t esp_encrypt_async_next;
extern esp_async_post_next_t esp_decrypt_async_next;
+/* when submitting a frame fails, drop all buffers in the frame */
+always_inline u32
+esp_async_recycle_failed_submit (vlib_main_t *vm, vnet_crypto_async_frame_t *f,
+ vlib_node_runtime_t *node, u32 err,
+ u32 ipsec_sa_err, u16 index, u32 *from,
+ u16 *nexts, u16 drop_next_index,
+ bool is_encrypt)
+{
+ vlib_buffer_t *b;
+ u32 n_drop = f->n_elts;
+ u32 *bi = f->buffer_indices;
+
+ while (n_drop--)
+ {
+ u32 sa_index;
+
+ from[index] = bi[0];
+ b = vlib_get_buffer (vm, bi[0]);
+
+ if (is_encrypt)
+ {
+ sa_index = vnet_buffer (b)->ipsec.sad_index;
+ }
+ else
+ {
+ sa_index = esp_post_data (b)->decrypt_data.sa_index;
+ }
+
+ ipsec_set_next_index (b, node, vm->thread_index, err, ipsec_sa_err,
+ index, nexts, drop_next_index, sa_index);
+ bi++;
+ index++;
+ }
+
+ return (f->n_elts);
+}
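
The relocated helper now takes the per-SA error explicitly, plus an is_encrypt flag, because on the decrypt path the SA index must be recovered from esp_post_data () rather than from vnet_buffer (). A toy model of the recycle loop, under simplified stand-in types:

#include <stdio.h>

#define TOY_DROP_NEXT 0

typedef struct
{
  unsigned n_elts;
  unsigned buffer_indices[8];
} toy_frame_t;

static unsigned
toy_recycle_failed_submit (toy_frame_t *f, unsigned index, unsigned *from,
                           unsigned short *nexts)
{
  unsigned n = f->n_elts;
  unsigned *bi = f->buffer_indices;

  while (n--)
    {
      from[index] = *bi++;          /* reclaim the buffer index ...     */
      nexts[index] = TOY_DROP_NEXT; /* ... and route it to the drop node */
      index++;
    }
  return f->n_elts;
}

int
main (void)
{
  toy_frame_t f = { 3, { 10, 11, 12 } };
  unsigned from[8];
  unsigned short nexts[8];
  unsigned n_noop = toy_recycle_failed_submit (&f, 0, from, nexts);

  printf ("recycled %u buffers, first bi %u\n", n_noop, from[0]);
  return 0;
}
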
+
#endif /* __ESP_H__ */
/*
diff --git a/src/vnet/ipsec/esp_decrypt.c b/src/vnet/ipsec/esp_decrypt.c
index f1e8065b8ff..26d8ca1deee 100644
--- a/src/vnet/ipsec/esp_decrypt.c
+++ b/src/vnet/ipsec/esp_decrypt.c
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
#include <vnet/vnet.h>
#include <vnet/api_errno.h>
#include <vnet/ip/ip.h>
@@ -58,35 +57,6 @@ typedef enum
ESP_DECRYPT_POST_N_NEXT,
} esp_decrypt_post_next_t;
-#define foreach_esp_decrypt_error \
- _ (RX_PKTS, "ESP pkts received") \
- _ (RX_POST_PKTS, "ESP-POST pkts received") \
- _ (HANDOFF, "hand-off") \
- _ (DECRYPTION_FAILED, "ESP decryption failed") \
- _ (INTEG_ERROR, "Integrity check failed") \
- _ (CRYPTO_ENGINE_ERROR, "crypto engine error (packet dropped)") \
- _ (REPLAY, "SA replayed packet") \
- _ (RUNT, "undersized packet") \
- _ (NO_BUFFERS, "no buffers (packet dropped)") \
- _ (OVERSIZED_HEADER, "buffer with oversized header (dropped)") \
- _ (NO_TAIL_SPACE, "no enough buffer tail space (dropped)") \
- _ (TUN_NO_PROTO, "no tunnel protocol") \
- _ (UNSUP_PAYLOAD, "unsupported payload")
-
-typedef enum
-{
-#define _(sym,str) ESP_DECRYPT_ERROR_##sym,
- foreach_esp_decrypt_error
-#undef _
- ESP_DECRYPT_N_ERROR,
-} esp_decrypt_error_t;
-
-static char *esp_decrypt_error_strings[] = {
-#define _(sym,string) string,
- foreach_esp_decrypt_error
-#undef _
-};
-
typedef struct
{
u32 seq;
@@ -97,6 +67,8 @@ typedef struct
ipsec_integ_alg_t integ_alg;
} esp_decrypt_trace_t;
+typedef vl_counter_esp_decrypt_enum_t esp_decrypt_error_t;
+
/* The number of bytes in the hi sequence number */
#define N_HI_ESN_BYTES 4
@@ -141,8 +113,9 @@ esp_process_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
err = e;
else
err = ESP_DECRYPT_ERROR_CRYPTO_ENGINE_ERROR;
- b[bi]->error = node->errors[err];
- nexts[bi] = ESP_DECRYPT_NEXT_DROP;
+ esp_decrypt_set_next_index (b[bi], node, vm->thread_index, err, bi,
+ nexts, ESP_DECRYPT_NEXT_DROP,
+ vnet_buffer (b[bi])->ipsec.sad_index);
n_fail--;
}
op++;
@@ -173,8 +146,9 @@ esp_process_chained_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
err = e;
else
err = ESP_DECRYPT_ERROR_CRYPTO_ENGINE_ERROR;
- b[bi]->error = node->errors[err];
- nexts[bi] = ESP_DECRYPT_NEXT_DROP;
+ esp_decrypt_set_next_index (b[bi], node, vm->thread_index, err, bi,
+ nexts, ESP_DECRYPT_NEXT_DROP,
+ vnet_buffer (b[bi])->ipsec.sad_index);
n_fail--;
}
op++;
@@ -187,6 +161,9 @@ esp_remove_tail (vlib_main_t * vm, vlib_buffer_t * b, vlib_buffer_t * last,
{
vlib_buffer_t *before_last = b;
+ if (b != last)
+ b->total_length_not_including_first_buffer -= tail;
+
if (last->current_length > tail)
{
last->current_length -= tail;
@@ -204,6 +181,37 @@ esp_remove_tail (vlib_main_t * vm, vlib_buffer_t * b, vlib_buffer_t * last,
before_last->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
}
+always_inline void
+esp_remove_tail_and_tfc_padding (vlib_main_t *vm, vlib_node_runtime_t *node,
+ const esp_decrypt_packet_data_t *pd,
+ vlib_buffer_t *b, vlib_buffer_t *last,
+ u16 *next, u16 tail, int is_ip6)
+{
+ const u16 total_buffer_length = vlib_buffer_length_in_chain (vm, b);
+ u16 ip_packet_length;
+ if (is_ip6)
+ {
+ const ip6_header_t *ip6 = vlib_buffer_get_current (b);
+ ip_packet_length =
+ clib_net_to_host_u16 (ip6->payload_length) + sizeof (ip6_header_t);
+ }
+ else
+ {
+ const ip4_header_t *ip4 = vlib_buffer_get_current (b);
+ ip_packet_length = clib_net_to_host_u16 (ip4->length);
+ }
+ /* In case of TFC padding, the size of the buffer data needs to be adjusted
+ * to the IP packet length */
+ if (PREDICT_FALSE (total_buffer_length < ip_packet_length + tail))
+ {
+ esp_decrypt_set_next_index (b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_NO_TAIL_SPACE, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
+ return;
+ }
+ esp_remove_tail (vm, b, last, total_buffer_length - ip_packet_length);
+}
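
RFC 4303 lets a sender append TFC padding past the end of the inner IP packet, so the inner header's own length field is the only reliable trim boundary. A worked-arithmetic sketch of the trim above, with illustrative numbers only:

#include <assert.h>
#include <stdio.h>

int
main (void)
{
  unsigned total_buffer_length = 1400; /* decrypted payload chain length   */
  unsigned ip_packet_length = 1344;    /* from the inner IPv4 total length */
  unsigned tail = 22;                  /* esp_footer + pad_length + icv    */

  /* mirrored sanity check: a chain shorter than the claimed IP length
   * plus the ESP tail is malformed and is dropped with NO_TAIL_SPACE */
  assert (total_buffer_length >= ip_packet_length + tail);

  unsigned to_remove = total_buffer_length - ip_packet_length;
  printf ("trim %u bytes (%u of them TFC padding)\n", to_remove,
          to_remove - tail);
  return 0;
}
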
+
/* ICV is split across the last two buffers, so move it to the last buffer and
   return a pointer to it */
static_always_inline u8 *
@@ -229,9 +237,12 @@ esp_move_icv (vlib_main_t * vm, vlib_buffer_t * first,
before_last->current_length -= first_sz;
if (before_last == first)
pd->current_length -= first_sz;
+ else
+ first->total_length_not_including_first_buffer -= first_sz;
clib_memset (vlib_buffer_get_tail (before_last), 0, first_sz);
if (dif)
dif[0] = first_sz;
+ first->total_length_not_including_first_buffer -= last_sz;
pd2->lb = before_last;
pd2->icv_removed = 1;
pd2->free_buffer_index = before_last->next_buffer;
@@ -483,18 +494,16 @@ esp_decrypt_chain_crypto (vlib_main_t * vm, ipsec_per_thread_data_t * ptd,
return total_len;
}
-static_always_inline void
-esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node,
- ipsec_per_thread_data_t * ptd,
- vnet_crypto_op_t *** crypto_ops,
- vnet_crypto_op_t *** integ_ops,
- vnet_crypto_op_t * op,
- ipsec_sa_t * sa0, u8 * payload,
- u16 len, u8 icv_sz, u8 iv_sz,
- esp_decrypt_packet_data_t * pd,
- esp_decrypt_packet_data2_t * pd2,
- vlib_buffer_t * b, u16 * next, u32 index)
+static_always_inline esp_decrypt_error_t
+esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
+ ipsec_sa_t *sa0, u8 *payload, u16 len, u8 icv_sz,
+ u8 iv_sz, esp_decrypt_packet_data_t *pd,
+ esp_decrypt_packet_data2_t *pd2, vlib_buffer_t *b,
+ u32 index)
{
+ vnet_crypto_op_t **crypto_ops;
+ vnet_crypto_op_t **integ_ops;
+ vnet_crypto_op_t _op, *op = &_op;
const u8 esp_sz = sizeof (esp_header_t);
if (PREDICT_TRUE (sa0->integ_op_id != VNET_CRYPTO_OP_NONE))
@@ -511,6 +520,8 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node,
if (pd->is_chain)
{
/* buffer is chained */
+ integ_ops = &ptd->chained_integ_ops;
+
op->len = pd->current_length;
/* special case when ICV is split and needs to be reassembled
@@ -536,8 +547,7 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node,
{
/* we now have a single buffer of crypto data, adjust
* the length (second buffer contains only ICV) */
- *integ_ops = &ptd->integ_ops;
- *crypto_ops = &ptd->crypto_ops;
+ integ_ops = &ptd->integ_ops;
len = b->current_length;
goto out;
}
@@ -551,17 +561,16 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node,
if (esp_decrypt_chain_integ (vm, ptd, pd, pd2, sa0, b, icv_sz,
payload, pd->current_length,
&op->digest, &op->n_chunks, 0) < 0)
- {
- b->error = node->errors[ESP_DECRYPT_ERROR_NO_BUFFERS];
- next[0] = ESP_DECRYPT_NEXT_DROP;
- return;
- }
+ return ESP_DECRYPT_ERROR_NO_BUFFERS;
}
else
- esp_insert_esn (vm, sa0, pd, pd2, &op->len, &op->digest, &len, b,
- payload);
+ {
+ integ_ops = &ptd->integ_ops;
+ esp_insert_esn (vm, sa0, pd, pd2, &op->len, &op->digest, &len, b,
+ payload);
+ }
out:
- vec_add_aligned (*(integ_ops[0]), op, 1, CLIB_CACHE_LINE_BYTES);
+ vec_add_aligned (*integ_ops, op, 1, CLIB_CACHE_LINE_BYTES);
}
payload += esp_sz;
@@ -587,6 +596,12 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node,
op->aad_len = esp_aad_fill (op->aad, esp0, sa0, pd->seq_hi);
op->tag = payload + len;
op->tag_len = 16;
+ if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa0)))
+ {
+ /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */
+ payload -= iv_sz;
+ len += iv_sz;
+ }
}
else
{
@@ -609,26 +624,32 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node,
esp_decrypt_chain_crypto (vm, ptd, pd, pd2, sa0, b, icv_sz,
payload, len - pd->iv_sz + pd->icv_sz,
&op->tag, &op->n_chunks);
+ crypto_ops = &ptd->chained_crypto_ops;
+ }
+ else
+ {
+ crypto_ops = &ptd->crypto_ops;
}
- vec_add_aligned (*(crypto_ops[0]), op, 1, CLIB_CACHE_LINE_BYTES);
+ vec_add_aligned (*crypto_ops, op, 1, CLIB_CACHE_LINE_BYTES);
}
+
+ return ESP_DECRYPT_ERROR_RX_PKTS;
}
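
With this refactor the sync-prepare helper selects its own op vectors and reports its outcome as an esp_decrypt_error_t, where ESP_DECRYPT_ERROR_RX_PKTS doubles as the success value. A sketch of the resulting caller pattern, using simplified stand-in names:

#include <stdio.h>

/* simplified stand-ins for the esp_decrypt_error_t values */
enum toy_err { TOY_ERR_RX_PKTS, TOY_ERR_NO_BUFFERS };

static enum toy_err
toy_prepare (int have_buffers)
{
  return have_buffers ? TOY_ERR_RX_PKTS : TOY_ERR_NO_BUFFERS;
}

int
main (void)
{
  enum toy_err err = toy_prepare (0);

  if (err != TOY_ERR_RX_PKTS) /* anything but RX_PKTS means drop */
    printf ("drop, err=%d\n", err);
  else
    printf ("enqueue for crypto\n");
  return 0;
}
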
static_always_inline esp_decrypt_error_t
-esp_decrypt_prepare_async_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
- ipsec_per_thread_data_t *ptd,
+esp_decrypt_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
vnet_crypto_async_frame_t *f, ipsec_sa_t *sa0,
u8 *payload, u16 len, u8 icv_sz, u8 iv_sz,
esp_decrypt_packet_data_t *pd,
esp_decrypt_packet_data2_t *pd2, u32 bi,
- vlib_buffer_t *b, u16 *next, u16 async_next)
+ vlib_buffer_t *b, u16 async_next)
{
const u8 esp_sz = sizeof (esp_header_t);
esp_decrypt_packet_data_t *async_pd = &(esp_post_data (b))->decrypt_data;
esp_decrypt_packet_data2_t *async_pd2 = esp_post_data2 (b);
u8 *tag = payload + len, *iv = payload + esp_sz, *aad = 0;
- u32 key_index;
+ const u32 key_index = sa0->crypto_key_index;
u32 crypto_len, integ_len = 0;
i16 crypto_start_offset, integ_start_offset = 0;
u8 flags = 0;
@@ -636,7 +657,6 @@ esp_decrypt_prepare_async_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
if (!ipsec_sa_is_set_IS_AEAD (sa0))
{
/* linked algs */
- key_index = sa0->linked_key_index;
integ_start_offset = payload - b->data;
integ_len = len;
if (PREDICT_TRUE (sa0->integ_op_id != VNET_CRYPTO_OP_NONE))
@@ -689,8 +709,6 @@ esp_decrypt_prepare_async_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
else
esp_insert_esn (vm, sa0, pd, pd2, &integ_len, &tag, &len, b, payload);
}
- else
- key_index = sa0->crypto_key_index;
out:
/* crypto */
@@ -710,6 +728,12 @@ out:
aad = (u8 *) nonce - sizeof (esp_aead_t);
esp_aad_fill (aad, esp0, sa0, pd->seq_hi);
tag = payload + len;
+ if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa0)))
+ {
+ /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */
+ payload -= iv_sz;
+ len += iv_sz;
+ }
}
else
{
@@ -748,7 +772,8 @@ out:
}
static_always_inline void
-esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node,
+esp_decrypt_post_crypto (vlib_main_t *vm, vlib_node_runtime_t *node,
+ const u16 *next_by_next_header,
const esp_decrypt_packet_data_t *pd,
const esp_decrypt_packet_data2_t *pd2,
vlib_buffer_t *b, u16 *next, int is_ip6, int is_tun,
@@ -760,6 +785,7 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node,
const u8 tun_flags = IPSEC_SA_FLAG_IS_TUNNEL | IPSEC_SA_FLAG_IS_TUNNEL_V6;
u8 pad_length = 0, next_header = 0;
u16 icv_sz;
+ u64 n_lost;
/*
* redo the anti-reply check
@@ -768,34 +794,50 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node,
* check above we did so against the state of the window (W),
* after packet s-1. So each of the packets in the sequence will be
* accepted.
- * This time s will be cheked against Ws-1, s+1 chceked against Ws
- * (i.e. the window state is updated/advnaced)
- * so this time the successive s+! packet will be dropped.
+ * This time s will be checked against Ws-1, s+1 checked against Ws
+ * (i.e. the window state is updated/advanced)
+ * so this time the successive s+1 packet will be dropped.
* This is a consequence of batching the decrypts. If the
- * check-dcrypt-advance process was done for each packet it would
+ * check-decrypt-advance process was done for each packet it would
* be fine. But we batch the decrypts because it's much more efficient
* to do so in SW and if we offload to HW and the process is async.
*
* You're probably thinking, but this means an attacker can send the
- * above sequence and cause VPP to perform decrpyts that will fail,
+ * above sequence and cause VPP to perform decrypts that will fail,
* and that's true. But if the attacker can determine s (a valid
* sequence number in the window) which is non-trivial, it can generate
* a sequence s, s+1, s+2, s+3, ... s+n and nothing will prevent any
* implementation, sequential or batching, from decrypting these.
*/
- if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi, true,
- NULL))
+ if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0)))
{
- b->error = node->errors[ESP_DECRYPT_ERROR_REPLAY];
- next[0] = ESP_DECRYPT_NEXT_DROP;
- return;
+ if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi, true,
+ NULL, true))
+ {
+ esp_decrypt_set_next_index (b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_REPLAY, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
+ return;
+ }
+ n_lost = ipsec_sa_anti_replay_advance (sa0, vm->thread_index, pd->seq,
+ pd->seq_hi, true);
+ }
+ else
+ {
+ if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi, true,
+ NULL, false))
+ {
+ esp_decrypt_set_next_index (b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_REPLAY, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
+ return;
+ }
+ n_lost = ipsec_sa_anti_replay_advance (sa0, vm->thread_index, pd->seq,
+ pd->seq_hi, false);
}
- u64 n_lost =
- ipsec_sa_anti_replay_advance (sa0, vm->thread_index, pd->seq, pd->seq_hi);
-
- vlib_prefetch_simple_counter (&ipsec_sa_lost_counters, vm->thread_index,
- pd->sa_index);
+ vlib_prefetch_simple_counter (&ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST],
+ vm->thread_index, pd->sa_index);
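
The comment above reasons that, within one batch, every packet is validated against the pre-batch window state, so a duplicate inside the batch passes the first check and is only caught by this post-crypto recheck against the advanced window. A self-contained toy of a 64-entry window makes that concrete; it is a simplification of ipsec_sa_anti_replay_and_sn_advance (), not its real layout:

#include <stdint.h>
#include <stdio.h>

typedef struct
{
  uint32_t top;    /* highest sequence number seen          */
  uint64_t bitmap; /* bit i set: (top - i) was already seen */
} toy_window_t;

static int
toy_check (const toy_window_t *w, uint32_t seq) /* 1 = replay or too old */
{
  if (seq > w->top)
    return 0;
  if (w->top - seq >= 64)
    return 1;
  return (w->bitmap >> (w->top - seq)) & 1;
}

static void
toy_advance (toy_window_t *w, uint32_t seq)
{
  if (seq > w->top)
    {
      uint32_t shift = seq - w->top;
      w->bitmap = (shift < 64) ? ((w->bitmap << shift) | 1) : 1;
      w->top = seq;
    }
  else
    w->bitmap |= 1ULL << (w->top - seq);
}

int
main (void)
{
  toy_window_t w = { 100, 1 }; /* only seq 100 seen so far */

  /* batched: both copies of seq 101 are checked against the same state */
  printf ("batched checks: %d %d\n", toy_check (&w, 101),
          toy_check (&w, 101));

  /* check-decrypt-advance per packet: the duplicate is caught */
  toy_advance (&w, 101);
  printf ("after advance:  %d\n", toy_check (&w, 101));
  return 0;
}
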
if (pd->is_chain)
{
@@ -854,7 +896,8 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node,
u16 adv = pd->iv_sz + esp_sz;
u16 tail = sizeof (esp_footer_t) + pad_length + icv_sz;
u16 tail_orig = sizeof (esp_footer_t) + pad_length + pd->icv_sz;
- b->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ b->flags &=
+ ~(VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
if ((pd->flags & tun_flags) == 0 && !is_tun) /* transport mode */
{
@@ -904,14 +947,16 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node,
next[0] = ESP_DECRYPT_NEXT_IP4_INPUT;
b->current_data = pd->current_data + adv;
b->current_length = pd->current_length - adv;
- esp_remove_tail (vm, b, lb, tail);
+ esp_remove_tail_and_tfc_padding (vm, node, pd, b, lb, next, tail,
+ false);
}
else if (next_header == IP_PROTOCOL_IPV6)
{
next[0] = ESP_DECRYPT_NEXT_IP6_INPUT;
b->current_data = pd->current_data + adv;
b->current_length = pd->current_length - adv;
- esp_remove_tail (vm, b, lb, tail);
+ esp_remove_tail_and_tfc_padding (vm, node, pd, b, lb, next, tail,
+ true);
}
else if (next_header == IP_PROTOCOL_MPLS_IN_IP)
{
@@ -920,44 +965,51 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node,
b->current_length = pd->current_length - adv;
esp_remove_tail (vm, b, lb, tail);
}
- else
+ else if (is_tun && next_header == IP_PROTOCOL_GRE)
{
- if (is_tun && next_header == IP_PROTOCOL_GRE)
- {
- gre_header_t *gre;
+ gre_header_t *gre;
- b->current_data = pd->current_data + adv;
- b->current_length = pd->current_length - adv - tail;
+ b->current_data = pd->current_data + adv;
+ b->current_length = pd->current_length - adv - tail;
- gre = vlib_buffer_get_current (b);
+ gre = vlib_buffer_get_current (b);
- vlib_buffer_advance (b, sizeof (*gre));
+ vlib_buffer_advance (b, sizeof (*gre));
- switch (clib_net_to_host_u16 (gre->protocol))
- {
- case GRE_PROTOCOL_teb:
- vnet_update_l2_len (b);
- next[0] = ESP_DECRYPT_NEXT_L2_INPUT;
- break;
- case GRE_PROTOCOL_ip4:
- next[0] = ESP_DECRYPT_NEXT_IP4_INPUT;
- break;
- case GRE_PROTOCOL_ip6:
- next[0] = ESP_DECRYPT_NEXT_IP6_INPUT;
- break;
- default:
- b->error = node->errors[ESP_DECRYPT_ERROR_UNSUP_PAYLOAD];
- next[0] = ESP_DECRYPT_NEXT_DROP;
- break;
- }
- }
- else
+ switch (clib_net_to_host_u16 (gre->protocol))
{
- next[0] = ESP_DECRYPT_NEXT_DROP;
- b->error = node->errors[ESP_DECRYPT_ERROR_UNSUP_PAYLOAD];
- return;
+ case GRE_PROTOCOL_teb:
+ vnet_update_l2_len (b);
+ next[0] = ESP_DECRYPT_NEXT_L2_INPUT;
+ break;
+ case GRE_PROTOCOL_ip4:
+ next[0] = ESP_DECRYPT_NEXT_IP4_INPUT;
+ break;
+ case GRE_PROTOCOL_ip6:
+ next[0] = ESP_DECRYPT_NEXT_IP6_INPUT;
+ break;
+ default:
+ esp_decrypt_set_next_index (
+ b, node, vm->thread_index, ESP_DECRYPT_ERROR_UNSUP_PAYLOAD, 0,
+ next, ESP_DECRYPT_NEXT_DROP, pd->sa_index);
+ break;
}
}
+ else if ((next[0] = vec_elt (next_by_next_header, next_header)) !=
+ (u16) ~0)
+ {
+ b->current_data = pd->current_data + adv;
+ b->current_length = pd->current_length - adv;
+ esp_remove_tail (vm, b, lb, tail);
+ }
+ else
+ {
+ esp_decrypt_set_next_index (b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_UNSUP_PAYLOAD, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
+ return;
+ }
+
if (is_tun)
{
if (ipsec_sa_is_set_IS_PROTECT (sa0))
@@ -994,8 +1046,10 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node,
!ip46_address_is_equal_v4 (&itp->itp_tun.dst,
&ip4->src_address))
{
- next[0] = ESP_DECRYPT_NEXT_DROP;
- b->error = node->errors[ESP_DECRYPT_ERROR_TUN_NO_PROTO];
+ esp_decrypt_set_next_index (
+ b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_TUN_NO_PROTO, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
}
}
else if (next_header == IP_PROTOCOL_IPV6)
@@ -1009,8 +1063,10 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node,
!ip46_address_is_equal_v6 (&itp->itp_tun.dst,
&ip6->src_address))
{
- next[0] = ESP_DECRYPT_NEXT_DROP;
- b->error = node->errors[ESP_DECRYPT_ERROR_TUN_NO_PROTO];
+ esp_decrypt_set_next_index (
+ b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_TUN_NO_PROTO, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
}
}
}
@@ -1018,8 +1074,8 @@ esp_decrypt_post_crypto (vlib_main_t *vm, const vlib_node_runtime_t *node,
}
if (PREDICT_FALSE (n_lost))
- vlib_increment_simple_counter (&ipsec_sa_lost_counters, vm->thread_index,
- pd->sa_index, n_lost);
+ vlib_increment_simple_counter (&ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST],
+ vm->thread_index, pd->sa_index, n_lost);
}
always_inline uword
@@ -1028,6 +1084,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u16 async_next_node)
{
ipsec_main_t *im = &ipsec_main;
+ const u16 *next_by_next_header = im->next_header_registrations;
u32 thread_index = vm->thread_index;
u16 len;
ipsec_per_thread_data_t *ptd = vec_elt_at_index (im->ptd, thread_index);
@@ -1036,8 +1093,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
vlib_buffer_t *sync_bufs[VLIB_FRAME_SIZE];
u16 sync_nexts[VLIB_FRAME_SIZE], *sync_next = sync_nexts, n_sync = 0;
- u16 async_nexts[VLIB_FRAME_SIZE], *async_next = async_nexts;
- u16 noop_nexts[VLIB_FRAME_SIZE], *noop_next = noop_nexts, n_noop = 0;
+ u16 noop_nexts[VLIB_FRAME_SIZE], n_noop = 0;
u32 sync_bi[VLIB_FRAME_SIZE];
u32 noop_bi[VLIB_FRAME_SIZE];
esp_decrypt_packet_data_t pkt_data[VLIB_FRAME_SIZE], *pd = pkt_data;
@@ -1046,9 +1102,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 current_sa_index = ~0, current_sa_bytes = 0, current_sa_pkts = 0;
const u8 esp_sz = sizeof (esp_header_t);
ipsec_sa_t *sa0 = 0;
- vnet_crypto_op_t _op, *op = &_op;
- vnet_crypto_op_t **crypto_ops;
- vnet_crypto_op_t **integ_ops;
+ bool anti_replay_result;
int is_async = im->async_mode;
vnet_crypto_async_op_id_t async_op = ~0;
vnet_crypto_async_frame_t *async_frames[VNET_CRYPTO_ASYNC_OP_N_IDS];
@@ -1086,8 +1140,9 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (n_bufs == 0)
{
err = ESP_DECRYPT_ERROR_NO_BUFFERS;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- ESP_DECRYPT_NEXT_DROP);
+ esp_decrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, ESP_DECRYPT_NEXT_DROP,
+ vnet_buffer (b[0])->ipsec.sad_index);
goto next;
}
@@ -1095,12 +1150,13 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
if (current_sa_pkts)
vlib_increment_combined_counter (&ipsec_sa_counters, thread_index,
- current_sa_index,
- current_sa_pkts,
+ current_sa_index, current_sa_pkts,
current_sa_bytes);
current_sa_bytes = current_sa_pkts = 0;
current_sa_index = vnet_buffer (b[0])->ipsec.sad_index;
+ vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index,
+ current_sa_index);
sa0 = ipsec_sa_get (current_sa_index);
/* fetch the second cacheline ASAP */
@@ -1112,7 +1168,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
is_async = im->async_mode | ipsec_sa_is_set_IS_ASYNC (sa0);
}
- if (PREDICT_FALSE (~0 == sa0->thread_index))
+ if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index))
{
/* this is the first packet to use this SA, claim the SA
* for this thread. this could happen simultaneously on
@@ -1125,8 +1181,9 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
vnet_buffer (b[0])->ipsec.thread_index = sa0->thread_index;
err = ESP_DECRYPT_ERROR_HANDOFF;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- ESP_DECRYPT_NEXT_HANDOFF);
+ esp_decrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, ESP_DECRYPT_NEXT_HANDOFF,
+ current_sa_index);
goto next;
}
@@ -1147,33 +1204,37 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
/* find last buffer in the chain */
while (pd2->lb->flags & VLIB_BUFFER_NEXT_PRESENT)
pd2->lb = vlib_get_buffer (vm, pd2->lb->next_buffer);
+ }
- crypto_ops = &ptd->chained_crypto_ops;
- integ_ops = &ptd->chained_integ_ops;
+ pd->current_length = b[0]->current_length;
+
+ /* anti-replay check */
+ if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0)))
+ {
+ anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
+ sa0, pd->seq, ~0, false, &pd->seq_hi, true);
}
else
{
- crypto_ops = &ptd->crypto_ops;
- integ_ops = &ptd->integ_ops;
+ anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
+ sa0, pd->seq, ~0, false, &pd->seq_hi, false);
}
- pd->current_length = b[0]->current_length;
-
- /* anti-reply check */
- if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, ~0, false,
- &pd->seq_hi))
+ if (anti_replay_result)
{
err = ESP_DECRYPT_ERROR_REPLAY;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- ESP_DECRYPT_NEXT_DROP);
+ esp_decrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, ESP_DECRYPT_NEXT_DROP,
+ current_sa_index);
goto next;
}
if (pd->current_length < cpd.icv_sz + esp_sz + cpd.iv_sz)
{
err = ESP_DECRYPT_ERROR_RUNT;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- ESP_DECRYPT_NEXT_DROP);
+ esp_decrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, ESP_DECRYPT_NEXT_DROP,
+ current_sa_index);
goto next;
}
@@ -1192,31 +1253,47 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
async_frames[async_op] =
vnet_crypto_async_get_frame (vm, async_op);
+ if (PREDICT_FALSE (!async_frames[async_op]))
+ {
+ err = ESP_DECRYPT_ERROR_NO_AVAIL_FRAME;
+ esp_decrypt_set_next_index (
+ b[0], node, thread_index, err, n_noop, noop_nexts,
+ ESP_DECRYPT_NEXT_DROP, current_sa_index);
+ goto next;
+ }
+
/* Save the frame to the list we'll submit at the end */
vec_add1 (ptd->async_frames, async_frames[async_op]);
}
err = esp_decrypt_prepare_async_frame (
- vm, node, ptd, async_frames[async_op], sa0, payload, len,
- cpd.icv_sz, cpd.iv_sz, pd, pd2, from[b - bufs], b[0], async_next,
- async_next_node);
+ vm, ptd, async_frames[async_op], sa0, payload, len, cpd.icv_sz,
+ cpd.iv_sz, pd, pd2, from[b - bufs], b[0], async_next_node);
if (ESP_DECRYPT_ERROR_RX_PKTS != err)
{
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- ESP_DECRYPT_NEXT_DROP);
+ esp_decrypt_set_next_index (
+ b[0], node, thread_index, err, n_noop, noop_nexts,
+ ESP_DECRYPT_NEXT_DROP, current_sa_index);
}
}
else
- esp_decrypt_prepare_sync_op (
- vm, node, ptd, &crypto_ops, &integ_ops, op, sa0, payload, len,
- cpd.icv_sz, cpd.iv_sz, pd, pd2, b[0], sync_next, b - bufs);
+ {
+ err = esp_decrypt_prepare_sync_op (vm, ptd, sa0, payload, len,
+ cpd.icv_sz, cpd.iv_sz, pd, pd2,
+ b[0], n_sync);
+ if (err != ESP_DECRYPT_ERROR_RX_PKTS)
+ {
+ esp_decrypt_set_next_index (b[0], node, thread_index, err, 0,
+ sync_next, ESP_DECRYPT_NEXT_DROP,
+ current_sa_index);
+ }
+ }
/* next */
next:
if (ESP_DECRYPT_ERROR_RX_PKTS != err)
{
noop_bi[n_noop] = from[b - bufs];
n_noop++;
- noop_next++;
}
else if (!is_async)
{
@@ -1227,8 +1304,6 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
pd += 1;
pd2 += 1;
}
- else
- async_next++;
n_left -= 1;
b += 1;
@@ -1254,7 +1329,8 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
n_noop += esp_async_recycle_failed_submit (
vm, *async_frame, node, ESP_DECRYPT_ERROR_CRYPTO_ENGINE_ERROR,
- n_sync, noop_bi, noop_nexts, ESP_DECRYPT_NEXT_DROP);
+ IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR, n_noop, noop_bi, noop_nexts,
+ ESP_DECRYPT_NEXT_DROP, false);
vnet_crypto_async_reset_frame (*async_frame);
vnet_crypto_async_free_frame (vm, *async_frame);
}
@@ -1307,8 +1383,8 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
current_sa_index = vnet_buffer (b[0])->ipsec.sad_index;
if (sync_next[0] >= ESP_DECRYPT_N_NEXT)
- esp_decrypt_post_crypto (vm, node, pd, pd2, b[0], sync_next, is_ip6,
- is_tun, 0);
+ esp_decrypt_post_crypto (vm, node, next_by_next_header, pd, pd2, b[0],
+ sync_next, is_ip6, is_tun, 0);
/* trace: */
if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
@@ -1349,6 +1425,8 @@ esp_decrypt_post_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * from_frame, int is_ip6, int is_tun)
{
+ const ipsec_main_t *im = &ipsec_main;
+ const u16 *next_by_next_header = im->next_header_registrations;
u32 *from = vlib_frame_vector_args (from_frame);
u32 n_left = from_frame->n_vectors;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
@@ -1366,13 +1444,13 @@ esp_decrypt_post_inline (vlib_main_t * vm,
}
if (!pd->is_chain)
- esp_decrypt_post_crypto (vm, node, pd, 0, b[0], next, is_ip6, is_tun,
- 1);
+ esp_decrypt_post_crypto (vm, node, next_by_next_header, pd, 0, b[0],
+ next, is_ip6, is_tun, 1);
else
{
esp_decrypt_packet_data2_t *pd2 = esp_post_data2 (b[0]);
- esp_decrypt_post_crypto (vm, node, pd, pd2, b[0], next, is_ip6,
- is_tun, 1);
+ esp_decrypt_post_crypto (vm, node, next_by_next_header, pd, pd2,
+ b[0], next, is_ip6, is_tun, 1);
}
/*trace: */
@@ -1466,15 +1544,14 @@ VLIB_NODE_FN (esp6_decrypt_tun_post_node) (vlib_main_t * vm,
return esp_decrypt_post_inline (vm, node, from_frame, 1, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp4_decrypt_node) = {
.name = "esp4-decrypt",
.vector_size = sizeof (u32),
.format_trace = format_esp_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
- .error_strings = esp_decrypt_error_strings,
+ .n_errors = ESP_DECRYPT_N_ERROR,
+ .error_counters = esp_decrypt_error_counters,
.n_next_nodes = ESP_DECRYPT_N_NEXT,
.next_nodes = {
@@ -1493,8 +1570,8 @@ VLIB_REGISTER_NODE (esp4_decrypt_post_node) = {
.format_trace = format_esp_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
- .error_strings = esp_decrypt_error_strings,
+ .n_errors = ESP_DECRYPT_N_ERROR,
+ .error_counters = esp_decrypt_error_counters,
.sibling_of = "esp4-decrypt",
};
@@ -1505,8 +1582,8 @@ VLIB_REGISTER_NODE (esp6_decrypt_node) = {
.format_trace = format_esp_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
- .error_strings = esp_decrypt_error_strings,
+ .n_errors = ESP_DECRYPT_N_ERROR,
+ .error_counters = esp_decrypt_error_counters,
.n_next_nodes = ESP_DECRYPT_N_NEXT,
.next_nodes = {
@@ -1525,8 +1602,8 @@ VLIB_REGISTER_NODE (esp6_decrypt_post_node) = {
.format_trace = format_esp_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
- .error_strings = esp_decrypt_error_strings,
+ .n_errors = ESP_DECRYPT_N_ERROR,
+ .error_counters = esp_decrypt_error_counters,
.sibling_of = "esp6-decrypt",
};
@@ -1536,8 +1613,8 @@ VLIB_REGISTER_NODE (esp4_decrypt_tun_node) = {
.vector_size = sizeof (u32),
.format_trace = format_esp_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
- .error_strings = esp_decrypt_error_strings,
+ .n_errors = ESP_DECRYPT_N_ERROR,
+ .error_counters = esp_decrypt_error_counters,
.n_next_nodes = ESP_DECRYPT_N_NEXT,
.next_nodes = {
[ESP_DECRYPT_NEXT_DROP] = "ip4-drop",
@@ -1555,8 +1632,8 @@ VLIB_REGISTER_NODE (esp4_decrypt_tun_post_node) = {
.format_trace = format_esp_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
- .error_strings = esp_decrypt_error_strings,
+ .n_errors = ESP_DECRYPT_N_ERROR,
+ .error_counters = esp_decrypt_error_counters,
.sibling_of = "esp4-decrypt-tun",
};
@@ -1566,8 +1643,8 @@ VLIB_REGISTER_NODE (esp6_decrypt_tun_node) = {
.vector_size = sizeof (u32),
.format_trace = format_esp_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
- .error_strings = esp_decrypt_error_strings,
+ .n_errors = ESP_DECRYPT_N_ERROR,
+ .error_counters = esp_decrypt_error_counters,
.n_next_nodes = ESP_DECRYPT_N_NEXT,
.next_nodes = {
[ESP_DECRYPT_NEXT_DROP] = "ip6-drop",
@@ -1585,12 +1662,11 @@ VLIB_REGISTER_NODE (esp6_decrypt_tun_post_node) = {
.format_trace = format_esp_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
- .error_strings = esp_decrypt_error_strings,
+ .n_errors = ESP_DECRYPT_N_ERROR,
+ .error_counters = esp_decrypt_error_counters,
.sibling_of = "esp6-decrypt-tun",
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c
index d102bd68f74..dd47053874c 100644
--- a/src/vnet/ipsec/esp_encrypt.c
+++ b/src/vnet/ipsec/esp_encrypt.c
@@ -18,11 +18,13 @@
#include <vnet/vnet.h>
#include <vnet/api_errno.h>
#include <vnet/ip/ip.h>
+#include <vnet/interface_output.h>
#include <vnet/crypto/crypto.h>
#include <vnet/ipsec/ipsec.h>
#include <vnet/ipsec/ipsec_tun.h>
+#include <vnet/ipsec/ipsec.api_enum.h>
#include <vnet/ipsec/esp.h>
#include <vnet/tunnel/tunnel_dp.h>
@@ -43,29 +45,6 @@ typedef enum
ESP_ENCRYPT_N_NEXT,
} esp_encrypt_next_t;
-#define foreach_esp_encrypt_error \
- _ (RX_PKTS, "ESP pkts received") \
- _ (POST_RX_PKTS, "ESP-post pkts received") \
- _ (HANDOFF, "Hand-off") \
- _ (SEQ_CYCLED, "sequence number cycled (packet dropped)") \
- _ (CRYPTO_ENGINE_ERROR, "crypto engine error (packet dropped)") \
- _ (CRYPTO_QUEUE_FULL, "crypto queue full (packet dropped)") \
- _ (NO_BUFFERS, "no buffers (packet dropped)")
-
-typedef enum
-{
-#define _(sym,str) ESP_ENCRYPT_ERROR_##sym,
- foreach_esp_encrypt_error
-#undef _
- ESP_ENCRYPT_N_ERROR,
-} esp_encrypt_error_t;
-
-static char *esp_encrypt_error_strings[] = {
-#define _(sym,string) string,
- foreach_esp_encrypt_error
-#undef _
-};
-
typedef struct
{
u32 sa_index;
@@ -82,6 +61,8 @@ typedef struct
u32 next_index;
} esp_encrypt_post_trace_t;
+typedef vl_counter_esp_encrypt_enum_t esp_encrypt_error_t;
+
/* packet trace format function */
static u8 *
format_esp_encrypt_trace (u8 * s, va_list * args)
@@ -114,8 +95,7 @@ format_esp_post_encrypt_trace (u8 * s, va_list * args)
/* pad packet in input buffer */
static_always_inline u8 *
esp_add_footer_and_icv (vlib_main_t *vm, vlib_buffer_t **last, u8 esp_align,
- u8 icv_sz, vlib_node_runtime_t *node,
- u16 buffer_data_size, uword total_len)
+ u8 icv_sz, u16 buffer_data_size, uword total_len)
{
static const u8 pad_data[ESP_MAX_BLOCK_SIZE] = {
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
@@ -169,11 +149,9 @@ esp_update_ip4_hdr (ip4_header_t * ip4, u16 len, int is_transport, int is_udp)
if (is_transport)
{
u8 prot = is_udp ? IP_PROTOCOL_UDP : IP_PROTOCOL_IPSEC_ESP;
-
- sum = ip_csum_update (ip4->checksum, ip4->protocol,
- prot, ip4_header_t, protocol);
+ sum = ip_csum_update (ip4->checksum, ip4->protocol, prot, ip4_header_t,
+ protocol);
ip4->protocol = prot;
-
sum = ip_csum_update (sum, old_len, len, ip4_header_t, length);
}
else
@@ -202,9 +180,9 @@ ext_hdr_is_pre_esp (u8 nexthdr)
return !u8x16_is_all_zero (ext_hdr_types == u8x16_splat (nexthdr));
#else
- return ((nexthdr ^ IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) |
- (nexthdr ^ IP_PROTOCOL_IPV6_ROUTE) |
- (nexthdr ^ IP_PROTOCOL_IPV6_FRAGMENTATION) != 0);
+ return (!(nexthdr ^ IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) ||
+ !(nexthdr ^ IP_PROTOCOL_IPV6_ROUTE) ||
+ !(nexthdr ^ IP_PROTOCOL_IPV6_FRAGMENTATION));
#endif
}
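
The scalar fallback replaced above computed a bitwise OR in which `!= 0` bound only to the final constant (`!=` has higher precedence than `^` and `|`), making the test truthy for almost any next header. The fix ORs three equality tests via the identity !(a ^ b) == (a == b). A self-check contrasting both forms:

#include <assert.h>
#include <stdio.h>

enum { HBH = 0, ROUTE = 43, FRAG = 44, TCP = 6 };

static int
is_pre_esp_fixed (unsigned char n)
{
  return (!(n ^ HBH) || !(n ^ ROUTE) || !(n ^ FRAG));
}

static int
is_pre_esp_old (unsigned char n)
{
  /* buggy: `!=` binds tighter than `|`, so the last term is n ^ (FRAG != 0) */
  return ((n ^ HBH) | (n ^ ROUTE) | (n ^ FRAG != 0));
}

int
main (void)
{
  assert (is_pre_esp_fixed (ROUTE) && !is_pre_esp_fixed (TCP));
  /* the old form wrongly reports TCP as a pre-ESP extension header */
  printf ("old(TCP)=%d fixed(TCP)=%d\n", !!is_pre_esp_old (TCP),
          is_pre_esp_fixed (TCP));
  return 0;
}
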
@@ -223,9 +201,8 @@ esp_get_ip6_hdr_len (ip6_header_t * ip6, ip6_ext_header_t ** ext_hdr)
return len;
}
- p = (void *) (ip6 + 1);
+ p = ip6_next_header (ip6);
len += ip6_ext_header_len (p);
-
while (ext_hdr_is_pre_esp (p->next_hdr))
{
len += ip6_ext_header_len (p);
@@ -236,6 +213,25 @@ esp_get_ip6_hdr_len (ip6_header_t * ip6, ip6_ext_header_t ** ext_hdr)
return len;
}
+/* IPsec IV generation: IV requirements differ depending on the
+ * encryption mode: IVs must be unpredictable for AES-CBC, whereas they can
+ * be predictable but must never be reused with the same key material
+ * for CTR and GCM.
+ * To avoid reusing the same IVs between multiple VPP instances and between
+ * restarts, we use a properly chosen PRNG to generate IVs. To ensure the IV is
+ * unpredictable for CBC, it is then encrypted using the same key as the
+ * message. You can refer to NIST SP800-38a and NIST SP800-38d for more
+ * details. */
+static_always_inline void *
+esp_generate_iv (ipsec_sa_t *sa, void *payload, int iv_sz)
+{
+ ASSERT (iv_sz >= sizeof (u64));
+ u64 *iv = (u64 *) (payload - iv_sz);
+ clib_memset_u8 (iv, 0, iv_sz);
+ *iv = clib_pcg64i_random_r (&sa->iv_prng);
+ return iv;
+}
+
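
The hunks below rely on this IV placement: for CTR/GCM the nonce and, for
AEAD, the AAD are assembled in scratch space in front of the headers still
to be written, while the fresh IV occupies the iv_sz bytes just before the
payload. A rough sketch of the layout that pointer arithmetic assumes
(field widths illustrative):

/*  low addresses                                          high addresses
 *  [ aad ][ salt | iv ][ hdr_len bytes of headers ][ pkt IV ][ payload ]
 *
 *  pkt_iv = payload - iv_sz
 *  nonce  = (esp_ctr_nonce_t *) (pkt_iv - hdr_len - sizeof (*nonce))
 *  aad    = (u8 *) nonce - sizeof (esp_aead_t)
 *
 *  The crypto op consumes the nonce (salt plus the copied IV) rather than
 *  pkt_iv itself; for CBC a zeroed IV is instead built in front of the
 *  headers and the random IV field before the payload is folded into the
 *  ciphered range. */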
static_always_inline void
esp_process_chained_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_crypto_op_t * ops, vlib_buffer_t * b[],
@@ -257,8 +253,10 @@ esp_process_chained_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
{
u32 bi = op->user_data;
- b[bi]->error = node->errors[ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR];
- nexts[bi] = drop_next;
+ esp_encrypt_set_next_index (b[bi], node, vm->thread_index,
+ ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR,
+ bi, nexts, drop_next,
+ vnet_buffer (b[bi])->ipsec.sad_index);
n_fail--;
}
op++;
@@ -285,8 +283,10 @@ esp_process_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
{
u32 bi = op->user_data;
- b[bi]->error = node->errors[ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR];
- nexts[bi] = drop_next;
+ esp_encrypt_set_next_index (b[bi], node, vm->thread_index,
+ ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR,
+ bi, nexts, drop_next,
+ vnet_buffer (b[bi])->ipsec.sad_index);
n_fail--;
}
op++;
@@ -389,28 +389,36 @@ esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
vnet_crypto_op_t *op;
vec_add2_aligned (crypto_ops[0], op, 1, CLIB_CACHE_LINE_BYTES);
vnet_crypto_op_init (op, sa0->crypto_enc_op_id);
+ u8 *crypto_start = payload;
+ /* esp_add_footer_and_icv() in esp_encrypt_inline() makes sure we always
+ * have enough space for ESP header and footer which includes ICV */
+ ASSERT (payload_len > icv_sz);
+ u16 crypto_len = payload_len - icv_sz;
+
+ /* generate the IV in front of the payload */
+ void *pkt_iv = esp_generate_iv (sa0, payload, iv_sz);
- op->src = op->dst = payload;
op->key_index = sa0->crypto_key_index;
- op->len = payload_len - icv_sz;
op->user_data = bi;
if (ipsec_sa_is_set_IS_CTR (sa0))
{
- ASSERT (sizeof (u64) == iv_sz);
/* construct nonce in a scratch space in front of the IP header */
esp_ctr_nonce_t *nonce =
- (esp_ctr_nonce_t *) (payload - sizeof (u64) - hdr_len -
- sizeof (*nonce));
- u64 *pkt_iv = (u64 *) (payload - sizeof (u64));
-
+ (esp_ctr_nonce_t *) (pkt_iv - hdr_len - sizeof (*nonce));
if (ipsec_sa_is_set_IS_AEAD (sa0))
{
/* construct aad in a scratch space in front of the nonce */
op->aad = (u8 *) nonce - sizeof (esp_aead_t);
op->aad_len = esp_aad_fill (op->aad, esp, sa0, seq_hi);
- op->tag = payload + op->len;
+ op->tag = payload + crypto_len;
op->tag_len = 16;
+ if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa0)))
+ {
+ /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */
+ crypto_start -= iv_sz;
+ crypto_len += iv_sz;
+ }
}
else
{
@@ -418,23 +426,34 @@ esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
}
nonce->salt = sa0->salt;
- nonce->iv = *pkt_iv = clib_host_to_net_u64 (sa0->ctr_iv_counter++);
+ nonce->iv = *(u64 *) pkt_iv;
op->iv = (u8 *) nonce;
}
else
{
- op->iv = payload - iv_sz;
- op->flags = VNET_CRYPTO_OP_FLAG_INIT_IV;
+ /* construct zero iv in front of the IP header */
+ op->iv = pkt_iv - hdr_len - iv_sz;
+ clib_memset_u8 (op->iv, 0, iv_sz);
+ /* include iv field in crypto */
+ crypto_start -= iv_sz;
+ crypto_len += iv_sz;
}
- if (lb != b[0])
+ if (PREDICT_FALSE (lb != b[0]))
{
/* is chained */
op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
op->chunk_index = vec_len (ptd->chunks);
op->tag = vlib_buffer_get_tail (lb) - icv_sz;
- esp_encrypt_chain_crypto (vm, ptd, sa0, b[0], lb, icv_sz, payload,
- payload_len, &op->n_chunks);
+ esp_encrypt_chain_crypto (vm, ptd, sa0, b[0], lb, icv_sz,
+ crypto_start, crypto_len + icv_sz,
+ &op->n_chunks);
+ }
+ else
+ {
+ /* not chained */
+ op->src = op->dst = crypto_start;
+ op->len = crypto_len;
}
}
@@ -483,33 +502,36 @@ esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
esp_post_data_t *post = esp_post_data (b);
u8 *tag, *iv, *aad = 0;
u8 flag = 0;
- u32 key_index;
- i16 crypto_start_offset, integ_start_offset = 0;
+ const u32 key_index = sa->crypto_key_index;
+ i16 crypto_start_offset, integ_start_offset;
u16 crypto_total_len, integ_total_len;
post->next_index = next;
/* crypto */
- crypto_start_offset = payload - b->data;
+ crypto_start_offset = integ_start_offset = payload - b->data;
crypto_total_len = integ_total_len = payload_len - icv_sz;
tag = payload + crypto_total_len;
- key_index = sa->linked_key_index;
+ /* generate the IV in front of the payload */
+ void *pkt_iv = esp_generate_iv (sa, payload, iv_sz);
if (ipsec_sa_is_set_IS_CTR (sa))
{
- ASSERT (sizeof (u64) == iv_sz);
/* construct nonce in a scratch space in front of the IP header */
- esp_ctr_nonce_t *nonce = (esp_ctr_nonce_t *) (payload - sizeof (u64) -
- hdr_len - sizeof (*nonce));
- u64 *pkt_iv = (u64 *) (payload - sizeof (u64));
-
+ esp_ctr_nonce_t *nonce =
+ (esp_ctr_nonce_t *) (pkt_iv - hdr_len - sizeof (*nonce));
if (ipsec_sa_is_set_IS_AEAD (sa))
{
/* construct aad in a scratch space in front of the nonce */
aad = (u8 *) nonce - sizeof (esp_aead_t);
esp_aad_fill (aad, esp, sa, sa->seq_hi);
- key_index = sa->crypto_key_index;
+ if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa)))
+ {
+ /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */
+ crypto_start_offset -= iv_sz;
+ crypto_total_len += iv_sz;
+ }
}
else
{
@@ -517,13 +539,17 @@ esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
}
nonce->salt = sa->salt;
- nonce->iv = *pkt_iv = clib_host_to_net_u64 (sa->ctr_iv_counter++);
+ nonce->iv = *(u64 *) pkt_iv;
iv = (u8 *) nonce;
}
else
{
- iv = payload - iv_sz;
- flag |= VNET_CRYPTO_OP_FLAG_INIT_IV;
+ /* construct zero iv in front of the IP header */
+ iv = pkt_iv - hdr_len - iv_sz;
+ clib_memset_u8 (iv, 0, iv_sz);
+ /* include iv field in crypto */
+ crypto_start_offset -= iv_sz;
+ crypto_total_len += iv_sz;
}
if (lb != b)
@@ -531,13 +557,14 @@ esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
/* chain */
flag |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
tag = vlib_buffer_get_tail (lb) - icv_sz;
- crypto_total_len = esp_encrypt_chain_crypto (vm, ptd, sa, b, lb, icv_sz,
- payload, payload_len, 0);
+ crypto_total_len = esp_encrypt_chain_crypto (
+ vm, ptd, sa, b, lb, icv_sz, b->data + crypto_start_offset,
+ crypto_total_len + icv_sz, 0);
}
if (sa->integ_op_id)
{
- integ_start_offset = crypto_start_offset - iv_sz - sizeof (esp_header_t);
+ integ_start_offset -= iv_sz + sizeof (esp_header_t);
integ_total_len += iv_sz + sizeof (esp_header_t);
if (b != lb)
@@ -578,6 +605,7 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 current_sa_bytes = 0, spi = 0;
u8 esp_align = 4, iv_sz = 0, icv_sz = 0;
ipsec_sa_t *sa0 = 0;
+ u8 sa_drop_no_crypto = 0;
vlib_buffer_t *lb;
vnet_crypto_op_t **crypto_ops = &ptd->crypto_ops;
vnet_crypto_op_t **integ_ops = &ptd->integ_ops;
@@ -594,8 +622,8 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
ESP_ENCRYPT_NEXT_HANDOFF_MPLS));
vlib_buffer_t *sync_bufs[VLIB_FRAME_SIZE];
u16 sync_nexts[VLIB_FRAME_SIZE], *sync_next = sync_nexts, n_sync = 0;
- u16 async_nexts[VLIB_FRAME_SIZE], *async_next = async_nexts, n_async = 0;
- u16 noop_nexts[VLIB_FRAME_SIZE], *noop_next = noop_nexts, n_noop = 0;
+ u16 n_async = 0;
+ u16 noop_nexts[VLIB_FRAME_SIZE], n_noop = 0;
u32 sync_bi[VLIB_FRAME_SIZE];
u32 noop_bi[VLIB_FRAME_SIZE];
esp_encrypt_error_t err;
@@ -634,12 +662,24 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
CLIB_CACHE_LINE_BYTES, LOAD);
}
+ vnet_calc_checksums_inline (vm, b[0], b[0]->flags & VNET_BUFFER_F_IS_IP4,
+ b[0]->flags & VNET_BUFFER_F_IS_IP6);
+ vnet_calc_outer_checksums_inline (vm, b[0]);
+
if (is_tun)
{
/* we are on a ipsec tunnel's feature arc */
vnet_buffer (b[0])->ipsec.sad_index =
sa_index0 = ipsec_tun_protect_get_sa_out
(vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
+
+ if (PREDICT_FALSE (INDEX_INVALID == sa_index0))
+ {
+ err = ESP_ENCRYPT_ERROR_NO_PROTECTION;
+ noop_nexts[n_noop] = drop_next;
+ b[0]->error = node->errors[err];
+ goto trace;
+ }
}
else
sa_index0 = vnet_buffer (b[0])->ipsec.sad_index;
@@ -647,18 +687,24 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (sa_index0 != current_sa_index)
{
if (current_sa_packets)
- vlib_increment_combined_counter (&ipsec_sa_counters, thread_index,
- current_sa_index,
- current_sa_packets,
- current_sa_bytes);
+ vlib_increment_combined_counter (
+ &ipsec_sa_counters, thread_index, current_sa_index,
+ current_sa_packets, current_sa_bytes);
current_sa_packets = current_sa_bytes = 0;
sa0 = ipsec_sa_get (sa_index0);
+ current_sa_index = sa_index0;
+
+ sa_drop_no_crypto = ((sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE &&
+ sa0->integ_alg == IPSEC_INTEG_ALG_NONE) &&
+ !ipsec_sa_is_set_NO_ALGO_NO_DROP (sa0));
+
+ vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index,
+ current_sa_index);
/* fetch the second cacheline ASAP */
clib_prefetch_load (sa0->cacheline1);
- current_sa_index = sa_index0;
spi = clib_net_to_host_u32 (sa0->spi);
esp_align = sa0->esp_block_align;
icv_sz = sa0->integ_icv_size;
@@ -666,7 +712,15 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
is_async = im->async_mode | ipsec_sa_is_set_IS_ASYNC (sa0);
}
- if (PREDICT_FALSE (~0 == sa0->thread_index))
+ if (PREDICT_FALSE (sa_drop_no_crypto != 0))
+ {
+ err = ESP_ENCRYPT_ERROR_NO_ENCRYPTION;
+ esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, drop_next, sa_index0);
+ goto trace;
+ }
+
+ if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index))
{
/* this is the first packet to use this SA, claim the SA
* for this thread. this could happen simultaneously on
@@ -679,8 +733,9 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
vnet_buffer (b[0])->ipsec.thread_index = sa0->thread_index;
err = ESP_ENCRYPT_ERROR_HANDOFF;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- handoff_next);
+ esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, handoff_next,
+ current_sa_index);
goto trace;
}
@@ -689,7 +744,8 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (n_bufs == 0)
{
err = ESP_ENCRYPT_ERROR_NO_BUFFERS;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts, drop_next);
+ esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, drop_next, current_sa_index);
goto trace;
}
@@ -703,7 +759,8 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (PREDICT_FALSE (esp_seq_advance (sa0)))
{
err = ESP_ENCRYPT_ERROR_SEQ_CYCLED;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts, drop_next);
+ esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, drop_next, current_sa_index);
goto trace;
}
@@ -714,13 +771,14 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
payload = vlib_buffer_get_current (b[0]);
next_hdr_ptr = esp_add_footer_and_icv (
- vm, &lb, esp_align, icv_sz, node, buffer_data_size,
+ vm, &lb, esp_align, icv_sz, buffer_data_size,
vlib_buffer_length_in_chain (vm, b[0]));
if (!next_hdr_ptr)
{
err = ESP_ENCRYPT_ERROR_NO_BUFFERS;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- drop_next);
+ esp_encrypt_set_next_index (b[0], node, thread_index, err,
+ n_noop, noop_nexts, drop_next,
+ current_sa_index);
goto trace;
}
b[0]->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
@@ -823,27 +881,41 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
}
else /* transport mode */
{
- u8 *l2_hdr, l2_len, *ip_hdr, ip_len;
+ u8 *l2_hdr, l2_len, *ip_hdr;
+ u16 ip_len;
ip6_ext_header_t *ext_hdr;
udp_header_t *udp = 0;
u16 udp_len = 0;
u8 *old_ip_hdr = vlib_buffer_get_current (b[0]);
+ /*
+ * Get extension header chain length. It might be longer than the
+ * buffer's pre_data area.
+ */
ip_len =
(VNET_LINK_IP6 == lt ?
esp_get_ip6_hdr_len ((ip6_header_t *) old_ip_hdr, &ext_hdr) :
ip4_header_bytes ((ip4_header_t *) old_ip_hdr));
+ if ((old_ip_hdr - ip_len) < &b[0]->pre_data[0])
+ {
+ err = ESP_ENCRYPT_ERROR_NO_BUFFERS;
+ esp_encrypt_set_next_index (b[0], node, thread_index, err,
+ n_noop, noop_nexts, drop_next,
+ current_sa_index);
+ goto trace;
+ }
vlib_buffer_advance (b[0], ip_len);
payload = vlib_buffer_get_current (b[0]);
next_hdr_ptr = esp_add_footer_and_icv (
- vm, &lb, esp_align, icv_sz, node, buffer_data_size,
+ vm, &lb, esp_align, icv_sz, buffer_data_size,
vlib_buffer_length_in_chain (vm, b[0]));
if (!next_hdr_ptr)
{
err = ESP_ENCRYPT_ERROR_NO_BUFFERS;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- drop_next);
+ esp_encrypt_set_next_index (b[0], node, thread_index, err,
+ n_noop, noop_nexts, drop_next,
+ current_sa_index);
goto trace;
}
@@ -879,42 +951,40 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
else
l2_len = 0;
+ u16 len;
+ len = payload_len_total + hdr_len - l2_len;
+
if (VNET_LINK_IP6 == lt)
{
ip6_header_t *ip6 = (ip6_header_t *) (old_ip_hdr);
if (PREDICT_TRUE (NULL == ext_hdr))
{
*next_hdr_ptr = ip6->protocol;
- ip6->protocol = IP_PROTOCOL_IPSEC_ESP;
+ ip6->protocol =
+ (udp) ? IP_PROTOCOL_UDP : IP_PROTOCOL_IPSEC_ESP;
}
else
{
*next_hdr_ptr = ext_hdr->next_hdr;
- ext_hdr->next_hdr = IP_PROTOCOL_IPSEC_ESP;
+ ext_hdr->next_hdr =
+ (udp) ? IP_PROTOCOL_UDP : IP_PROTOCOL_IPSEC_ESP;
}
ip6->payload_length =
- clib_host_to_net_u16 (payload_len_total + hdr_len - l2_len -
- sizeof (ip6_header_t));
+ clib_host_to_net_u16 (len - sizeof (ip6_header_t));
}
else if (VNET_LINK_IP4 == lt)
{
- u16 len;
ip4_header_t *ip4 = (ip4_header_t *) (old_ip_hdr);
*next_hdr_ptr = ip4->protocol;
- len = payload_len_total + hdr_len - l2_len;
- if (udp)
- {
- esp_update_ip4_hdr (ip4, len, /* is_transport */ 1, 1);
- udp_len = len - ip_len;
- }
- else
- esp_update_ip4_hdr (ip4, len, /* is_transport */ 1, 0);
+ esp_update_ip4_hdr (ip4, len, /* is_transport */ 1,
+ (udp != NULL));
}
clib_memcpy_le64 (ip_hdr, old_ip_hdr, ip_len);
if (udp)
{
+ udp_len = len - ip_len;
esp_fill_udp_hdr (sa0, udp, udp_len);
}
@@ -946,6 +1016,16 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
async_frames[async_op] =
vnet_crypto_async_get_frame (vm, async_op);
+
+ if (PREDICT_FALSE (!async_frames[async_op]))
+ {
+ err = ESP_ENCRYPT_ERROR_NO_AVAIL_FRAME;
+ esp_encrypt_set_next_index (b[0], node, thread_index, err,
+ n_noop, noop_nexts, drop_next,
+ current_sa_index);
+ goto trace;
+ }
+
/* Save the frame to the list we'll submit at the end */
vec_add1 (ptd->async_frames, async_frames[async_op]);
}
@@ -970,13 +1050,18 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
esp_encrypt_trace_t *tr = vlib_add_trace (vm, node, b[0],
sizeof (*tr));
- tr->sa_index = sa_index0;
- tr->spi = sa0->spi;
- tr->seq = sa0->seq;
- tr->sa_seq_hi = sa0->seq_hi;
- tr->udp_encap = ipsec_sa_is_set_UDP_ENCAP (sa0);
- tr->crypto_alg = sa0->crypto_alg;
- tr->integ_alg = sa0->integ_alg;
+ if (INDEX_INVALID == sa_index0)
+ clib_memset_u8 (tr, 0xff, sizeof (*tr));
+ else
+ {
+ tr->sa_index = sa_index0;
+ tr->spi = sa0->spi;
+ tr->seq = sa0->seq;
+ tr->sa_seq_hi = sa0->seq_hi;
+ tr->udp_encap = ipsec_sa_is_set_UDP_ENCAP (sa0);
+ tr->crypto_alg = sa0->crypto_alg;
+ tr->integ_alg = sa0->integ_alg;
+ }
}
/* next */
@@ -984,7 +1069,6 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
noop_bi[n_noop] = from[b - bufs];
n_noop++;
- noop_next++;
}
else if (!is_async)
{
@@ -996,15 +1080,15 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
else
{
n_async++;
- async_next++;
}
n_left -= 1;
b += 1;
}
- vlib_increment_combined_counter (&ipsec_sa_counters, thread_index,
- current_sa_index, current_sa_packets,
- current_sa_bytes);
+ if (INDEX_INVALID != current_sa_index)
+ vlib_increment_combined_counter (&ipsec_sa_counters, thread_index,
+ current_sa_index, current_sa_packets,
+ current_sa_bytes);
if (n_sync)
{
esp_process_ops (vm, node, ptd->crypto_ops, sync_bufs, sync_nexts,
@@ -1030,7 +1114,8 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
n_noop += esp_async_recycle_failed_submit (
vm, *async_frame, node, ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR,
- n_sync, noop_bi, noop_nexts, drop_next);
+ IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR, n_noop, noop_bi,
+ noop_nexts, drop_next, true);
vnet_crypto_async_reset_frame (*async_frame);
vnet_crypto_async_free_frame (vm, *async_frame);
}
@@ -1139,15 +1224,14 @@ VLIB_NODE_FN (esp4_encrypt_node) (vlib_main_t * vm,
esp_encrypt_async_next.esp4_post_next);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp4_encrypt_node) = {
.name = "esp4-encrypt",
.vector_size = sizeof (u32),
.format_trace = format_esp_encrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN (esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
.n_next_nodes = ESP_ENCRYPT_N_NEXT,
.next_nodes = { [ESP_ENCRYPT_NEXT_DROP4] = "ip4-drop",
@@ -1158,7 +1242,6 @@ VLIB_REGISTER_NODE (esp4_encrypt_node) = {
[ESP_ENCRYPT_NEXT_HANDOFF_MPLS] = "error-drop",
[ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "interface-output" },
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp4_encrypt_post_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1167,7 +1250,6 @@ VLIB_NODE_FN (esp4_encrypt_post_node) (vlib_main_t * vm,
return esp_encrypt_post_inline (vm, node, from_frame);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp4_encrypt_post_node) = {
.name = "esp4-encrypt-post",
.vector_size = sizeof (u32),
@@ -1175,10 +1257,9 @@ VLIB_REGISTER_NODE (esp4_encrypt_post_node) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.sibling_of = "esp4-encrypt",
- .n_errors = ARRAY_LEN(esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp6_encrypt_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1188,7 +1269,6 @@ VLIB_NODE_FN (esp6_encrypt_node) (vlib_main_t * vm,
esp_encrypt_async_next.esp6_post_next);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp6_encrypt_node) = {
.name = "esp6-encrypt",
.vector_size = sizeof (u32),
@@ -1196,10 +1276,9 @@ VLIB_REGISTER_NODE (esp6_encrypt_node) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.sibling_of = "esp4-encrypt",
- .n_errors = ARRAY_LEN(esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp6_encrypt_post_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1208,7 +1287,6 @@ VLIB_NODE_FN (esp6_encrypt_post_node) (vlib_main_t * vm,
return esp_encrypt_post_inline (vm, node, from_frame);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp6_encrypt_post_node) = {
.name = "esp6-encrypt-post",
.vector_size = sizeof (u32),
@@ -1216,10 +1294,9 @@ VLIB_REGISTER_NODE (esp6_encrypt_post_node) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.sibling_of = "esp4-encrypt",
- .n_errors = ARRAY_LEN(esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp4_encrypt_tun_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1229,15 +1306,14 @@ VLIB_NODE_FN (esp4_encrypt_tun_node) (vlib_main_t * vm,
esp_encrypt_async_next.esp4_tun_post_next);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp4_encrypt_tun_node) = {
.name = "esp4-encrypt-tun",
.vector_size = sizeof (u32),
.format_trace = format_esp_encrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
.n_next_nodes = ESP_ENCRYPT_N_NEXT,
.next_nodes = {
@@ -1258,7 +1334,6 @@ VLIB_NODE_FN (esp4_encrypt_tun_post_node) (vlib_main_t * vm,
return esp_encrypt_post_inline (vm, node, from_frame);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp4_encrypt_tun_post_node) = {
.name = "esp4-encrypt-tun-post",
.vector_size = sizeof (u32),
@@ -1266,10 +1341,9 @@ VLIB_REGISTER_NODE (esp4_encrypt_tun_post_node) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.sibling_of = "esp4-encrypt-tun",
- .n_errors = ARRAY_LEN(esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp6_encrypt_tun_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1279,15 +1353,14 @@ VLIB_NODE_FN (esp6_encrypt_tun_node) (vlib_main_t * vm,
esp_encrypt_async_next.esp6_tun_post_next);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp6_encrypt_tun_node) = {
.name = "esp6-encrypt-tun",
.vector_size = sizeof (u32),
.format_trace = format_esp_encrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
.n_next_nodes = ESP_ENCRYPT_N_NEXT,
.next_nodes = {
@@ -1301,7 +1374,6 @@ VLIB_REGISTER_NODE (esp6_encrypt_tun_node) = {
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp6_encrypt_tun_post_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1310,7 +1382,6 @@ VLIB_NODE_FN (esp6_encrypt_tun_post_node) (vlib_main_t * vm,
return esp_encrypt_post_inline (vm, node, from_frame);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp6_encrypt_tun_post_node) = {
.name = "esp6-encrypt-tun-post",
.vector_size = sizeof (u32),
@@ -1318,10 +1389,9 @@ VLIB_REGISTER_NODE (esp6_encrypt_tun_post_node) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.sibling_of = "esp-mpls-encrypt-tun",
- .n_errors = ARRAY_LEN (esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp_mpls_encrypt_tun_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
@@ -1336,8 +1406,8 @@ VLIB_REGISTER_NODE (esp_mpls_encrypt_tun_node) = {
.format_trace = format_esp_encrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
.n_next_nodes = ESP_ENCRYPT_N_NEXT,
.next_nodes = {
@@ -1364,123 +1434,9 @@ VLIB_REGISTER_NODE (esp_mpls_encrypt_tun_post_node) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.sibling_of = "esp-mpls-encrypt-tun",
- .n_errors = ARRAY_LEN (esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
-};
-
-typedef struct
-{
- u32 sa_index;
-} esp_no_crypto_trace_t;
-
-static u8 *
-format_esp_no_crypto_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- esp_no_crypto_trace_t *t = va_arg (*args, esp_no_crypto_trace_t *);
-
- s = format (s, "esp-no-crypto: sa-index %u", t->sa_index);
-
- return s;
-}
-
-enum
-{
- ESP_NO_CRYPTO_NEXT_DROP,
- ESP_NO_CRYPTO_N_NEXT,
-};
-
-enum
-{
- ESP_NO_CRYPTO_ERROR_RX_PKTS,
-};
-
-static char *esp_no_crypto_error_strings[] = {
- "Outbound ESP packets received",
-};
-
-always_inline uword
-esp_no_crypto_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
- u32 *from = vlib_frame_vector_args (frame);
- u32 n_left = frame->n_vectors;
-
- vlib_get_buffers (vm, from, b, n_left);
-
- while (n_left > 0)
- {
- u32 sa_index0;
-
- /* packets are always going to be dropped, but get the sa_index */
- sa_index0 = ipsec_tun_protect_get_sa_out
- (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
-
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- {
- esp_no_crypto_trace_t *tr = vlib_add_trace (vm, node, b[0],
- sizeof (*tr));
- tr->sa_index = sa_index0;
- }
-
- n_left -= 1;
- b += 1;
- }
-
- vlib_node_increment_counter (vm, node->node_index,
- ESP_NO_CRYPTO_ERROR_RX_PKTS, frame->n_vectors);
-
- vlib_buffer_enqueue_to_single_next (vm, node, from,
- ESP_NO_CRYPTO_NEXT_DROP,
- frame->n_vectors);
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (esp4_no_crypto_tun_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return esp_no_crypto_inline (vm, node, from_frame);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (esp4_no_crypto_tun_node) =
-{
- .name = "esp4-no-crypto",
- .vector_size = sizeof (u32),
- .format_trace = format_esp_no_crypto_trace,
- .n_errors = ARRAY_LEN(esp_no_crypto_error_strings),
- .error_strings = esp_no_crypto_error_strings,
- .n_next_nodes = ESP_NO_CRYPTO_N_NEXT,
- .next_nodes = {
- [ESP_NO_CRYPTO_NEXT_DROP] = "ip4-drop",
- },
-};
-
-VLIB_NODE_FN (esp6_no_crypto_tun_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return esp_no_crypto_inline (vm, node, from_frame);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (esp6_no_crypto_tun_node) =
-{
- .name = "esp6-no-crypto",
- .vector_size = sizeof (u32),
- .format_trace = format_esp_no_crypto_trace,
- .n_errors = ARRAY_LEN(esp_no_crypto_error_strings),
- .error_strings = esp_no_crypto_error_strings,
- .n_next_nodes = ESP_NO_CRYPTO_N_NEXT,
- .next_nodes = {
- [ESP_NO_CRYPTO_NEXT_DROP] = "ip6-drop",
- },
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vnet/ipsec/ipsec.api b/src/vnet/ipsec/ipsec.api
index be45c3e2401..68efe8f50f7 100644
--- a/src/vnet/ipsec/ipsec.api
+++ b/src/vnet/ipsec/ipsec.api
@@ -57,74 +57,35 @@ autoreply define ipsec_interface_add_del_spd
u32 spd_id;
};
+/** \brief IPsec: Add/delete Security Policy Database entry
-enum ipsec_spd_action
-{
- /* bypass - no IPsec processing */
- IPSEC_API_SPD_ACTION_BYPASS = 0,
- /* discard - discard packet with ICMP processing */
- IPSEC_API_SPD_ACTION_DISCARD,
- /* resolve - send request to control plane for SA resolving */
- IPSEC_API_SPD_ACTION_RESOLVE,
- /* protect - apply IPsec policy using following parameters */
- IPSEC_API_SPD_ACTION_PROTECT,
-};
-
-/** \brief IPsec: Security Policy Database entry
-
- See RFC 4301, 4.4.1.1 on how to match packet to selectors
-
- @param spd_id - SPD instance id (control plane allocated)
- @param priority - priority of SPD entry (non-unique value). Used to order SPD matching - higher priorities match before lower
- @param is_outbound - entry applies to outbound traffic if non-zero, otherwise applies to inbound traffic
- @param remote_address_start - start of remote address range to match
- @param remote_address_stop - end of remote address range to match
- @param local_address_start - start of local address range to match
- @param local_address_stop - end of local address range to match
- @param protocol - protocol type to match [0 means any] otherwise IANA value
- @param remote_port_start - start of remote port range to match ...
- @param remote_port_stop - end of remote port range to match [0 to 65535 means ANY, 65535 to 0 means OPAQUE]
- @param local_port_start - start of local port range to match ...
- @param local_port_stop - end of remote port range to match [0 to 65535 means ANY, 65535 to 0 means OPAQUE]
- @param policy - action to perform on match
- @param sa_id - SAD instance id (control plane allocated)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add SPD entry if non-zero, else delete
+ @param entry - Description of the entry to add/del
*/
-typedef ipsec_spd_entry
+define ipsec_spd_entry_add_del
{
- u32 spd_id;
- i32 priority;
- bool is_outbound;
-
- u32 sa_id;
- vl_api_ipsec_spd_action_t policy;
- /* Which protocol?? */
- u8 protocol;
-
- // Selector
- vl_api_address_t remote_address_start;
- vl_api_address_t remote_address_stop;
- vl_api_address_t local_address_start;
- vl_api_address_t local_address_stop;
-
- u16 remote_port_start;
- u16 remote_port_stop;
- u16 local_port_start;
- u16 local_port_stop;
+ option deprecated;
+ u32 client_index;
+ u32 context;
+ bool is_add;
+ vl_api_ipsec_spd_entry_t entry;
};
-/** \brief IPsec: Add/delete Security Policy Database entry
+/** \brief IPsec: Add/delete Security Policy Database entry v2
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param is_add - add SPD entry if non-zero, else delete
@param entry - Description of the entry to add/del
*/
-define ipsec_spd_entry_add_del
+define ipsec_spd_entry_add_del_v2
{
u32 client_index;
u32 context;
bool is_add;
- vl_api_ipsec_spd_entry_t entry;
+ vl_api_ipsec_spd_entry_v2_t entry;
};
/** \brief IPsec: Reply Add/delete Security Policy Database entry
@@ -135,6 +96,20 @@ define ipsec_spd_entry_add_del
*/
define ipsec_spd_entry_add_del_reply
{
+ option deprecated;
+ u32 context;
+ i32 retval;
+ u32 stat_index;
+};
+
+/** \brief IPsec: Reply Add/delete Security Policy Database entry v2
+
+ @param context - sender context, to match reply w/ request
+ @param retval - success/fail return code
+ @param stat_index - An index for the policy in the stats segment @ /net/ipsec/policy
+*/
+define ipsec_spd_entry_add_del_v2_reply
+{
u32 context;
i32 retval;
u32 stat_index;
@@ -192,18 +167,23 @@ define ipsec_spd_details {
define ipsec_sad_entry_add_del
{
option deprecated;
+
u32 client_index;
u32 context;
bool is_add;
vl_api_ipsec_sad_entry_t entry;
};
+
define ipsec_sad_entry_add_del_v2
{
+ option deprecated;
+
u32 client_index;
u32 context;
bool is_add;
vl_api_ipsec_sad_entry_v2_t entry;
};
+
define ipsec_sad_entry_add_del_v3
{
u32 client_index;
@@ -211,12 +191,21 @@ define ipsec_sad_entry_add_del_v3
bool is_add;
vl_api_ipsec_sad_entry_v3_t entry;
};
+
define ipsec_sad_entry_add
{
u32 client_index;
u32 context;
vl_api_ipsec_sad_entry_v3_t entry;
};
+
+define ipsec_sad_entry_add_v2
+{
+ u32 client_index;
+ u32 context;
+ vl_api_ipsec_sad_entry_v4_t entry;
+};
+
autoreply define ipsec_sad_entry_del
{
u32 client_index;
@@ -224,25 +213,76 @@ autoreply define ipsec_sad_entry_del
u32 id;
};
+
+/** \brief An API to bind an SAD entry to a specific worker
+
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sa_id - the id of the SA to bind
+ @param worker - the worker's index to which the SA will be bound
+ */
+autoreply define ipsec_sad_bind
+{
+ u32 client_index;
+ u32 context;
+ u32 sa_id;
+ u32 worker;
+};
+
+autoreply define ipsec_sad_unbind
+{
+ u32 client_index;
+ u32 context;
+ u32 sa_id;
+};
+
+/** \brief An API to update the tunnel parameters and the ports associated with an SA
+
+ Used in the NAT-T case when the NAT data changes
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sad_id - the id of the SA to update
+ @param is_tun - update the tunnel if non-zero, else update only the ports
+ @param tunnel - the new tunnel parameters
+ @param udp_src_port - new src port for NAT-T. Used if different from 0xffff
+ @param udp_dst_port - new dst port for NAT-T. Used if different from 0xffff
+ */
+autoreply define ipsec_sad_entry_update
+{
+ u32 client_index;
+ u32 context;
+ u32 sad_id;
+ bool is_tun;
+ vl_api_tunnel_t tunnel;
+ u16 udp_src_port [default=0xffff];
+ u16 udp_dst_port [default=0xffff];
+};
+
define ipsec_sad_entry_add_del_reply
{
option deprecated;
+
u32 context;
i32 retval;
u32 stat_index;
};
+
define ipsec_sad_entry_add_del_v2_reply
{
+ option deprecated;
+
u32 context;
i32 retval;
u32 stat_index;
};
+
define ipsec_sad_entry_add_del_v3_reply
{
u32 context;
i32 retval;
u32 stat_index;
};
+
define ipsec_sad_entry_add_reply
{
u32 context;
@@ -250,6 +290,13 @@ define ipsec_sad_entry_add_reply
u32 stat_index;
};
+define ipsec_sad_entry_add_v2_reply
+{
+ u32 context;
+ i32 retval;
+ u32 stat_index;
+};
+
/** \brief Add or Update Protection for a tunnel with IPSEC
Tunnel protection directly associates an SA with all packets
@@ -413,12 +460,15 @@ define ipsec_itf_details
define ipsec_sa_dump
{
option deprecated;
+
u32 client_index;
u32 context;
u32 sa_id;
};
define ipsec_sa_v2_dump
{
+ option deprecated;
+
u32 client_index;
u32 context;
u32 sa_id;
@@ -429,6 +479,18 @@ define ipsec_sa_v3_dump
u32 context;
u32 sa_id;
};
+define ipsec_sa_v4_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sa_id;
+};
+define ipsec_sa_v5_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sa_id;
+};
/** \brief IPsec security association database response
@param context - sender context which was passed in the request
@@ -444,6 +506,7 @@ define ipsec_sa_v3_dump
*/
define ipsec_sa_details {
option deprecated;
+
u32 context;
vl_api_ipsec_sad_entry_t entry;
@@ -456,6 +519,8 @@ define ipsec_sa_details {
u32 stat_index;
};
define ipsec_sa_v2_details {
+ option deprecated;
+
u32 context;
vl_api_ipsec_sad_entry_v2_t entry;
@@ -478,6 +543,28 @@ define ipsec_sa_v3_details {
u32 stat_index;
};
+define ipsec_sa_v4_details {
+ u32 context;
+ vl_api_ipsec_sad_entry_v3_t entry;
+
+ vl_api_interface_index_t sw_if_index;
+ u64 seq_outbound;
+ u64 last_seq_inbound;
+ u64 replay_window;
+ u32 thread_index;
+ u32 stat_index;
+};
+define ipsec_sa_v5_details {
+ u32 context;
+ vl_api_ipsec_sad_entry_v4_t entry;
+
+ vl_api_interface_index_t sw_if_index;
+ u64 seq_outbound;
+ u64 last_seq_inbound;
+ u64 replay_window;
+ u32 thread_index;
+ u32 stat_index;
+};
/** \brief Dump IPsec backends
@param client_index - opaque cookie to identify the sender
@@ -527,6 +614,286 @@ autoreply define ipsec_set_async_mode {
bool async_enable;
};
+counters esp_decrypt {
+ rx_pkts {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ESP pkts received";
+ };
+ rx_post_pkts {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ESP-POST pkts received";
+ };
+ handoff {
+ severity info;
+ type counter64;
+ units "packets";
+ description "hand-off";
+ };
+ decryption_failed {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ESP decryption failed";
+ };
+ integ_error {
+ severity error;
+ type counter64;
+ units "packets";
+ description "integrity check failed";
+ };
+ crypto_engine_error {
+ severity error;
+ type counter64;
+ units "packets";
+ description "crypto engine error (packet dropped)";
+ };
+ replay {
+ severity error;
+ type counter64;
+ units "packets";
+ description "SA replayed packet";
+ };
+ runt {
+ severity error;
+ type counter64;
+ units "packets";
+ description "undersized packet";
+ };
+ no_buffers {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no buffers (packet dropped)";
+ };
+ oversized_header {
+ severity error;
+ type counter64;
+ units "packets";
+ description "buffer with oversized header (dropped)";
+ };
+ no_tail_space {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no enough buffer tail space (dropped)";
+ };
+ tun_no_proto {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no tunnel protocol";
+ };
+ unsup_payload {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unsupported payload";
+ };
+ no_avail_frame {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no available frame (packet dropped)";
+ };
+};
+
+counters esp_encrypt {
+ rx_pkts {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ESP pkts received";
+ };
+ post_rx_pkts {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ESP-post pkts received";
+ };
+ handoff {
+ severity info;
+ type counter64;
+ units "packets";
+ description "Hand-off";
+ };
+ seq_cycled {
+ severity error;
+ type counter64;
+ units "packets";
+ description "sequence number cycled (packet dropped)";
+ };
+ crypto_engine_error {
+ severity error;
+ type counter64;
+ units "packets";
+ description "crypto engine error (packet dropped)";
+ };
+ crypto_queue_full {
+ severity error;
+ type counter64;
+ units "packets";
+ description "crypto queue full (packet dropped)";
+ };
+ no_buffers {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no buffers (packet dropped)";
+ };
+ no_protection {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no protecting SA (packet dropped)";
+ };
+ no_encryption {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no Encrypting SA (packet dropped)";
+ };
+ no_avail_frame {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no available frame (packet dropped)";
+ };
+};
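
vppapigen consumes each counters block and emits a matching error enum plus
a vlib_error_desc_t table in ipsec.api_enum.h; the C hunks above depend on
that through "typedef vl_counter_esp_encrypt_enum_t esp_encrypt_error_t"
and ".error_counters = esp_encrypt_error_counters". A sketch of the assumed
generated code (names inferred from the counter ids above):

/* assumed output of vppapigen in ipsec.api_enum.h (sketch only) */
typedef enum
{
  ESP_ENCRYPT_ERROR_RX_PKTS,
  ESP_ENCRYPT_ERROR_POST_RX_PKTS,
  ESP_ENCRYPT_ERROR_HANDOFF,
  ESP_ENCRYPT_ERROR_SEQ_CYCLED,
  ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR,
  ESP_ENCRYPT_ERROR_CRYPTO_QUEUE_FULL,
  ESP_ENCRYPT_ERROR_NO_BUFFERS,
  ESP_ENCRYPT_ERROR_NO_PROTECTION,
  ESP_ENCRYPT_ERROR_NO_ENCRYPTION,
  ESP_ENCRYPT_ERROR_NO_AVAIL_FRAME,
  ESP_ENCRYPT_N_ERROR,
} vl_counter_esp_encrypt_enum_t;

/* plus: vlib_error_desc_t esp_encrypt_error_counters[ESP_ENCRYPT_N_ERROR]
 * carrying the severity/units/description metadata above */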
+
+counters ah_encrypt {
+ rx_pkts {
+ severity info;
+ type counter64;
+ units "packets";
+ description "AH pkts received";
+ };
+ crypto_engine_error {
+ severity error;
+ type counter64;
+ units "packets";
+ description "crypto engine error (packet dropped)";
+ };
+ seq_cycled {
+ severity error;
+ type counter64;
+ units "packets";
+ description "sequence number cycled (packet dropped)";
+ };
+};
+
+counters ah_decrypt {
+ rx_pkts {
+ severity info;
+ type counter64;
+ units "packets";
+ description "AH pkts received";
+ };
+ decryption_failed {
+ severity error;
+ type counter64;
+ units "packets";
+ description "AH decryption failed";
+ };
+ integ_error {
+ severity error;
+ type counter64;
+ units "packets";
+ description "Integrity check failed";
+ };
+ no_tail_space {
+ severity error;
+ type counter64;
+ units "packets";
+ description "not enough buffer tail space (dropped)";
+ };
+ drop_fragments {
+ severity error;
+ type counter64;
+ units "packets";
+ description "IP fragments drop";
+ };
+ replay {
+ severity error;
+ type counter64;
+ units "packets";
+ description "SA replayed packet";
+ };
+};
+
+counters ipsec_tun {
+ rx {
+ severity info;
+ type counter64;
+ units "packets";
+ description "good packets received";
+ };
+ disabled {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ipsec packets received on disabled interface";
+ };
+ no_tunnel {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no matching tunnel";
+ };
+ tunnel_mismatch {
+ severity error;
+ type counter64;
+ units "packets";
+ description "SPI-tunnel mismatch";
+ };
+ nat_keepalive {
+ severity info;
+ type counter64;
+ units "packets";
+ description "NAT Keepalive";
+ };
+ too_short {
+ severity error;
+ type counter64;
+ units "packets";
+ description "Too Short";
+ };
+ spi_0 {
+ severity info;
+ type counter64;
+ units "packets";
+ description "SPI 0";
+ };
+};
+
+paths {
+ "/err/esp4-encrypt" "esp_encrypt";
+ "/err/esp4-encrypt-post" "esp_encrypt";
+ "/err/esp4-encrypt-tun" "esp_encrypt";
+ "/err/esp4-encrypt-tun-post" "esp_encrypt";
+ "/err/esp6-encrypt" "esp_encrypt";
+ "/err/esp6-encrypt-post" "esp_encrypt";
+ "/err/esp6-encrypt-tun" "esp_encrypt";
+ "/err/esp6-encrypt-tun-post" "esp_encrypt";
+ "/err/esp-mpls-encrypt-tun" "esp_encrypt";
+ "/err/esp-mpls-encrypt-tun-post" "esp_encrypt";
+ "/err/esp4-decrypt" "esp_decrypt";
+ "/err/esp4-decrypt-post" "esp_decrypt";
+ "/err/esp4-decrypt-tun" "esp_decrypt";
+ "/err/esp4-decrypt-tun-post" "esp_decrypt";
+ "/err/esp6-decrypt" "esp_decrypt";
+ "/err/esp6-decrypt-post" "esp_decrypt";
+ "/err/esp6-decrypt-tun" "esp_decrypt";
+ "/err/esp6-decrypt-tun-post" "esp_decrypt";
+ "/err/ah4-encrypt" "ah_encrypt";
+ "/err/ah6-encrypt" "ah_encrypt";
+ "/err/ipsec4-tun-input" "ipsec_tun";
+ "/err/ipsec6-tun-input" "ipsec_tun";
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/vnet/ipsec/ipsec.c b/src/vnet/ipsec/ipsec.c
index 30774ec10ff..f8c39c327ed 100644
--- a/src/vnet/ipsec/ipsec.c
+++ b/src/vnet/ipsec/ipsec.c
@@ -25,15 +25,68 @@
#include <vnet/ipsec/esp.h>
#include <vnet/ipsec/ah.h>
#include <vnet/ipsec/ipsec_tun.h>
+#include <vnet/ipsec/ipsec_itf.h>
+#include <vnet/ipsec/ipsec_spd_fp_lookup.h>
/* Flow cache is sized for 1 million flows with a load factor of .25.
*/
#define IPSEC4_OUT_SPD_DEFAULT_HASH_NUM_BUCKETS (1 << 22)
+/* Flow cache is sized for 1 million flows with a load factor of .25.
+ */
+#define IPSEC4_SPD_DEFAULT_HASH_NUM_BUCKETS (1 << 22)
+
ipsec_main_t ipsec_main;
+
esp_async_post_next_t esp_encrypt_async_next;
esp_async_post_next_t esp_decrypt_async_next;
+clib_error_t *
+ipsec_register_next_header (vlib_main_t *vm, u8 next_header,
+ const char *next_node)
+{
+ ipsec_main_t *im = &ipsec_main;
+ const vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) next_node);
+ /* -post nodes (eg. esp4-decrypt-post) are siblings of non-post nodes (eg.
+ * esp4-decrypt) and will therefore have the same next index */
+ const vlib_node_t *esp_decrypt_nodes[] = {
+ vlib_get_node (vm, im->esp4_decrypt_node_index),
+ vlib_get_node (vm, im->esp6_decrypt_node_index),
+ vlib_get_node (vm, im->esp4_decrypt_tun_node_index),
+ vlib_get_node (vm, im->esp6_decrypt_tun_node_index),
+ };
+ uword slot, max;
+ int i;
+
+ /* look for a next_index value that we can use for all esp decrypt nodes to
+ * avoid maintaining different next index arrays... */
+
+ slot = vlib_node_get_next (vm, esp_decrypt_nodes[0]->index, node->index);
+ max = vec_len (esp_decrypt_nodes[0]->next_nodes);
+ for (i = 1; i < ARRAY_LEN (esp_decrypt_nodes); i++)
+ {
+ /* if next node already exists, check it shares the same next_index */
+ if (slot !=
+ vlib_node_get_next (vm, esp_decrypt_nodes[i]->index, node->index))
+ return clib_error_return (
+ 0, "next node already exists with different next index");
+ /* compute a suitable slot from the max of all nodes next index */
+ max = clib_max (max, vec_len (esp_decrypt_nodes[i]->next_nodes));
+ }
+
+ if (~0 == slot)
+ {
+ /* next node not there yet, add it using the computed max */
+ slot = max;
+ for (i = 0; i < ARRAY_LEN (esp_decrypt_nodes); i++)
+ vlib_node_add_next_with_slot (vm, esp_decrypt_nodes[i]->index,
+ node->index, slot);
+ }
+
+ im->next_header_registrations[next_header] = slot;
+ return 0;
+}
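
ipsec_register_next_header() lets another feature claim an inner
(post-decrypt) protocol number so that every esp decrypt variant hands
matching packets to its node. A hypothetical caller (the node name and the
choice of protocol 115/L2TPv3 are illustrative, not from this patch):

  clib_error_t *err;

  /* 115 is the IANA protocol number for L2TPv3; "my-l2tp-input" is an
   * assumed plugin node that would receive the decrypted inner packets */
  err = ipsec_register_next_header (vm, 115, "my-l2tp-input");
  if (err)
    clib_error_report (err);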
+
static clib_error_t *
ipsec_check_ah_support (ipsec_sa_t * sa)
{
@@ -129,14 +182,24 @@ ipsec_add_node (vlib_main_t * vm, const char *node_name,
*out_next_index = vlib_node_add_next (vm, prev_node->index, node->index);
}
+static inline uword
+ipsec_udp_registration_key (u16 port, u8 is_ip4)
+{
+ uword key = (is_ip4) ? AF_IP4 : AF_IP6;
+
+ key |= (uword) (port << 16);
+ return key;
+}
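
A worked example of the key packing, assuming vnet's address-family enum
values AF_IP4 == 0 and AF_IP6 == 1:

  /* NAT-T port 4500: */
  ipsec_udp_registration_key (4500, 1); /* ip4: 0 | (4500 << 16) = 0x11940000 */
  ipsec_udp_registration_key (4500, 0); /* ip6: 1 | (4500 << 16) = 0x11940001 */

so the same UDP port registered for ip4 and ip6 occupies distinct hash
entries.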
+
void
-ipsec_unregister_udp_port (u16 port)
+ipsec_unregister_udp_port (u16 port, u8 is_ip4)
{
ipsec_main_t *im = &ipsec_main;
u32 n_regs;
- uword *p;
+ uword *p, key;
- p = hash_get (im->udp_port_registrations, port);
+ key = ipsec_udp_registration_key (port, is_ip4);
+ p = hash_get (im->udp_port_registrations, key);
ASSERT (p);
@@ -144,33 +207,35 @@ ipsec_unregister_udp_port (u16 port)
if (0 == --n_regs)
{
- udp_unregister_dst_port (vlib_get_main (), port, 1);
- hash_unset (im->udp_port_registrations, port);
+ udp_unregister_dst_port (vlib_get_main (), port, is_ip4);
+ hash_unset (im->udp_port_registrations, key);
}
else
{
- hash_unset (im->udp_port_registrations, port);
- hash_set (im->udp_port_registrations, port, n_regs);
+ hash_unset (im->udp_port_registrations, key);
+ hash_set (im->udp_port_registrations, key, n_regs);
}
}
void
-ipsec_register_udp_port (u16 port)
+ipsec_register_udp_port (u16 port, u8 is_ip4)
{
ipsec_main_t *im = &ipsec_main;
- u32 n_regs;
- uword *p;
+ u32 n_regs, node_index;
+ uword *p, key;
- p = hash_get (im->udp_port_registrations, port);
+ key = ipsec_udp_registration_key (port, is_ip4);
+ node_index =
+ (is_ip4) ? ipsec4_tun_input_node.index : ipsec6_tun_input_node.index;
+ p = hash_get (im->udp_port_registrations, key);
n_regs = (p ? p[0] : 0);
if (0 == n_regs++)
- udp_register_dst_port (vlib_get_main (), port,
- ipsec4_tun_input_node.index, 1);
+ udp_register_dst_port (vlib_get_main (), port, node_index, is_ip4);
- hash_unset (im->udp_port_registrations, port);
- hash_set (im->udp_port_registrations, port, n_regs);
+ hash_unset (im->udp_port_registrations, key);
+ hash_set (im->udp_port_registrations, key, n_regs);
}
u32
@@ -210,8 +275,7 @@ ipsec_register_esp_backend (
const char *esp6_decrypt_node_name, const char *esp6_decrypt_tun_node_name,
const char *esp_mpls_encrypt_node_tun_name,
check_support_cb_t esp_check_support_cb,
- add_del_sa_sess_cb_t esp_add_del_sa_sess_cb,
- enable_disable_cb_t enable_disable_cb)
+ add_del_sa_sess_cb_t esp_add_del_sa_sess_cb)
{
ipsec_esp_backend_t *b;
@@ -242,7 +306,6 @@ ipsec_register_esp_backend (
b->check_support_cb = esp_check_support_cb;
b->add_del_sa_sess_cb = esp_add_del_sa_sess_cb;
- b->enable_disable_cb = enable_disable_cb;
return b - im->esp_backends;
}
@@ -254,6 +317,9 @@ ipsec_rsc_in_use (ipsec_main_t * im)
if (pool_elts (ipsec_sa_pool) > 0)
return clib_error_return (0, "%d SA entries configured",
pool_elts (ipsec_sa_pool));
+ if (ipsec_itf_count () > 0)
+ return clib_error_return (0, "%d IPsec interfaces configured",
+ ipsec_itf_count ());
return (NULL);
}
@@ -290,18 +356,6 @@ ipsec_select_esp_backend (ipsec_main_t * im, u32 backend_idx)
if (pool_is_free_index (im->esp_backends, backend_idx))
return VNET_API_ERROR_INVALID_VALUE;
- /* disable current backend */
- if (im->esp_current_backend != ~0)
- {
- ipsec_esp_backend_t *cb = pool_elt_at_index (im->esp_backends,
- im->esp_current_backend);
- if (cb->enable_disable_cb)
- {
- if ((cb->enable_disable_cb) (0) != 0)
- return -1;
- }
- }
-
ipsec_esp_backend_t *b = pool_elt_at_index (im->esp_backends, backend_idx);
im->esp_current_backend = backend_idx;
im->esp4_encrypt_node_index = b->esp4_encrypt_node_index;
@@ -320,11 +374,6 @@ ipsec_select_esp_backend (ipsec_main_t * im, u32 backend_idx)
im->esp6_encrypt_tun_node_index = b->esp6_encrypt_tun_node_index;
im->esp_mpls_encrypt_tun_node_index = b->esp_mpls_encrypt_tun_node_index;
- if (b->enable_disable_cb)
- {
- if ((b->enable_disable_cb) (1) != 0)
- return -1;
- }
return 0;
}
@@ -334,16 +383,11 @@ ipsec_set_async_mode (u32 is_enabled)
ipsec_main_t *im = &ipsec_main;
ipsec_sa_t *sa;
- vnet_crypto_request_async_mode (is_enabled);
-
im->async_mode = is_enabled;
/* change SA crypto op data */
pool_foreach (sa, ipsec_sa_pool)
- {
- sa->crypto_op_data =
- (is_enabled ? sa->async_op_data.data : sa->sync_op_data.data);
- }
+ ipsec_sa_set_async_mode (sa, is_enabled);
}
static void
@@ -417,7 +461,7 @@ ipsec_init (vlib_main_t * vm)
vm, im, "crypto engine backend", "esp4-encrypt", "esp4-encrypt-tun",
"esp4-decrypt", "esp4-decrypt-tun", "esp6-encrypt", "esp6-encrypt-tun",
"esp6-decrypt", "esp6-decrypt-tun", "esp-mpls-encrypt-tun",
- ipsec_check_esp_support, NULL, crypto_dispatch_enable_disable);
+ ipsec_check_esp_support, NULL);
im->esp_default_backend = idx;
rv = ipsec_select_esp_backend (im, idx);
@@ -511,6 +555,37 @@ ipsec_init (vlib_main_t * vm)
a->block_align = 1;
a->icv_size = 16;
+ a = im->crypto_algs + IPSEC_CRYPTO_ALG_CHACHA20_POLY1305;
+ a->enc_op_id = VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC;
+ a->dec_op_id = VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC;
+ a->alg = VNET_CRYPTO_ALG_CHACHA20_POLY1305;
+ a->iv_size = 8;
+ a->icv_size = 16;
+
+ a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_NULL_GMAC_128;
+ a->enc_op_id = VNET_CRYPTO_OP_AES_128_NULL_GMAC_ENC;
+ a->dec_op_id = VNET_CRYPTO_OP_AES_128_NULL_GMAC_DEC;
+ a->alg = VNET_CRYPTO_ALG_AES_128_GCM;
+ a->iv_size = 8;
+ a->block_align = 1;
+ a->icv_size = 16;
+
+ a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_NULL_GMAC_192;
+ a->enc_op_id = VNET_CRYPTO_OP_AES_192_NULL_GMAC_ENC;
+ a->dec_op_id = VNET_CRYPTO_OP_AES_192_NULL_GMAC_DEC;
+ a->alg = VNET_CRYPTO_ALG_AES_192_GCM;
+ a->iv_size = 8;
+ a->block_align = 1;
+ a->icv_size = 16;
+
+ a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_NULL_GMAC_256;
+ a->enc_op_id = VNET_CRYPTO_OP_AES_256_NULL_GMAC_ENC;
+ a->dec_op_id = VNET_CRYPTO_OP_AES_256_NULL_GMAC_DEC;
+ a->alg = VNET_CRYPTO_ALG_AES_256_GCM;
+ a->iv_size = 8;
+ a->block_align = 1;
+ a->icv_size = 16;
+
vec_validate (im->integ_algs, IPSEC_INTEG_N_ALG - 1);
ipsec_main_integ_alg_t *i;
@@ -550,12 +625,28 @@ ipsec_init (vlib_main_t * vm)
crypto_engine_backend_register_post_node (vm);
im->ipsec4_out_spd_hash_tbl = NULL;
- im->flow_cache_flag = 0;
+ im->output_flow_cache_flag = 0;
im->ipsec4_out_spd_flow_cache_entries = 0;
im->epoch_count = 0;
im->ipsec4_out_spd_hash_num_buckets =
IPSEC4_OUT_SPD_DEFAULT_HASH_NUM_BUCKETS;
+ im->ipsec4_in_spd_hash_tbl = NULL;
+ im->input_flow_cache_flag = 0;
+ im->ipsec4_in_spd_flow_cache_entries = 0;
+ im->input_epoch_count = 0;
+ im->ipsec4_in_spd_hash_num_buckets = IPSEC4_SPD_DEFAULT_HASH_NUM_BUCKETS;
+
+ vec_validate_init_empty_aligned (im->next_header_registrations, 255, ~0,
+ CLIB_CACHE_LINE_BYTES);
+
+ im->fp_spd_ipv4_out_is_enabled = 0;
+ im->fp_spd_ipv6_out_is_enabled = 0;
+ im->fp_spd_ipv4_in_is_enabled = 0;
+ im->fp_spd_ipv6_in_is_enabled = 0;
+
+ im->fp_lookup_hash_buckets = IPSEC_FP_HASH_LOOKUP_HASH_BUCKETS;
+
return 0;
}
@@ -566,14 +657,56 @@ ipsec_config (vlib_main_t *vm, unformat_input_t *input)
{
ipsec_main_t *im = &ipsec_main;
unformat_input_t sub_input;
+
u32 ipsec4_out_spd_hash_num_buckets;
+ u32 ipsec4_in_spd_hash_num_buckets;
+ u32 ipsec_spd_fp_num_buckets;
+ bool fp_spd_ip4_enabled = false;
+ bool fp_spd_ip6_enabled = false;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (input, "ipv4-outbound-spd-flow-cache on"))
- im->flow_cache_flag = 1;
+ if (unformat (input, "ipv6-outbound-spd-fast-path on"))
+ {
+ im->fp_spd_ipv6_out_is_enabled = 1;
+ fp_spd_ip6_enabled = true;
+ }
+ else if (unformat (input, "ipv6-outbound-spd-fast-path off"))
+ im->fp_spd_ipv6_out_is_enabled = 0;
+ else if (unformat (input, "ipv4-outbound-spd-fast-path on"))
+ {
+ im->fp_spd_ipv4_out_is_enabled = 1;
+ im->output_flow_cache_flag = 0;
+ fp_spd_ip4_enabled = true;
+ }
+ else if (unformat (input, "ipv4-outbound-spd-fast-path off"))
+ im->fp_spd_ipv4_out_is_enabled = 0;
+ else if (unformat (input, "ipv6-inbound-spd-fast-path on"))
+ {
+ im->fp_spd_ipv6_in_is_enabled = 1;
+ fp_spd_ip6_enabled = true;
+ }
+ else if (unformat (input, "ipv6-inbound-spd-fast-path off"))
+ im->fp_spd_ipv6_in_is_enabled = 0;
+ else if (unformat (input, "ipv4-inbound-spd-fast-path on"))
+ {
+ im->fp_spd_ipv4_in_is_enabled = 1;
+ im->input_flow_cache_flag = 0;
+ fp_spd_ip4_enabled = true;
+ }
+ else if (unformat (input, "ipv4-inbound-spd-fast-path off"))
+ im->fp_spd_ipv4_in_is_enabled = 0;
+ else if (unformat (input, "spd-fast-path-num-buckets %d",
+ &ipsec_spd_fp_num_buckets))
+ {
+ /* Number of bihash buckets is power of 2 >= input */
+ im->fp_lookup_hash_buckets = 1ULL
+ << max_log2 (ipsec_spd_fp_num_buckets);
+ }
+ else if (unformat (input, "ipv4-outbound-spd-flow-cache on"))
+ im->output_flow_cache_flag = im->fp_spd_ipv4_out_is_enabled ? 0 : 1;
else if (unformat (input, "ipv4-outbound-spd-flow-cache off"))
- im->flow_cache_flag = 0;
+ im->output_flow_cache_flag = 0;
else if (unformat (input, "ipv4-outbound-spd-hash-buckets %d",
&ipsec4_out_spd_hash_num_buckets))
{
@@ -581,6 +714,16 @@ ipsec_config (vlib_main_t *vm, unformat_input_t *input)
im->ipsec4_out_spd_hash_num_buckets =
1ULL << max_log2 (ipsec4_out_spd_hash_num_buckets);
}
+ else if (unformat (input, "ipv4-inbound-spd-flow-cache on"))
+ im->input_flow_cache_flag = im->fp_spd_ipv4_in_is_enabled ? 0 : 1;
+ else if (unformat (input, "ipv4-inbound-spd-flow-cache off"))
+ im->input_flow_cache_flag = 0;
+ else if (unformat (input, "ipv4-inbound-spd-hash-buckets %d",
+ &ipsec4_in_spd_hash_num_buckets))
+ {
+ im->ipsec4_in_spd_hash_num_buckets =
+ 1ULL << max_log2 (ipsec4_in_spd_hash_num_buckets);
+ }
else if (unformat (input, "ip4 %U", unformat_vlib_cli_sub_input,
&sub_input))
{
@@ -619,11 +762,24 @@ ipsec_config (vlib_main_t *vm, unformat_input_t *input)
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
}
- if (im->flow_cache_flag)
+ if (im->output_flow_cache_flag)
{
vec_add2 (im->ipsec4_out_spd_hash_tbl, im->ipsec4_out_spd_hash_tbl,
im->ipsec4_out_spd_hash_num_buckets);
}
+ if (im->input_flow_cache_flag)
+ {
+ vec_add2 (im->ipsec4_in_spd_hash_tbl, im->ipsec4_in_spd_hash_tbl,
+ im->ipsec4_in_spd_hash_num_buckets);
+ }
+
+ if (fp_spd_ip4_enabled)
+ pool_alloc_aligned (im->fp_ip4_lookup_hashes_pool,
+ IPSEC_FP_IP4_HASHES_POOL_SIZE, CLIB_CACHE_LINE_BYTES);
+
+ if (fp_spd_ip6_enabled)
+ pool_alloc_aligned (im->fp_ip6_lookup_hashes_pool,
+ IPSEC_FP_IP6_HASHES_POOL_SIZE, CLIB_CACHE_LINE_BYTES);
return 0;
}
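
For reference, these unformat clauses are driven from startup.conf.
Assuming the conventional VLIB_CONFIG_FUNCTION registration under the
section name "ipsec", a plausible stanza enabling the IPv4 fast path would
be (sketch; max_log2 rounds the bucket count up to the next power of two,
so 1000000 becomes 1048576):

  ipsec {
    ipv4-outbound-spd-fast-path on
    ipv4-inbound-spd-fast-path on
    spd-fast-path-num-buckets 1000000
  }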
diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h
index 968d377cea0..4aa09d7560e 100644
--- a/src/vnet/ipsec/ipsec.h
+++ b/src/vnet/ipsec/ipsec.h
@@ -30,13 +30,16 @@
#include <vppinfra/bihash_24_16.h>
+#define IPSEC_FP_IP4_HASHES_POOL_SIZE 128
+#define IPSEC_FP_IP6_HASHES_POOL_SIZE 128
+
typedef clib_error_t *(*add_del_sa_sess_cb_t) (u32 sa_index, u8 is_add);
typedef clib_error_t *(*check_support_cb_t) (ipsec_sa_t * sa);
typedef clib_error_t *(*enable_disable_cb_t) (int is_enable);
typedef struct
{
- u64 key[2];
+ u64 key[2]; // 16 bytes
u64 value;
i32 bucket_lock;
u32 un_used;
@@ -54,6 +57,18 @@ typedef union
ipsec4_hash_kv_16_8_t kv_16_8;
} ipsec4_spd_5tuple_t;
+typedef union
+{
+ struct
+ {
+ ip4_address_t ip4_src_addr;
+ ip4_address_t ip4_dest_addr;
+ ipsec_spd_policy_type_t policy_type;
+ u8 pad[4];
+ }; // 16 bytes total
+ ipsec4_hash_kv_16_8_t kv_16_8;
+} ipsec4_inbound_spd_tuple_t;
+
typedef struct
{
u8 *name;
@@ -78,8 +93,6 @@ typedef struct
add_del_sa_sess_cb_t add_del_sa_sess_cb;
/* check support function */
check_support_cb_t check_support_cb;
- /* enable or disable function */
- enable_disable_cb_t enable_disable_cb;
u32 esp4_encrypt_node_index;
u32 esp4_decrypt_node_index;
u32 esp4_encrypt_next_index;
@@ -131,12 +144,27 @@ typedef struct
ipsec_spd_t *spds;
/* pool of policies */
ipsec_policy_t *policies;
+ /* pool of bihash tables for ipv4 ipsec rules */
+ clib_bihash_16_8_t *fp_ip4_lookup_hashes_pool;
+ /* pool of bihash tables for ipv6 ipsec rules */
+ clib_bihash_40_8_t *fp_ip6_lookup_hashes_pool;
+
+ u32 fp_spd_ipv4_out_is_enabled;
+ u32 fp_spd_ipv4_in_is_enabled;
+ u32 fp_spd_ipv6_out_is_enabled;
+ u32 fp_spd_ipv6_in_is_enabled;
+ /* pool of fast path mask types */
+ ipsec_fp_mask_type_entry_t *fp_mask_types;
+ u32 fp_lookup_hash_buckets; /* number of buckets should be power of two */
/* hash tables of UDP port registrations */
uword *udp_port_registrations;
uword *tunnel_index_by_key;
+ /* next_header protocol registration */
+ u16 *next_header_registrations;
+
/* convenience */
vlib_main_t *vlib_main;
vnet_main_t *vnet_main;
@@ -151,6 +179,7 @@ typedef struct
uword *ipsec_if_by_sw_if_index;
ipsec4_hash_kv_16_8_t *ipsec4_out_spd_hash_tbl;
+ ipsec4_hash_kv_16_8_t *ipsec4_in_spd_hash_tbl;
clib_bihash_8_16_t tun4_protect_by_key;
clib_bihash_24_16_t tun6_protect_by_key;
@@ -181,14 +210,6 @@ typedef struct
u32 ah6_encrypt_next_index;
u32 ah6_decrypt_next_index;
- /* tun nodes to drop packets when no crypto alg set on outbound SA */
- u32 esp4_no_crypto_tun_node_index;
- u32 esp6_no_crypto_tun_node_index;
-
- /* tun nodes for encrypt on L2 interfaces */
- u32 esp4_encrypt_l2_tun_node_index;
- u32 esp6_encrypt_l2_tun_node_index;
-
/* pool of ah backends */
ipsec_ah_backend_t *ah_backends;
/* pool of esp backends */
@@ -231,9 +252,15 @@ typedef struct
u32 ipsec4_out_spd_hash_num_buckets;
u32 ipsec4_out_spd_flow_cache_entries;
u32 epoch_count;
+ u8 output_flow_cache_flag;
+
+ u32 ipsec4_in_spd_hash_num_buckets;
+ u32 ipsec4_in_spd_flow_cache_entries;
+ u32 input_epoch_count;
+ u8 input_flow_cache_flag;
+
u8 async_mode;
u16 msg_id_base;
- u8 flow_cache_flag;
} ipsec_main_t;
typedef enum ipsec_format_flags_t_
@@ -318,6 +345,23 @@ ipsec_spinlock_unlock (i32 *lock)
clib_atomic_release (lock);
}
+/* Special case to drop or hand off packets for sync/async modes.
+ *
+ * Unlike sync mode, async mode only enqueues drop or hand-off packets to
+ * next nodes.
+ */
+always_inline void
+ipsec_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
+ u32 thread_index, u32 err, u32 ipsec_sa_err, u16 index,
+ u16 *nexts, u16 drop_next, u32 sa_index)
+{
+ nexts[index] = drop_next;
+ b->error = node->errors[err];
+ if (PREDICT_TRUE (ipsec_sa_err != ~0))
+ vlib_increment_simple_counter (&ipsec_sa_err_counters[ipsec_sa_err],
+ thread_index, sa_index, 1);
+}
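
The esp_encrypt_set_next_index() calls in the esp_encrypt.c hunks above are
assumed to be thin wrappers over this helper that translate a node error
into its per-SA counter; a sketch of that assumed shape (the mapping
function name is inferred, not shown in this patch):

  always_inline void
  esp_encrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
                              u32 thread_index, u32 err, u16 index,
                              u16 *nexts, u16 drop_next, u32 sa_index)
  {
    ipsec_set_next_index (b, node, thread_index, err,
                          esp_encrypt_err_to_sa_err (err), index, nexts,
                          drop_next, sa_index);
  }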
+
u32 ipsec_register_ah_backend (vlib_main_t * vm, ipsec_main_t * im,
const char *name,
const char *ah4_encrypt_node_name,
@@ -335,8 +379,7 @@ u32 ipsec_register_esp_backend (
const char *esp6_decrypt_node_name, const char *esp6_decrypt_tun_node_name,
const char *esp_mpls_encrypt_tun_node_name,
check_support_cb_t esp_check_support_cb,
- add_del_sa_sess_cb_t esp_add_del_sa_sess_cb,
- enable_disable_cb_t enable_disable_cb);
+ add_del_sa_sess_cb_t esp_add_del_sa_sess_cb);
int ipsec_select_ah_backend (ipsec_main_t * im, u32 ah_backend_idx);
int ipsec_select_esp_backend (ipsec_main_t * im, u32 esp_backend_idx);
@@ -344,8 +387,12 @@ int ipsec_select_esp_backend (ipsec_main_t * im, u32 esp_backend_idx);
clib_error_t *ipsec_rsc_in_use (ipsec_main_t * im);
void ipsec_set_async_mode (u32 is_enabled);
-extern void ipsec_register_udp_port (u16 udp_port);
-extern void ipsec_unregister_udp_port (u16 udp_port);
+extern void ipsec_register_udp_port (u16 udp_port, u8 is_ip4);
+extern void ipsec_unregister_udp_port (u16 udp_port, u8 is_ip4);
+
+extern clib_error_t *ipsec_register_next_header (vlib_main_t *vm,
+ u8 next_header,
+ const char *next_node);
#endif /* __IPSEC_H__ */
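A hedged sketch of the new next-header hook declared above (the real consumers are outside this hunk; the protocol and node chosen here are purely illustrative):

static clib_error_t *
my_feature_init (vlib_main_t *vm)
{
  /* Steer decapsulated IP-in-IP payloads to ip4-input; a duplicate
     registration for the same protocol presumably returns an error
     rather than silently overriding the first. */
  return ipsec_register_next_header (vm, IP_PROTOCOL_IP_IN_IP,
				     "ip4-input");
}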
diff --git a/src/vnet/ipsec/ipsec_api.c b/src/vnet/ipsec/ipsec_api.c
index 11bfa41b4f1..21216b1a614 100644
--- a/src/vnet/ipsec/ipsec_api.c
+++ b/src/vnet/ipsec/ipsec_api.c
@@ -124,6 +124,7 @@ typedef struct ipsec_dump_walk_ctx_t_
{
vl_api_registration_t *reg;
u32 context;
+ u32 sw_if_index;
} ipsec_dump_walk_ctx_t;
static walk_rc_t
@@ -149,12 +150,10 @@ send_ipsec_tunnel_protect_details (index_t itpi, void *arg)
sa = ipsec_sa_get (itp->itp_out_sa);
mp->tun.sa_out = htonl (sa->id);
mp->tun.n_sa_in = itp->itp_n_sa_in;
- /* *INDENT-OFF* */
FOR_EACH_IPSEC_PROTECT_INPUT_SA(itp, sa,
({
mp->tun.sa_in[ii++] = htonl (sa->id);
}));
- /* *INDENT-ON* */
vl_api_send_msg (ctx->reg, (u8 *) mp);
@@ -232,7 +231,8 @@ static void vl_api_ipsec_spd_entry_add_del_t_handler
p.is_ipv6 = (itype == IP46_TYPE_IP6);
- p.protocol = mp->entry.protocol;
+ p.protocol =
+ mp->entry.protocol ? mp->entry.protocol : IPSEC_POLICY_PROTOCOL_ANY;
p.rport.start = ntohs (mp->entry.remote_port_start);
p.rport.stop = ntohs (mp->entry.remote_port_stop);
p.lport.start = ntohs (mp->entry.local_port_start);
@@ -262,12 +262,69 @@ static void vl_api_ipsec_spd_entry_add_del_t_handler
goto out;
out:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IPSEC_SPD_ENTRY_ADD_DEL_REPLY,
({
rmp->stat_index = ntohl(stat_index);
}));
- /* *INDENT-ON* */
+}
+
+static void
+vl_api_ipsec_spd_entry_add_del_v2_t_handler (
+ vl_api_ipsec_spd_entry_add_del_v2_t *mp)
+{
+ vlib_main_t *vm __attribute__ ((unused)) = vlib_get_main ();
+ vl_api_ipsec_spd_entry_add_del_reply_t *rmp;
+ ip46_type_t itype;
+ u32 stat_index;
+ int rv;
+
+ stat_index = ~0;
+
+ ipsec_policy_t p;
+
+ clib_memset (&p, 0, sizeof (p));
+
+ p.id = ntohl (mp->entry.spd_id);
+ p.priority = ntohl (mp->entry.priority);
+
+ itype = ip_address_decode (&mp->entry.remote_address_start, &p.raddr.start);
+ ip_address_decode (&mp->entry.remote_address_stop, &p.raddr.stop);
+ ip_address_decode (&mp->entry.local_address_start, &p.laddr.start);
+ ip_address_decode (&mp->entry.local_address_stop, &p.laddr.stop);
+
+ p.is_ipv6 = (itype == IP46_TYPE_IP6);
+
+ p.protocol = mp->entry.protocol;
+ p.rport.start = ntohs (mp->entry.remote_port_start);
+ p.rport.stop = ntohs (mp->entry.remote_port_stop);
+ p.lport.start = ntohs (mp->entry.local_port_start);
+ p.lport.stop = ntohs (mp->entry.local_port_stop);
+
+ rv = ipsec_spd_action_decode (mp->entry.policy, &p.policy);
+
+ if (rv)
+ goto out;
+
+ /* policy action resolve unsupported */
+ if (p.policy == IPSEC_POLICY_ACTION_RESOLVE)
+ {
+ clib_warning ("unsupported action: 'resolve'");
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+ goto out;
+ }
+ p.sa_id = ntohl (mp->entry.sa_id);
+ rv =
+ ipsec_policy_mk_type (mp->entry.is_outbound, p.is_ipv6, p.policy, &p.type);
+ if (rv)
+ goto out;
+
+ rv = ipsec_add_del_policy (vm, &p, mp->is_add, &stat_index);
+ if (rv)
+ goto out;
+
+out:
+ REPLY_MACRO2 (VL_API_IPSEC_SPD_ENTRY_ADD_DEL_V2_REPLY,
+ ({ rmp->stat_index = ntohl (stat_index); }));
}
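The v2 handler above deliberately skips the zero-to-any promotion the v1 handler just gained; side by side, taken from the two handlers in this file:

  /* v1: a wire value of 0 is promoted to the ANY sentinel */
  p.protocol =
    mp->entry.protocol ? mp->entry.protocol : IPSEC_POLICY_PROTOCOL_ANY;

  /* v2: the value is taken verbatim, so ANY must now be requested
     explicitly and 0 can match IP protocol 0 itself */
  p.protocol = mp->entry.protocol;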
static void vl_api_ipsec_sad_entry_add_del_t_handler
@@ -321,18 +378,16 @@ static void vl_api_ipsec_sad_entry_add_del_t_handler
ip_address_decode2 (&mp->entry.tunnel_src, &tun.t_src);
ip_address_decode2 (&mp->entry.tunnel_dst, &tun.t_dst);
- rv = ipsec_sa_add_and_lock (id, spi, proto, crypto_alg, &crypto_key,
- integ_alg, &integ_key, flags, mp->entry.salt,
- htons (mp->entry.udp_src_port),
- htons (mp->entry.udp_dst_port), &tun, &sa_index);
+ rv = ipsec_sa_add_and_lock (
+ id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags,
+ mp->entry.salt, htons (mp->entry.udp_src_port),
+ htons (mp->entry.udp_dst_port), 0, &tun, &sa_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IPSEC_SAD_ENTRY_ADD_DEL_REPLY,
{
rmp->stat_index = htonl (sa_index);
});
- /* *INDENT-ON* */
}
static void vl_api_ipsec_sad_entry_add_del_v2_t_handler
@@ -395,18 +450,16 @@ static void vl_api_ipsec_sad_entry_add_del_v2_t_handler
ip_address_decode2 (&mp->entry.tunnel_src, &tun.t_src);
ip_address_decode2 (&mp->entry.tunnel_dst, &tun.t_dst);
- rv = ipsec_sa_add_and_lock (
- id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags,
- mp->entry.salt, htons (mp->entry.udp_src_port),
- htons (mp->entry.udp_dst_port), &tun, &sa_index);
+ rv = ipsec_sa_add_and_lock (
+ id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags,
+ mp->entry.salt, htons (mp->entry.udp_src_port),
+ htons (mp->entry.udp_dst_port), 0, &tun, &sa_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IPSEC_SAD_ENTRY_ADD_DEL_V2_REPLY,
{
rmp->stat_index = htonl (sa_index);
});
- /* *INDENT-ON* */
}
static int
@@ -419,7 +472,7 @@ ipsec_sad_entry_add_v3 (const vl_api_ipsec_sad_entry_v3_t *entry,
ipsec_protocol_t proto;
ipsec_sa_flags_t flags;
u32 id, spi;
- tunnel_t tun;
+ tunnel_t tun = { 0 };
int rv;
id = ntohl (entry->sad_id);
@@ -453,10 +506,10 @@ ipsec_sad_entry_add_v3 (const vl_api_ipsec_sad_entry_v3_t *entry,
ipsec_key_decode (&entry->crypto_key, &crypto_key);
ipsec_key_decode (&entry->integrity_key, &integ_key);
- return ipsec_sa_add_and_lock (id, spi, proto, crypto_alg, &crypto_key,
- integ_alg, &integ_key, flags, entry->salt,
- htons (entry->udp_src_port),
- htons (entry->udp_dst_port), &tun, sa_index);
+ return ipsec_sa_add_and_lock (
+ id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags,
+ entry->salt, htons (entry->udp_src_port), htons (entry->udp_dst_port), 0,
+ &tun, sa_index);
}
static void
@@ -482,6 +535,56 @@ vl_api_ipsec_sad_entry_add_del_v3_t_handler (
{ rmp->stat_index = htonl (sa_index); });
}
+static int
+ipsec_sad_entry_add_v4 (const vl_api_ipsec_sad_entry_v4_t *entry,
+ u32 *sa_index)
+{
+ ipsec_key_t crypto_key, integ_key;
+ ipsec_crypto_alg_t crypto_alg;
+ ipsec_integ_alg_t integ_alg;
+ ipsec_protocol_t proto;
+ ipsec_sa_flags_t flags;
+ u32 id, spi;
+ tunnel_t tun = { 0 };
+ int rv;
+
+ id = ntohl (entry->sad_id);
+ spi = ntohl (entry->spi);
+
+ rv = ipsec_proto_decode (entry->protocol, &proto);
+
+ if (rv)
+ return rv;
+
+ rv = ipsec_crypto_algo_decode (entry->crypto_algorithm, &crypto_alg);
+
+ if (rv)
+ return rv;
+
+ rv = ipsec_integ_algo_decode (entry->integrity_algorithm, &integ_alg);
+
+ if (rv)
+ return rv;
+
+ flags = ipsec_sa_flags_decode (entry->flags);
+
+ if (flags & IPSEC_SA_FLAG_IS_TUNNEL)
+ {
+ rv = tunnel_decode (&entry->tunnel, &tun);
+
+ if (rv)
+ return rv;
+ }
+
+ ipsec_key_decode (&entry->crypto_key, &crypto_key);
+ ipsec_key_decode (&entry->integrity_key, &integ_key);
+
+ return ipsec_sa_add_and_lock (
+ id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags,
+ entry->salt, htons (entry->udp_src_port), htons (entry->udp_dst_port),
+ ntohl (entry->anti_replay_window_size), &tun, sa_index);
+}
+
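Note that the pre-existing v1 through v3 paths above now pass a literal 0 for the new anti-replay window-size argument of ipsec_sa_add_and_lock, while ipsec_sad_entry_add_v4 forwards ntohl (entry->anti_replay_window_size); read together, these call sites suggest that 0 selects the default window, preserving the old behaviour for existing clients.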
static void
vl_api_ipsec_sad_entry_del_t_handler (vl_api_ipsec_sad_entry_del_t *mp)
{
@@ -507,6 +610,74 @@ vl_api_ipsec_sad_entry_add_t_handler (vl_api_ipsec_sad_entry_add_t *mp)
}
static void
+vl_api_ipsec_sad_entry_add_v2_t_handler (vl_api_ipsec_sad_entry_add_v2_t *mp)
+{
+ vl_api_ipsec_sad_entry_add_reply_t *rmp;
+ u32 sa_index = ~0;
+ int rv;
+
+ rv = ipsec_sad_entry_add_v4 (&mp->entry, &sa_index);
+
+ REPLY_MACRO2 (VL_API_IPSEC_SAD_ENTRY_ADD_V2_REPLY,
+ { rmp->stat_index = htonl (sa_index); });
+}
+
+static void
+vl_api_ipsec_sad_entry_update_t_handler (vl_api_ipsec_sad_entry_update_t *mp)
+{
+ vl_api_ipsec_sad_entry_update_reply_t *rmp;
+ u32 id;
+ tunnel_t tun = { 0 };
+ int rv;
+
+ id = ntohl (mp->sad_id);
+
+ if (mp->is_tun)
+ {
+ rv = tunnel_decode (&mp->tunnel, &tun);
+
+ if (rv)
+ goto out;
+ }
+
+ rv = ipsec_sa_update (id, htons (mp->udp_src_port), htons (mp->udp_dst_port),
+ &tun, mp->is_tun);
+
+out:
+ REPLY_MACRO (VL_API_IPSEC_SAD_ENTRY_UPDATE_REPLY);
+}
+
+static void
+vl_api_ipsec_sad_bind_t_handler (vl_api_ipsec_sad_bind_t *mp)
+{
+ vl_api_ipsec_sad_bind_reply_t *rmp;
+ u32 sa_id;
+ u32 worker;
+ int rv;
+
+ sa_id = ntohl (mp->sa_id);
+ worker = ntohl (mp->worker);
+
+ rv = ipsec_sa_bind (sa_id, worker, true /* bind */);
+
+ REPLY_MACRO (VL_API_IPSEC_SAD_BIND_REPLY);
+}
+
+static void
+vl_api_ipsec_sad_unbind_t_handler (vl_api_ipsec_sad_unbind_t *mp)
+{
+ vl_api_ipsec_sad_unbind_reply_t *rmp;
+ u32 sa_id;
+ int rv;
+
+ sa_id = ntohl (mp->sa_id);
+
+ rv = ipsec_sa_bind (sa_id, ~0, false /* bind */);
+
+ REPLY_MACRO (VL_API_IPSEC_SAD_UNBIND_REPLY);
+}
+
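The two handlers above funnel into a single call; restating their semantics directly from the code:

  rv = ipsec_sa_bind (sa_id, worker, true);  /* pin the SA to a worker */
  rv = ipsec_sa_bind (sa_id, ~0, false);     /* release the binding */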
+static void
send_ipsec_spds_details (ipsec_spd_t * spd, vl_api_registration_t * reg,
u32 context)
{
@@ -660,12 +831,10 @@ vl_api_ipsec_spd_interface_dump_t_handler (vl_api_ipsec_spd_interface_dump_t *
if (mp->spd_index_valid)
{
spd_index = ntohl (mp->spd_index);
- /* *INDENT-OFF* */
hash_foreach(k, v, im->spd_index_by_sw_if_index, ({
if (v == spd_index)
send_ipsec_spd_interface_details(reg, v, k, mp->context);
}));
- /* *INDENT-ON* */
}
else
{
@@ -688,12 +857,10 @@ vl_api_ipsec_itf_create_t_handler (vl_api_ipsec_itf_create_t * mp)
if (!rv)
rv = ipsec_itf_create (ntohl (mp->itf.user_instance), mode, &sw_if_index);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IPSEC_ITF_CREATE_REPLY,
({
rmp->sw_if_index = htonl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -713,6 +880,9 @@ send_ipsec_itf_details (ipsec_itf_t *itf, void *arg)
ipsec_dump_walk_ctx_t *ctx = arg;
vl_api_ipsec_itf_details_t *mp;
+ if (~0 != ctx->sw_if_index && ctx->sw_if_index != itf->ii_sw_if_index)
+ return (WALK_CONTINUE);
+
mp = vl_msg_api_alloc (sizeof (*mp));
clib_memset (mp, 0, sizeof (*mp));
mp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_IPSEC_ITF_DETAILS);
@@ -738,6 +908,7 @@ vl_api_ipsec_itf_dump_t_handler (vl_api_ipsec_itf_dump_t * mp)
ipsec_dump_walk_ctx_t ctx = {
.reg = reg,
.context = mp->context,
+ .sw_if_index = ntohl (mp->sw_if_index),
};
ipsec_itf_walk (send_ipsec_itf_details, &ctx);
@@ -833,7 +1004,10 @@ send_ipsec_sa_details (ipsec_sa_t * sa, void *arg)
mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
}
if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
- mp->replay_window = clib_host_to_net_u64 (sa->replay_window);
+ {
+ mp->replay_window =
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
+ }
mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
@@ -920,7 +1094,10 @@ send_ipsec_sa_v2_details (ipsec_sa_t * sa, void *arg)
mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
}
if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
- mp->replay_window = clib_host_to_net_u64 (sa->replay_window);
+ {
+ mp->replay_window =
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
+ }
mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
@@ -1000,7 +1177,10 @@ send_ipsec_sa_v3_details (ipsec_sa_t *sa, void *arg)
mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
}
if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
- mp->replay_window = clib_host_to_net_u64 (sa->replay_window);
+ {
+ mp->replay_window =
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
+ }
mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
@@ -1026,8 +1206,179 @@ vl_api_ipsec_sa_v3_dump_t_handler (vl_api_ipsec_sa_v3_dump_t *mp)
ipsec_sa_walk (send_ipsec_sa_v3_details, &ctx);
}
+static walk_rc_t
+send_ipsec_sa_v4_details (ipsec_sa_t *sa, void *arg)
+{
+ ipsec_dump_walk_ctx_t *ctx = arg;
+ vl_api_ipsec_sa_v4_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ clib_memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_IPSEC_SA_V4_DETAILS);
+ mp->context = ctx->context;
+
+ mp->entry.sad_id = htonl (sa->id);
+ mp->entry.spi = htonl (sa->spi);
+ mp->entry.protocol = ipsec_proto_encode (sa->protocol);
+
+ mp->entry.crypto_algorithm = ipsec_crypto_algo_encode (sa->crypto_alg);
+ ipsec_key_encode (&sa->crypto_key, &mp->entry.crypto_key);
+
+ mp->entry.integrity_algorithm = ipsec_integ_algo_encode (sa->integ_alg);
+ ipsec_key_encode (&sa->integ_key, &mp->entry.integrity_key);
+
+ mp->entry.flags = ipsec_sad_flags_encode (sa);
+ mp->entry.salt = clib_host_to_net_u32 (sa->salt);
+
+ if (ipsec_sa_is_set_IS_PROTECT (sa))
+ {
+ ipsec_sa_dump_match_ctx_t ctx = {
+ .sai = sa - ipsec_sa_pool,
+ .sw_if_index = ~0,
+ };
+ ipsec_tun_protect_walk (ipsec_sa_dump_match_sa, &ctx);
+
+ mp->sw_if_index = htonl (ctx.sw_if_index);
+ }
+ else
+ mp->sw_if_index = ~0;
+
+ if (ipsec_sa_is_set_IS_TUNNEL (sa))
+ tunnel_encode (&sa->tunnel, &mp->entry.tunnel);
+
+ if (ipsec_sa_is_set_UDP_ENCAP (sa))
+ {
+ mp->entry.udp_src_port = sa->udp_hdr.src_port;
+ mp->entry.udp_dst_port = sa->udp_hdr.dst_port;
+ }
+
+ mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq));
+ mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->seq));
+ if (ipsec_sa_is_set_USE_ESN (sa))
+ {
+ mp->seq_outbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
+ mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
+ }
+ if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
+ {
+ mp->replay_window =
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
+ }
+
+ mp->thread_index = clib_host_to_net_u32 (sa->thread_index);
+ mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
+
+ vl_api_send_msg (ctx->reg, (u8 *) mp);
+
+ return (WALK_CONTINUE);
+}
+
+static void
+vl_api_ipsec_sa_v4_dump_t_handler (vl_api_ipsec_sa_v4_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ ipsec_dump_walk_ctx_t ctx = {
+ .reg = reg,
+ .context = mp->context,
+ };
+
+ ipsec_sa_walk (send_ipsec_sa_v4_details, &ctx);
+}
+
+static walk_rc_t
+send_ipsec_sa_v5_details (ipsec_sa_t *sa, void *arg)
+{
+ ipsec_dump_walk_ctx_t *ctx = arg;
+ vl_api_ipsec_sa_v5_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ clib_memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_IPSEC_SA_V5_DETAILS);
+ mp->context = ctx->context;
+
+ mp->entry.sad_id = htonl (sa->id);
+ mp->entry.spi = htonl (sa->spi);
+ mp->entry.protocol = ipsec_proto_encode (sa->protocol);
+
+ mp->entry.crypto_algorithm = ipsec_crypto_algo_encode (sa->crypto_alg);
+ ipsec_key_encode (&sa->crypto_key, &mp->entry.crypto_key);
+
+ mp->entry.integrity_algorithm = ipsec_integ_algo_encode (sa->integ_alg);
+ ipsec_key_encode (&sa->integ_key, &mp->entry.integrity_key);
+
+ mp->entry.flags = ipsec_sad_flags_encode (sa);
+ mp->entry.salt = clib_host_to_net_u32 (sa->salt);
+
+ if (ipsec_sa_is_set_IS_PROTECT (sa))
+ {
+ ipsec_sa_dump_match_ctx_t ctx = {
+ .sai = sa - ipsec_sa_pool,
+ .sw_if_index = ~0,
+ };
+ ipsec_tun_protect_walk (ipsec_sa_dump_match_sa, &ctx);
+
+ mp->sw_if_index = htonl (ctx.sw_if_index);
+ }
+ else
+ mp->sw_if_index = ~0;
+
+ if (ipsec_sa_is_set_IS_TUNNEL (sa))
+ tunnel_encode (&sa->tunnel, &mp->entry.tunnel);
+
+ if (ipsec_sa_is_set_UDP_ENCAP (sa))
+ {
+ mp->entry.udp_src_port = sa->udp_hdr.src_port;
+ mp->entry.udp_dst_port = sa->udp_hdr.dst_port;
+ }
+
+ mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq));
+ mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->seq));
+ if (ipsec_sa_is_set_USE_ESN (sa))
+ {
+ mp->seq_outbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
+ mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
+ }
+ if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
+ {
+ mp->replay_window =
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
+
+ mp->entry.anti_replay_window_size =
+ clib_host_to_net_u32 (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa));
+ }
+
+ mp->thread_index = clib_host_to_net_u32 (sa->thread_index);
+ mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
+
+ vl_api_send_msg (ctx->reg, (u8 *) mp);
+
+ return (WALK_CONTINUE);
+}
+
+static void
+vl_api_ipsec_sa_v5_dump_t_handler (vl_api_ipsec_sa_v5_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ ipsec_dump_walk_ctx_t ctx = {
+ .reg = reg,
+ .context = mp->context,
+ };
+
+ ipsec_sa_walk (send_ipsec_sa_v5_details, &ctx);
+}
+
static void
-vl_api_ipsec_backend_dump_t_handler (vl_api_ipsec_backend_dump_t * mp)
+vl_api_ipsec_backend_dump_t_handler (vl_api_ipsec_backend_dump_t *mp)
{
vl_api_registration_t *rp;
ipsec_main_t *im = &ipsec_main;
@@ -1043,7 +1394,6 @@ vl_api_ipsec_backend_dump_t_handler (vl_api_ipsec_backend_dump_t * mp)
ipsec_ah_backend_t *ab;
ipsec_esp_backend_t *eb;
- /* *INDENT-OFF* */
pool_foreach (ab, im->ah_backends) {
vl_api_ipsec_backend_details_t *mp = vl_msg_api_alloc (sizeof (*mp));
clib_memset (mp, 0, sizeof (*mp));
@@ -1068,7 +1418,6 @@ vl_api_ipsec_backend_dump_t_handler (vl_api_ipsec_backend_dump_t * mp)
mp->active = mp->index == im->esp_current_backend ? 1 : 0;
vl_api_send_msg (rp, (u8 *)mp);
}
- /* *INDENT-ON* */
}
static void
diff --git a/src/vnet/ipsec/ipsec_cli.c b/src/vnet/ipsec/ipsec_cli.c
index 95e8145fe92..07d9df8f204 100644
--- a/src/vnet/ipsec/ipsec_cli.c
+++ b/src/vnet/ipsec/ipsec_cli.c
@@ -71,14 +71,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_spd_command, static) = {
.path = "set interface ipsec spd",
.short_help =
"set interface ipsec spd <int> <id>",
.function = set_interface_spd_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ipsec_sa_add_del_command_fn (vlib_main_t * vm,
@@ -88,6 +86,7 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm,
unformat_input_t _line_input, *line_input = &_line_input;
ipsec_crypto_alg_t crypto_alg;
ipsec_integ_alg_t integ_alg;
+ u32 anti_replay_window_size;
ipsec_protocol_t proto;
ipsec_sa_flags_t flags;
clib_error_t *error;
@@ -105,6 +104,7 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm,
is_add = 0;
flags = IPSEC_SA_FLAG_NONE;
proto = IPSEC_PROTOCOL_ESP;
+ anti_replay_window_size = 0;
integ_alg = IPSEC_INTEG_ALG_NONE;
crypto_alg = IPSEC_CRYPTO_ALG_NONE;
udp_src = udp_dst = IPSEC_UDP_PORT_NONE;
@@ -143,7 +143,7 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm,
else if (unformat (line_input, "integ-alg %U",
unformat_ipsec_integ_alg, &integ_alg))
;
- else if (unformat (line_input, " %U", unformat_tunnel, &tun))
+ else if (unformat (line_input, "%U", unformat_tunnel, &tun))
{
flags |= IPSEC_SA_FLAG_IS_TUNNEL;
if (AF_IP6 == tunnel_get_af (&tun))
@@ -153,6 +153,9 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm,
udp_src = i;
else if (unformat (line_input, "udp-dst-port %d", &i))
udp_dst = i;
+ else if (unformat (line_input, "anti-replay-size %d",
+ &anti_replay_window_size))
+ flags |= IPSEC_SA_FLAG_USE_ANTI_REPLAY;
else if (unformat (line_input, "inbound"))
flags |= IPSEC_SA_FLAG_IS_INBOUND;
else if (unformat (line_input, "use-anti-replay"))
@@ -184,9 +187,10 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm,
error = clib_error_return (0, "missing spi");
goto done;
}
- rv = ipsec_sa_add_and_lock (id, spi, proto, crypto_alg, &ck, integ_alg,
- &ik, flags, clib_host_to_net_u32 (salt),
- udp_src, udp_dst, &tun, &sai);
+ rv =
+ ipsec_sa_add_and_lock (id, spi, proto, crypto_alg, &ck, integ_alg, &ik,
+ flags, clib_host_to_net_u32 (salt), udp_src,
+ udp_dst, anti_replay_window_size, &tun, &sai);
}
else
{
@@ -202,14 +206,77 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_sa_add_del_command, static) = {
.path = "ipsec sa",
.short_help =
"ipsec sa [add|del]",
.function = ipsec_sa_add_del_command_fn,
};
-/* *INDENT-ON* */
+
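A hedged debug-CLI example exercising the new anti-replay-size token parsed above (algorithm, key and salt values are placeholders, and the surrounding token order follows the unformat branches; note the token also sets IPSEC_SA_FLAG_USE_ANTI_REPLAY):

  vpp# ipsec sa add 10 spi 1001 esp crypto-alg aes-gcm-128 crypto-key 6541686776336961656264656f6f6579 salt 0x1234 anti-replay-size 1024 inbound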
+static clib_error_t *
+ipsec_sa_bind_cli (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 id = ~0;
+ u32 worker = ~0;
+ bool bind = 1;
+ int rv;
+ clib_error_t *error = NULL;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "unbind"))
+ bind = 0;
+ else if (id == ~0 && unformat (line_input, "%u", &id))
+ ;
+ else if (unformat (line_input, "%u", &worker))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (id == ~0)
+ {
+ error = clib_error_return (0, "please specify SA ID");
+ goto done;
+ }
+
+ if (bind && ~0 == worker)
+ {
+ error = clib_error_return (0, "please specify worker to bind to");
+ goto done;
+ }
+
+ rv = ipsec_sa_bind (id, worker, bind);
+ switch (rv)
+ {
+ case VNET_API_ERROR_INVALID_VALUE:
+ error = clib_error_return (0, "please specify a valid SA ID");
+ break;
+ case VNET_API_ERROR_INVALID_WORKER:
+ error = clib_error_return (0, "please specify a valid worker index");
+ break;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (ipsec_sa_bind_cmd, static) = {
+ .path = "ipsec sa bind",
+  .short_help = "ipsec sa bind [unbind] <sa-id> <worker>",
+ .function = ipsec_sa_bind_cli,
+};
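A usage sketch for the command just registered (per the parse loop, unbind is a token on the same path; the first form pins SA 10 to worker 1, the second releases it):

  vpp# ipsec sa bind 10 1
  vpp# ipsec sa bind unbind 10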
static clib_error_t *
ipsec_spd_add_del_command_fn (vlib_main_t * vm,
@@ -254,14 +321,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_spd_add_del_command, static) = {
.path = "ipsec spd",
.short_help =
"ipsec spd [add|del] <id>",
.function = ipsec_spd_add_del_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -279,6 +344,7 @@ ipsec_policy_add_del_command_fn (vlib_main_t * vm,
clib_memset (&p, 0, sizeof (p));
p.lport.stop = p.rport.stop = ~0;
remote_range_set = local_range_set = is_outbound = 0;
+ p.protocol = IPSEC_POLICY_PROTOCOL_ANY;
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -395,27 +461,23 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_policy_add_del_command, static) = {
.path = "ipsec policy",
.short_help =
"ipsec policy [add|del] spd <id> priority <n> ",
.function = ipsec_policy_add_del_command_fn,
};
-/* *INDENT-ON* */
static void
ipsec_sa_show_all (vlib_main_t * vm, ipsec_main_t * im, u8 detail)
{
u32 sai;
- /* *INDENT-OFF* */
pool_foreach_index (sai, ipsec_sa_pool)
{
vlib_cli_output (vm, "%U", format_ipsec_sa, sai,
(detail ? IPSEC_FORMAT_DETAIL : IPSEC_FORMAT_BRIEF));
}
- /* *INDENT-ON* */
}
static void
@@ -423,16 +485,18 @@ ipsec_spd_show_all (vlib_main_t * vm, ipsec_main_t * im)
{
u32 spdi;
- /* *INDENT-OFF* */
pool_foreach_index (spdi, im->spds) {
vlib_cli_output(vm, "%U", format_ipsec_spd, spdi);
}
- if (im->flow_cache_flag)
+ if (im->output_flow_cache_flag)
+ {
+ vlib_cli_output (vm, "%U", format_ipsec_out_spd_flow_cache);
+ }
+ if (im->input_flow_cache_flag)
{
- vlib_cli_output (vm, "%U", format_ipsec_spd_flow_cache);
+ vlib_cli_output (vm, "%U", format_ipsec_in_spd_flow_cache);
}
- /* *INDENT-ON* */
}
static void
@@ -443,14 +507,12 @@ ipsec_spd_bindings_show_all (vlib_main_t * vm, ipsec_main_t * im)
vlib_cli_output (vm, "SPD Bindings:");
- /* *INDENT-OFF* */
hash_foreach(sw_if_index, spd_id, im->spd_index_by_sw_if_index, ({
spd = pool_elt_at_index (im->spds, spd_id);
vlib_cli_output (vm, " %d -> %U", spd->id,
format_vnet_sw_if_index_name, im->vnet_main,
sw_if_index);
}));
- /* *INDENT-ON* */
}
static walk_rc_t
@@ -484,13 +546,11 @@ show_ipsec_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ipsec_command, static) = {
.path = "show ipsec all",
.short_help = "show ipsec all",
.function = show_ipsec_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_ipsec_sa_command_fn (vlib_main_t * vm,
@@ -535,12 +595,10 @@ clear_ipsec_sa_command_fn (vlib_main_t * vm,
if (~0 == sai)
{
- /* *INDENT-OFF* */
pool_foreach_index (sai, ipsec_sa_pool)
{
ipsec_sa_clear (sai);
}
- /* *INDENT-ON* */
}
else
{
@@ -553,7 +611,6 @@ clear_ipsec_sa_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ipsec_sa_command, static) = {
.path = "show ipsec sa",
.short_help = "show ipsec sa [index]",
@@ -565,7 +622,6 @@ VLIB_CLI_COMMAND (clear_ipsec_sa_command, static) = {
.short_help = "clear ipsec sa [index]",
.function = clear_ipsec_sa_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_ipsec_spd_command_fn (vlib_main_t * vm,
@@ -595,13 +651,11 @@ show_ipsec_spd_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ipsec_spd_command, static) = {
.path = "show ipsec spd",
.short_help = "show ipsec spd [index]",
.function = show_ipsec_spd_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_ipsec_tunnel_command_fn (vlib_main_t * vm,
@@ -613,13 +667,11 @@ show_ipsec_tunnel_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ipsec_tunnel_command, static) = {
.path = "show ipsec tunnel",
.short_help = "show ipsec tunnel",
.function = show_ipsec_tunnel_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ipsec_show_backends_command_fn (vlib_main_t * vm,
@@ -634,7 +686,6 @@ ipsec_show_backends_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "IPsec AH backends available:");
u8 *s = format (NULL, "%=25s %=25s %=10s\n", "Name", "Index", "Active");
ipsec_ah_backend_t *ab;
- /* *INDENT-OFF* */
pool_foreach (ab, im->ah_backends) {
s = format (s, "%=25s %=25u %=10s\n", ab->name, ab - im->ah_backends,
ab - im->ah_backends == im->ah_current_backend ? "yes" : "no");
@@ -650,13 +701,11 @@ ipsec_show_backends_command_fn (vlib_main_t * vm,
s = format (s, " dec6 %s (next %d)\n", n->name, ab->ah6_decrypt_next_index);
}
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "%v", s);
- _vec_len (s) = 0;
+ vec_set_len (s, 0);
vlib_cli_output (vm, "IPsec ESP backends available:");
s = format (s, "%=25s %=25s %=10s\n", "Name", "Index", "Active");
ipsec_esp_backend_t *eb;
- /* *INDENT-OFF* */
pool_foreach (eb, im->esp_backends) {
s = format (s, "%=25s %=25u %=10s\n", eb->name, eb - im->esp_backends,
eb - im->esp_backends == im->esp_current_backend ? "yes"
@@ -673,20 +722,17 @@ ipsec_show_backends_command_fn (vlib_main_t * vm,
s = format (s, " dec6 %s (next %d)\n", n->name, eb->esp6_decrypt_next_index);
}
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "%v", s);
vec_free (s);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_show_backends_command, static) = {
.path = "show ipsec backends",
.short_help = "show ipsec backends",
.function = ipsec_show_backends_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ipsec_select_backend_command_fn (vlib_main_t * vm,
@@ -748,14 +794,12 @@ ipsec_select_backend_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_select_backend_command, static) = {
.path = "ipsec select backend",
.short_help = "ipsec select backend <ah|esp> <backend index>",
.function = ipsec_select_backend_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
clear_ipsec_counters_command_fn (vlib_main_t * vm,
@@ -764,18 +808,17 @@ clear_ipsec_counters_command_fn (vlib_main_t * vm,
{
vlib_clear_combined_counters (&ipsec_spd_policy_counters);
vlib_clear_combined_counters (&ipsec_sa_counters);
- vlib_clear_simple_counters (&ipsec_sa_lost_counters);
+ for (int i = 0; i < IPSEC_SA_N_ERRORS; i++)
+ vlib_clear_simple_counters (&ipsec_sa_err_counters[i]);
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_ipsec_counters_command, static) = {
.path = "clear ipsec counters",
.short_help = "clear ipsec counters",
.function = clear_ipsec_counters_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ipsec_tun_protect_cmd (vlib_main_t * vm,
@@ -825,7 +868,6 @@ ipsec_tun_protect_cmd (vlib_main_t * vm,
/**
* Protect tunnel with IPSEC
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_tun_protect_cmd_node, static) =
{
.path = "ipsec tunnel protect",
@@ -833,7 +875,6 @@ VLIB_CLI_COMMAND (ipsec_tun_protect_cmd_node, static) =
.short_help = "ipsec tunnel protect <interface> input-sa <SA> output-sa <SA> [add|del]",
// this is not MP safe
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -848,14 +889,12 @@ ipsec_tun_protect_show (vlib_main_t * vm,
/**
* show IPSEC tunnel protection
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_tun_protect_show_node, static) =
{
.path = "show ipsec protect",
.function = ipsec_tun_protect_show,
.short_help = "show ipsec protect",
};
-/* *INDENT-ON* */
static int
ipsec_tun_protect4_hash_show_one (clib_bihash_kv_8_16_t * kv, void *arg)
@@ -904,14 +943,12 @@ ipsec_tun_protect_hash_show (vlib_main_t * vm,
/**
* show IPSEC tunnel protection hash tables
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_tun_protect_hash_show_node, static) =
{
.path = "show ipsec protect-hash",
.function = ipsec_tun_protect_hash_show,
.short_help = "show ipsec protect-hash",
};
-/* *INDENT-ON* */
clib_error_t *
ipsec_cli_init (vlib_main_t * vm)
@@ -948,13 +985,11 @@ set_async_mode_command_fn (vlib_main_t * vm, unformat_input_t * input,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_async_mode_command, static) = {
.path = "set ipsec async mode",
.short_help = "set ipsec async mode on|off",
.function = set_async_mode_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipsec/ipsec_format.c b/src/vnet/ipsec/ipsec_format.c
index 751d098bcdd..e421a0d96b4 100644
--- a/src/vnet/ipsec/ipsec_format.c
+++ b/src/vnet/ipsec/ipsec_format.c
@@ -153,8 +153,8 @@ format_ipsec_replay_window (u8 * s, va_list * args)
return s;
}
-u8 *
-format_ipsec_policy (u8 * s, va_list * args)
+static u8 *
+format_ipsec_policy_with_suffix (u8 *s, va_list *args, u8 *suffix)
{
u32 pi = va_arg (*args, u32);
ip46_type_t ip_type = IP46_TYPE_IP4;
@@ -168,7 +168,7 @@ format_ipsec_policy (u8 * s, va_list * args)
pi, p->priority,
format_ipsec_policy_action, p->policy,
format_ipsec_policy_type, p->type);
- if (p->protocol)
+ if (p->protocol != IPSEC_POLICY_PROTOCOL_ANY)
{
s = format (s, "%U", format_ip_protocol, p->protocol);
}
@@ -180,6 +180,9 @@ format_ipsec_policy (u8 * s, va_list * args)
{
s = format (s, " sa %u", p->sa_id);
}
+ if (suffix)
+ s = format (s, " %s", suffix);
+
if (p->is_ipv6)
{
ip_type = IP46_TYPE_IP6;
@@ -201,6 +204,152 @@ format_ipsec_policy (u8 * s, va_list * args)
}
u8 *
+format_ipsec_policy (u8 *s, va_list *args)
+{
+ return format_ipsec_policy_with_suffix (s, args, 0);
+}
+
+u8 *
+format_ipsec_fp_policy (u8 *s, va_list *args)
+{
+ return format_ipsec_policy_with_suffix (s, args, (u8 *) "<fast-path>");
+}
+
+/**
+ * @brief Context used when walking the fp bihash table. We need to keep
+ * only the policies of the given type as we walk the table.
+ */
+typedef struct ipsec_spd_policy_ctx_t_
+{
+ u32 *policies;
+ ipsec_spd_policy_type_t t;
+} ipsec_fp_walk_ctx_t;
+
+static int
+ipsec_fp_table_walk_ip4_cb (clib_bihash_kv_16_8_t *kvp, void *arg)
+{
+ ipsec_fp_walk_ctx_t *ctx = (ipsec_fp_walk_ctx_t *) arg;
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_policy_t *p;
+
+ ipsec_fp_lookup_value_t *val = (ipsec_fp_lookup_value_t *) &kvp->value;
+
+ u32 *policy_id;
+
+ vec_foreach (policy_id, val->fp_policies_ids)
+ {
+ p = pool_elt_at_index (im->policies, *policy_id);
+ if (p->type == ctx->t)
+ vec_add1 (ctx->policies, *policy_id);
+ }
+
+ return BIHASH_WALK_CONTINUE;
+}
+
+static int
+ipsec_fp_table_walk_ip6_cb (clib_bihash_kv_40_8_t *kvp, void *arg)
+{
+ ipsec_fp_walk_ctx_t *ctx = (ipsec_fp_walk_ctx_t *) arg;
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_policy_t *p;
+
+ ipsec_fp_lookup_value_t *val = (ipsec_fp_lookup_value_t *) &kvp->value;
+
+ u32 *policy_id;
+
+ vec_foreach (policy_id, val->fp_policies_ids)
+ {
+ p = pool_elt_at_index (im->policies, *policy_id);
+ if (p->type == ctx->t)
+ vec_add1 (ctx->policies, *policy_id);
+ }
+
+ return BIHASH_WALK_CONTINUE;
+}
+
+u8 *
+format_ipsec_fp_policies (u8 *s, va_list *args)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_spd_t *spd = va_arg (*args, ipsec_spd_t *);
+ ipsec_spd_policy_type_t t = va_arg (*args, ipsec_spd_policy_type_t);
+ u32 *i;
+ ipsec_fp_walk_ctx_t ctx = {
+ .policies = 0,
+ .t = t,
+ };
+
+ u32 ip4_in_lookup_hash_idx = spd->fp_spd.ip4_in_lookup_hash_idx;
+ u32 ip4_out_lookup_hash_idx = spd->fp_spd.ip4_out_lookup_hash_idx;
+ u32 ip6_in_lookup_hash_idx = spd->fp_spd.ip6_in_lookup_hash_idx;
+ u32 ip6_out_lookup_hash_idx = spd->fp_spd.ip6_out_lookup_hash_idx;
+
+ switch (t)
+ {
+ case IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT:
+ case IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS:
+ case IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD:
+ if (INDEX_INVALID != ip4_in_lookup_hash_idx)
+ {
+ clib_bihash_16_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip4_lookup_hashes_pool, ip4_in_lookup_hash_idx);
+
+ clib_bihash_foreach_key_value_pair_16_8 (
+ bihash_table, ipsec_fp_table_walk_ip4_cb, &ctx);
+ }
+
+ break;
+
+ case IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT:
+ case IPSEC_SPD_POLICY_IP6_INBOUND_BYPASS:
+ case IPSEC_SPD_POLICY_IP6_INBOUND_DISCARD:
+ if (INDEX_INVALID != ip6_in_lookup_hash_idx)
+ {
+ clib_bihash_40_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip6_lookup_hashes_pool, ip6_in_lookup_hash_idx);
+
+ clib_bihash_foreach_key_value_pair_40_8 (
+ bihash_table, ipsec_fp_table_walk_ip6_cb, &ctx);
+ }
+
+ break;
+ case IPSEC_SPD_POLICY_IP4_OUTBOUND:
+ if (INDEX_INVALID != ip4_out_lookup_hash_idx)
+ {
+ clib_bihash_16_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip4_lookup_hashes_pool, ip4_out_lookup_hash_idx);
+
+ clib_bihash_foreach_key_value_pair_16_8 (
+ bihash_table, ipsec_fp_table_walk_ip4_cb, &ctx);
+ }
+
+ break;
+ case IPSEC_SPD_POLICY_IP6_OUTBOUND:
+ if (INDEX_INVALID != ip6_out_lookup_hash_idx)
+ {
+ clib_bihash_40_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip6_lookup_hashes_pool, ip6_out_lookup_hash_idx);
+
+ clib_bihash_foreach_key_value_pair_40_8 (
+ bihash_table, ipsec_fp_table_walk_ip6_cb, &ctx);
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ vec_foreach (i, ctx.policies)
+ {
+ s = format (s, "\n %U", format_ipsec_fp_policy, *i);
+ }
+
+ vec_free (ctx.policies);
+
+ return s;
+}
+
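Since format callbacks pull their own operands from the va_list, a caller passes both arguments after the %U conversion; the spd formatter further down does exactly this:

  s = format (s, "\n %U", format_ipsec_fp_policies, spd,
	      IPSEC_SPD_POLICY_IP4_OUTBOUND);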
+u8 *
format_ipsec_spd (u8 * s, va_list * args)
{
u32 si = va_arg (*args, u32);
@@ -218,12 +367,13 @@ format_ipsec_spd (u8 * s, va_list * args)
s = format (s, "spd %u", spd->id);
-#define _(v, n) \
- s = format (s, "\n %s:", n); \
- vec_foreach(i, spd->policies[IPSEC_SPD_POLICY_##v]) \
- { \
- s = format (s, "\n %U", format_ipsec_policy, *i); \
- }
+#define _(v, n) \
+ s = format (s, "\n %s:", n); \
+ vec_foreach (i, spd->policies[IPSEC_SPD_POLICY_##v]) \
+ { \
+ s = format (s, "\n %U", format_ipsec_policy, *i); \
+ } \
+ s = format (s, "\n %U", format_ipsec_fp_policies, spd, IPSEC_SPD_POLICY_##v);
foreach_ipsec_spd_policy_type;
#undef _
@@ -232,17 +382,28 @@ done:
}
u8 *
-format_ipsec_spd_flow_cache (u8 *s, va_list *args)
+format_ipsec_out_spd_flow_cache (u8 *s, va_list *args)
{
ipsec_main_t *im = &ipsec_main;
- s = format (s, "\nip4-outbound-spd-flow-cache-entries: %u",
+ s = format (s, "\nipv4-outbound-spd-flow-cache-entries: %u",
im->ipsec4_out_spd_flow_cache_entries);
return (s);
}
u8 *
+format_ipsec_in_spd_flow_cache (u8 *s, va_list *args)
+{
+ ipsec_main_t *im = &ipsec_main;
+
+ s = format (s, "\nipv4-inbound-spd-flow-cache-entries: %u",
+ im->ipsec4_in_spd_flow_cache_entries);
+
+ return (s);
+}
+
+u8 *
format_ipsec_key (u8 * s, va_list * args)
{
ipsec_key_t *key = va_arg (*args, ipsec_key_t *);
@@ -283,7 +444,7 @@ format_ipsec_sa (u8 * s, va_list * args)
u32 sai = va_arg (*args, u32);
ipsec_format_flags_t flags = va_arg (*args, ipsec_format_flags_t);
vlib_counter_t counts;
- counter_t lost;
+ counter_t errors;
ipsec_sa_t *sa;
if (pool_is_free_index (ipsec_sa_pool, sai))
@@ -305,16 +466,18 @@ format_ipsec_sa (u8 * s, va_list * args)
s = format (s, "\n salt 0x%x", clib_net_to_host_u32 (sa->salt));
s = format (s, "\n thread-index:%d", sa->thread_index);
s = format (s, "\n seq %u seq-hi %u", sa->seq, sa->seq_hi);
- s = format (s, "\n window %U", format_ipsec_replay_window,
- sa->replay_window);
- s = format (s, "\n crypto alg %U",
- format_ipsec_crypto_alg, sa->crypto_alg);
+ s = format (s, "\n window-size: %llu",
+ IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa));
+ s = format (s, "\n window: Bl <- %U Tl", format_ipsec_replay_window,
+ ipsec_sa_anti_replay_get_64b_window (sa));
+ s =
+ format (s, "\n crypto alg %U", format_ipsec_crypto_alg, sa->crypto_alg);
if (sa->crypto_alg && (flags & IPSEC_FORMAT_INSECURE))
s = format (s, " key %U", format_ipsec_key, &sa->crypto_key);
else
s = format (s, " key [redacted]");
- s = format (s, "\n integrity alg %U",
- format_ipsec_integ_alg, sa->integ_alg);
+ s =
+ format (s, "\n integrity alg %U", format_ipsec_integ_alg, sa->integ_alg);
if (sa->integ_alg && (flags & IPSEC_FORMAT_INSECURE))
s = format (s, " key %U", format_ipsec_key, &sa->integ_key);
else
@@ -324,12 +487,17 @@ format_ipsec_sa (u8 * s, va_list * args)
clib_host_to_net_u16 (sa->udp_hdr.dst_port));
vlib_get_combined_counter (&ipsec_sa_counters, sai, &counts);
- lost = vlib_get_simple_counter (&ipsec_sa_lost_counters, sai);
- s = format (s, "\n tx/rx:[packets:%Ld bytes:%Ld], lost:[packets:%Ld]",
- counts.packets, counts.bytes, lost);
+ s = format (s, "\n tx/rx:[packets:%Ld bytes:%Ld]", counts.packets,
+ counts.bytes);
+ s = format (s, "\n SA errors:");
+#define _(index, val, err, desc) \
+ errors = vlib_get_simple_counter (&ipsec_sa_err_counters[index], sai); \
+ s = format (s, "\n " #desc ":[packets:%Ld]", errors);
+ foreach_ipsec_sa_err
+#undef _
- if (ipsec_sa_is_set_IS_TUNNEL (sa))
- s = format (s, "\n%U", format_tunnel, &sa->tunnel, 3);
+  if (ipsec_sa_is_set_IS_TUNNEL (sa))
+    s = format (s, "\n%U", format_tunnel, &sa->tunnel, 3);
done:
return (s);
@@ -381,12 +549,10 @@ format_ipsec_tun_protect (u8 * s, va_list * args)
IPSEC_FORMAT_BRIEF);
s = format (s, "\n input-sa:");
- /* *INDENT-OFF* */
FOR_EACH_IPSEC_PROTECT_INPUT_SAI(itp, sai,
({
s = format (s, "\n %U", format_ipsec_sa, sai, IPSEC_FORMAT_BRIEF);
}));
- /* *INDENT-ON* */
return (s);
}
diff --git a/src/vnet/ipsec/ipsec_handoff.c b/src/vnet/ipsec/ipsec_handoff.c
index e8daa1a6a23..68a859cf732 100644
--- a/src/vnet/ipsec/ipsec_handoff.c
+++ b/src/vnet/ipsec/ipsec_handoff.c
@@ -259,7 +259,6 @@ VLIB_NODE_FN (ah6_decrypt_handoff) (vlib_main_t * vm,
return ipsec_handoff (vm, node, from_frame, im->ah6_dec_fq_index);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp4_encrypt_handoff) = {
.name = "esp4-encrypt-handoff",
.vector_size = sizeof (u32),
@@ -416,7 +415,6 @@ VLIB_REGISTER_NODE (ah6_decrypt_handoff) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipsec/ipsec_input.c b/src/vnet/ipsec/ipsec_input.c
index 96bad28c2b5..9cec7dd15d1 100644
--- a/src/vnet/ipsec/ipsec_input.c
+++ b/src/vnet/ipsec/ipsec_input.c
@@ -19,6 +19,7 @@
#include <vnet/api_errno.h>
#include <vnet/ip/ip.h>
#include <vnet/feature/feature.h>
+#include <vnet/ipsec/ipsec_spd_fp_lookup.h>
#include <vnet/ipsec/ipsec.h>
#include <vnet/ipsec/esp.h>
@@ -51,6 +52,7 @@ typedef struct
ip_protocol_t proto;
u32 spd;
u32 policy_index;
+ u32 policy_type;
u32 sa_id;
u32 spi;
u32 seq;
@@ -64,15 +66,119 @@ format_ipsec_input_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ipsec_input_trace_t *t = va_arg (*args, ipsec_input_trace_t *);
- s = format (s, "%U: sa_id %u spd %u policy %d spi %u (0x%08x) seq %u",
- format_ip_protocol, t->proto, t->sa_id,
- t->spd, t->policy_index, t->spi, t->spi, t->seq);
+ s =
+ format (s, "%U: sa_id %u type: %u spd %u policy %d spi %u (0x%08x) seq %u",
+ format_ip_protocol, t->proto, t->sa_id, t->policy_type, t->spd,
+ t->policy_index, t->spi, t->spi, t->seq);
return s;
}
+always_inline void
+ipsec4_input_spd_add_flow_cache_entry (ipsec_main_t *im, u32 sa, u32 da,
+ ipsec_spd_policy_type_t policy_type,
+ u32 pol_id)
+{
+ u64 hash;
+ u8 is_overwrite = 0, is_stale_overwrite = 0;
+ /* Store in network byte order to avoid conversion on lookup */
+ ipsec4_inbound_spd_tuple_t ip4_tuple = {
+ .ip4_src_addr = (ip4_address_t) clib_host_to_net_u32 (sa),
+ .ip4_dest_addr = (ip4_address_t) clib_host_to_net_u32 (da),
+ .policy_type = policy_type
+ };
+
+ ip4_tuple.kv_16_8.value =
+ (((u64) pol_id) << 32) | ((u64) im->input_epoch_count);
+
+ hash = ipsec4_hash_16_8 (&ip4_tuple.kv_16_8);
+ hash &= (im->ipsec4_in_spd_hash_num_buckets - 1);
+
+ ipsec_spinlock_lock (&im->ipsec4_in_spd_hash_tbl[hash].bucket_lock);
+  /* Check if we are overwriting an existing entry so we know
+     whether to increment the flow cache counter. Since the flow
+     cache counter is reset on any policy add/remove, but the
+     hash table values are not, we need to check whether the entry
+     being overwritten is stale. When overwriting a stale entry,
+     we still want to increment the flow cache counter. */
+  is_overwrite = (im->ipsec4_in_spd_hash_tbl[hash].value != 0);
+  /* Check if we are overwriting a stale entry by comparing
+     with the current epoch count */
+ if (PREDICT_FALSE (is_overwrite))
+ is_stale_overwrite =
+ (im->input_epoch_count !=
+ ((u32) (im->ipsec4_in_spd_hash_tbl[hash].value & 0xFFFFFFFF)));
+ clib_memcpy_fast (&im->ipsec4_in_spd_hash_tbl[hash], &ip4_tuple.kv_16_8,
+ sizeof (ip4_tuple.kv_16_8));
+ ipsec_spinlock_unlock (&im->ipsec4_in_spd_hash_tbl[hash].bucket_lock);
+
+  /* Increment the counter tracking active flow cache entries
+     when installing a fresh entry or overwriting a stale one */
+ if (!is_overwrite || is_stale_overwrite)
+ clib_atomic_fetch_add_relax (&im->ipsec4_in_spd_flow_cache_entries, 1);
+
+ return;
+}
+
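For context, a sketch of the invalidation side implied by the stale-entry comments above; it lives with the policy add/delete code rather than in this hunk, so treat the exact statements as an assumption:

  /* Hypothetical invalidation on policy add/remove: no table wipe is
     needed, since stale entries fail the low-32-bit epoch comparison
     on their next lookup. */
  im->input_epoch_count++;
  im->ipsec4_in_spd_flow_cache_entries = 0;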
always_inline ipsec_policy_t *
-ipsec_input_policy_match (ipsec_spd_t * spd, u32 sa, u32 da,
+ipsec4_input_spd_find_flow_cache_entry (ipsec_main_t *im, u32 sa, u32 da,
+ ipsec_spd_policy_type_t policy_type)
+{
+ ipsec_policy_t *p = NULL;
+ ipsec4_hash_kv_16_8_t kv_result;
+ u64 hash;
+ ipsec4_inbound_spd_tuple_t ip4_tuple = { .ip4_src_addr = (ip4_address_t) sa,
+ .ip4_dest_addr = (ip4_address_t) da,
+ .policy_type = policy_type };
+
+ hash = ipsec4_hash_16_8 (&ip4_tuple.kv_16_8);
+ hash &= (im->ipsec4_in_spd_hash_num_buckets - 1);
+
+ ipsec_spinlock_lock (&im->ipsec4_in_spd_hash_tbl[hash].bucket_lock);
+ kv_result = im->ipsec4_in_spd_hash_tbl[hash];
+ ipsec_spinlock_unlock (&im->ipsec4_in_spd_hash_tbl[hash].bucket_lock);
+
+ if (ipsec4_hash_key_compare_16_8 ((u64 *) &ip4_tuple.kv_16_8,
+ (u64 *) &kv_result))
+ {
+ if (im->input_epoch_count == ((u32) (kv_result.value & 0xFFFFFFFF)))
+ {
+ /* Get the policy based on the index */
+ p =
+ pool_elt_at_index (im->policies, ((u32) (kv_result.value >> 32)));
+ }
+ }
+
+ return p;
+}
+
+always_inline void
+ipsec_fp_in_5tuple_from_ip4_range (ipsec_fp_5tuple_t *tuple, u32 sa, u32 da,
+ u32 spi, u8 action)
+{
+ clib_memset (tuple->l3_zero_pad, 0, sizeof (tuple->l3_zero_pad));
+ tuple->laddr.as_u32 = da;
+ tuple->raddr.as_u32 = sa;
+ tuple->spi = spi;
+ tuple->action = action;
+ tuple->is_ipv6 = 0;
+}
+
+always_inline void
+ipsec_fp_in_5tuple_from_ip6_range (ipsec_fp_5tuple_t *tuple, ip6_address_t *sa,
+ ip6_address_t *da, u32 spi, u8 action)
+
+{
+ clib_memcpy (&tuple->ip6_laddr, da, sizeof (ip6_address_t));
+ clib_memcpy (&tuple->ip6_raddr, sa, sizeof (ip6_address_t));
+
+ tuple->spi = spi;
+ tuple->action = action;
+ tuple->is_ipv6 = 1;
+}
+
+always_inline ipsec_policy_t *
+ipsec_input_policy_match (ipsec_spd_t *spd, u32 sa, u32 da,
ipsec_spd_policy_type_t policy_type)
{
ipsec_main_t *im = &ipsec_main;
@@ -95,13 +201,18 @@ ipsec_input_policy_match (ipsec_spd_t * spd, u32 sa, u32 da,
if (sa > clib_net_to_host_u32 (p->raddr.stop.ip4.as_u32))
continue;
+ if (im->input_flow_cache_flag)
+ {
+	  /* Add an entry to the flow cache */
+ ipsec4_input_spd_add_flow_cache_entry (im, sa, da, policy_type, *i);
+ }
return p;
}
return 0;
}
always_inline ipsec_policy_t *
-ipsec_input_protect_policy_match (ipsec_spd_t * spd, u32 sa, u32 da, u32 spi)
+ipsec_input_protect_policy_match (ipsec_spd_t *spd, u32 sa, u32 da, u32 spi)
{
ipsec_main_t *im = &ipsec_main;
ipsec_policy_t *p;
@@ -124,7 +235,7 @@ ipsec_input_protect_policy_match (ipsec_spd_t * spd, u32 sa, u32 da, u32 spi)
if (sa != clib_net_to_host_u32 (s->tunnel.t_src.ip.ip4.as_u32))
continue;
- return p;
+ goto return_policy;
}
if (da < clib_net_to_host_u32 (p->laddr.start.ip4.as_u32))
@@ -139,6 +250,14 @@ ipsec_input_protect_policy_match (ipsec_spd_t * spd, u32 sa, u32 da, u32 spi)
if (sa > clib_net_to_host_u32 (p->raddr.stop.ip4.as_u32))
continue;
+ return_policy:
+ if (im->input_flow_cache_flag)
+ {
+	  /* Add an entry to the flow cache */
+ ipsec4_input_spd_add_flow_cache_entry (
+ im, sa, da, IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT, *i);
+ }
+
return p;
}
return 0;
@@ -154,6 +273,194 @@ ip6_addr_match_range (ip6_address_t * a, ip6_address_t * la,
return 0;
}
+always_inline void
+ipsec_esp_packet_process (vlib_main_t *vm, ipsec_main_t *im, ip4_header_t *ip0,
+ esp_header_t *esp0, u32 thread_index,
+ ipsec_spd_t *spd0, vlib_buffer_t **b,
+ vlib_node_runtime_t *node, u64 *ipsec_bypassed,
+ u64 *ipsec_dropped, u64 *ipsec_matched,
+ u64 *ipsec_unprocessed, u16 *next)
+
+{
+ ipsec_policy_t *p0 = NULL;
+ u32 pi0;
+ u8 has_space0;
+ bool search_flow_cache = false;
+ ipsec_policy_t *policies[1];
+ ipsec_fp_5tuple_t tuples[1];
+ bool ip_v6 = true;
+
+  /* If the flow cache is enabled, first search it for a policy match
+   * against the protect, bypass and discard rules, in that order. If
+   * no match is found, search_flow_cache is set to false and we fall
+   * back to a linear SPD search.
+   */
+
+ search_flow_cache = im->input_flow_cache_flag;
+udp_or_esp:
+
+ if (esp0->spi == 0)
+ {
+      /* RFC 4303, section 2.1: The SPI value of zero (0) is reserved
+       * for local, implementation-specific use and MUST NOT be sent on
+       * the wire.
+       */
+ *ipsec_unprocessed += 1;
+ next[0] = IPSEC_INPUT_NEXT_DROP;
+ return;
+ }
+
+ if (im->fp_spd_ipv4_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip4_in_lookup_hash_idx))
+ {
+ ipsec_fp_in_5tuple_from_ip4_range (&tuples[0], ip0->src_address.as_u32,
+ ip0->dst_address.as_u32,
+ clib_net_to_host_u32 (esp0->spi),
+ IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT);
+ ipsec_fp_in_policy_match_n (&spd0->fp_spd, !ip_v6, tuples, policies, 1);
+ p0 = policies[0];
+ }
+ else if (search_flow_cache) /* attempt to match policy in flow cache */
+ {
+ p0 = ipsec4_input_spd_find_flow_cache_entry (
+ im, ip0->src_address.as_u32, ip0->dst_address.as_u32,
+ IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT);
+ }
+
+ else /* linear search if flow cache is not enabled,
+ or flow cache search just failed */
+ {
+ p0 = ipsec_input_protect_policy_match (
+ spd0, clib_net_to_host_u32 (ip0->src_address.as_u32),
+ clib_net_to_host_u32 (ip0->dst_address.as_u32),
+ clib_net_to_host_u32 (esp0->spi));
+ }
+ has_space0 = vlib_buffer_has_space (b[0], (clib_address_t) (esp0 + 1) -
+ (clib_address_t) ip0);
+
+ if (PREDICT_TRUE ((p0 != NULL) & (has_space0)))
+ {
+ *ipsec_matched += 1;
+
+ pi0 = p0 - im->policies;
+ vlib_increment_combined_counter (&ipsec_spd_policy_counters,
+ thread_index, pi0, 1,
+ clib_net_to_host_u16 (ip0->length));
+
+ vnet_buffer (b[0])->ipsec.sad_index = p0->sa_index;
+ next[0] = im->esp4_decrypt_next_index;
+ vlib_buffer_advance (b[0], ((u8 *) esp0 - (u8 *) ip0));
+ goto trace0;
+ }
+ else
+ {
+ p0 = 0;
+ pi0 = ~0;
+ }
+ if (im->fp_spd_ipv4_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip4_in_lookup_hash_idx))
+ {
+ tuples->action = IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS;
+ ipsec_fp_in_policy_match_n (&spd0->fp_spd, !ip_v6, tuples, policies, 1);
+ p0 = policies[0];
+ }
+ else if (search_flow_cache)
+ {
+ p0 = ipsec4_input_spd_find_flow_cache_entry (
+ im, ip0->src_address.as_u32, ip0->dst_address.as_u32,
+ IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS);
+ }
+
+ else
+ {
+ p0 = ipsec_input_policy_match (
+ spd0, clib_net_to_host_u32 (ip0->src_address.as_u32),
+ clib_net_to_host_u32 (ip0->dst_address.as_u32),
+ IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS);
+ }
+
+ if (PREDICT_TRUE ((p0 != NULL)))
+ {
+ *ipsec_bypassed += 1;
+
+ pi0 = p0 - im->policies;
+ vlib_increment_combined_counter (&ipsec_spd_policy_counters,
+ thread_index, pi0, 1,
+ clib_net_to_host_u16 (ip0->length));
+
+ goto trace0;
+ }
+ else
+ {
+ p0 = 0;
+ pi0 = ~0;
+ };
+ if (im->fp_spd_ipv4_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip4_in_lookup_hash_idx))
+ {
+ tuples->action = IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD;
+ ipsec_fp_in_policy_match_n (&spd0->fp_spd, !ip_v6, tuples, policies, 1);
+ p0 = policies[0];
+ }
+  else if (search_flow_cache)
+ {
+ p0 = ipsec4_input_spd_find_flow_cache_entry (
+ im, ip0->src_address.as_u32, ip0->dst_address.as_u32,
+ IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD);
+ }
+
+ else
+ {
+ p0 = ipsec_input_policy_match (
+ spd0, clib_net_to_host_u32 (ip0->src_address.as_u32),
+ clib_net_to_host_u32 (ip0->dst_address.as_u32),
+ IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD);
+ }
+
+ if (PREDICT_TRUE ((p0 != NULL)))
+ {
+ *ipsec_dropped += 1;
+
+ pi0 = p0 - im->policies;
+ vlib_increment_combined_counter (&ipsec_spd_policy_counters,
+ thread_index, pi0, 1,
+ clib_net_to_host_u16 (ip0->length));
+
+ next[0] = IPSEC_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+ else
+ {
+ p0 = 0;
+ pi0 = ~0;
+ };
+ /* flow cache search failed, try again with linear search */
+ if (search_flow_cache && p0 == NULL)
+ {
+ search_flow_cache = false;
+ goto udp_or_esp;
+ }
+
+ /* Drop by default if no match on PROTECT, BYPASS or DISCARD */
+ *ipsec_unprocessed += 1;
+ next[0] = IPSEC_INPUT_NEXT_DROP;
+
+trace0:
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipsec_input_trace_t *tr = vlib_add_trace (vm, node, b[0], sizeof (*tr));
+
+ tr->proto = ip0->protocol;
+ tr->sa_id = p0 ? p0->sa_id : ~0;
+ tr->spi = has_space0 ? clib_net_to_host_u32 (esp0->spi) : ~0;
+ tr->seq = has_space0 ? clib_net_to_host_u32 (esp0->seq) : ~0;
+ tr->spd = spd0->id;
+ tr->policy_index = pi0;
+ }
+}
+
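Summarizing the fall-through the function above implements, per packet and in order:

  /* 1. PROTECT match -> decrypt next, buffer advanced to the ESP header
     2. BYPASS match  -> counted and left on its way
     3. DISCARD match -> counted and dropped
     4. if the flow cache was searched and all three stages missed,
	retry them with a linear SPD walk (goto udp_or_esp)
     5. otherwise drop the packet as unprocessed */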
always_inline ipsec_policy_t *
ipsec6_input_protect_policy_match (ipsec_spd_t * spd,
ip6_address_t * sa,
@@ -225,6 +532,7 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
ipsec_spd_t *spd0;
ipsec_policy_t *p0 = NULL;
u8 has_space0;
+ bool search_flow_cache = false;
if (n_left_from > 2)
{
@@ -240,29 +548,62 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
ip0 = vlib_buffer_get_current (b[0]);
- if (PREDICT_TRUE
- (ip0->protocol == IP_PROTOCOL_IPSEC_ESP
- || ip0->protocol == IP_PROTOCOL_UDP))
+ if (ip0->protocol == IP_PROTOCOL_UDP)
{
+ udp_header_t *udp0 = NULL;
+ udp0 = (udp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0));
+ /* RFC5996 Section 2.23 "Port 4500 is reserved for
+ * UDP-encapsulated ESP and IKE."
+ */
+ if (clib_host_to_net_u16 (4500) == udp0->dst_port)
+ {
+ esp0 = (esp_header_t *) ((u8 *) udp0 + sizeof (udp_header_t));
+
+ ipsec_esp_packet_process (vm, im, ip0, esp0, thread_index, spd0,
+ b, node, &ipsec_bypassed,
+ &ipsec_dropped, &ipsec_matched,
+ &ipsec_unprocessed, next);
+ if (ipsec_bypassed > 0)
+ goto ipsec_bypassed;
+ }
+ }
+ else if (PREDICT_TRUE (ip0->protocol == IP_PROTOCOL_IPSEC_ESP))
+ {
esp0 = (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0));
- if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_UDP))
+ ipsec_esp_packet_process (vm, im, ip0, esp0, thread_index, spd0, b,
+ node, &ipsec_bypassed, &ipsec_dropped,
+ &ipsec_matched, &ipsec_unprocessed, next);
+ if (ipsec_bypassed > 0)
+ goto ipsec_bypassed;
+ }
+ else if (ip0->protocol == IP_PROTOCOL_IPSEC_AH)
+ {
+ ah0 = (ah_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0));
+
+	      // if flow cache is enabled, first search the flow cache for a
+	      // policy match and fall back to linear search on failure
+ search_flow_cache = im->input_flow_cache_flag;
+
+ ah:
+ if (search_flow_cache)
{
- /* FIXME Skip, if not a UDP encapsulated packet */
- esp0 = (esp_header_t *) ((u8 *) esp0 + sizeof (udp_header_t));
+ p0 = ipsec4_input_spd_find_flow_cache_entry (
+ im, ip0->src_address.as_u32, ip0->dst_address.as_u32,
+ IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT);
}
- p0 = ipsec_input_protect_policy_match (spd0,
- clib_net_to_host_u32
- (ip0->src_address.as_u32),
- clib_net_to_host_u32
- (ip0->dst_address.as_u32),
- clib_net_to_host_u32
- (esp0->spi));
+ else
+ {
+ p0 = ipsec_input_protect_policy_match (
+ spd0, clib_net_to_host_u32 (ip0->src_address.as_u32),
+ clib_net_to_host_u32 (ip0->dst_address.as_u32),
+ clib_net_to_host_u32 (ah0->spi));
+ }
has_space0 =
vlib_buffer_has_space (b[0],
- (clib_address_t) (esp0 + 1) -
+ (clib_address_t) (ah0 + 1) -
(clib_address_t) ip0);
if (PREDICT_TRUE ((p0 != NULL) & (has_space0)))
@@ -275,127 +616,72 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
thread_index, pi0, 1, clib_net_to_host_u16 (ip0->length));
vnet_buffer (b[0])->ipsec.sad_index = p0->sa_index;
- next[0] = im->esp4_decrypt_next_index;
- vlib_buffer_advance (b[0], ((u8 *) esp0 - (u8 *) ip0));
- goto trace0;
+ next[0] = im->ah4_decrypt_next_index;
+ goto trace1;
}
else
{
p0 = 0;
pi0 = ~0;
- };
+ }
- p0 = ipsec_input_policy_match (spd0,
- clib_net_to_host_u32
- (ip0->src_address.as_u32),
- clib_net_to_host_u32
- (ip0->dst_address.as_u32),
- IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS);
- if (PREDICT_TRUE ((p0 != NULL)))
+ if (search_flow_cache)
{
- ipsec_bypassed += 1;
-
- pi0 = p0 - im->policies;
- vlib_increment_combined_counter (
- &ipsec_spd_policy_counters, thread_index, pi0, 1,
- clib_net_to_host_u16 (ip0->length));
-
- goto trace0;
+ p0 = ipsec4_input_spd_find_flow_cache_entry (
+ im, ip0->src_address.as_u32, ip0->dst_address.as_u32,
+ IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS);
}
+
else
{
- p0 = 0;
- pi0 = ~0;
- };
+ p0 = ipsec_input_policy_match (
+ spd0, clib_net_to_host_u32 (ip0->src_address.as_u32),
+ clib_net_to_host_u32 (ip0->dst_address.as_u32),
+ IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS);
+ }
- p0 = ipsec_input_policy_match (spd0,
- clib_net_to_host_u32
- (ip0->src_address.as_u32),
- clib_net_to_host_u32
- (ip0->dst_address.as_u32),
- IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD);
if (PREDICT_TRUE ((p0 != NULL)))
{
- ipsec_dropped += 1;
+ ipsec_bypassed += 1;
pi0 = p0 - im->policies;
vlib_increment_combined_counter (
&ipsec_spd_policy_counters, thread_index, pi0, 1,
clib_net_to_host_u16 (ip0->length));
- next[0] = IPSEC_INPUT_NEXT_DROP;
- goto trace0;
+ goto trace1;
}
else
{
p0 = 0;
pi0 = ~0;
};
- trace0:
- if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
- PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- {
- ipsec_input_trace_t *tr =
- vlib_add_trace (vm, node, b[0], sizeof (*tr));
-
- tr->proto = ip0->protocol;
- tr->sa_id = p0 ? p0->sa_id : ~0;
- tr->spi = has_space0 ? clib_net_to_host_u32 (esp0->spi) : ~0;
- tr->seq = has_space0 ? clib_net_to_host_u32 (esp0->seq) : ~0;
- tr->spd = spd0->id;
- tr->policy_index = pi0;
- }
- }
- else if (ip0->protocol == IP_PROTOCOL_IPSEC_AH)
- {
- ah0 = (ah_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0));
- p0 = ipsec_input_protect_policy_match (spd0,
- clib_net_to_host_u32
- (ip0->src_address.as_u32),
- clib_net_to_host_u32
- (ip0->dst_address.as_u32),
- clib_net_to_host_u32
- (ah0->spi));
-
- has_space0 =
- vlib_buffer_has_space (b[0],
- (clib_address_t) (ah0 + 1) -
- (clib_address_t) ip0);
- if (PREDICT_TRUE ((p0 != NULL) & (has_space0)))
+ if (search_flow_cache)
{
- ipsec_matched += 1;
-
- pi0 = p0 - im->policies;
- vlib_increment_combined_counter
- (&ipsec_spd_policy_counters,
- thread_index, pi0, 1, clib_net_to_host_u16 (ip0->length));
-
- vnet_buffer (b[0])->ipsec.sad_index = p0->sa_index;
- next[0] = im->ah4_decrypt_next_index;
- goto trace1;
+ p0 = ipsec4_input_spd_find_flow_cache_entry (
+ im, ip0->src_address.as_u32, ip0->dst_address.as_u32,
+ IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD);
}
+
else
{
- p0 = 0;
- pi0 = ~0;
+ p0 = ipsec_input_policy_match (
+ spd0, clib_net_to_host_u32 (ip0->src_address.as_u32),
+ clib_net_to_host_u32 (ip0->dst_address.as_u32),
+ IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD);
}
- p0 = ipsec_input_policy_match (spd0,
- clib_net_to_host_u32
- (ip0->src_address.as_u32),
- clib_net_to_host_u32
- (ip0->dst_address.as_u32),
- IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS);
if (PREDICT_TRUE ((p0 != NULL)))
{
- ipsec_bypassed += 1;
+ ipsec_dropped += 1;
pi0 = p0 - im->policies;
vlib_increment_combined_counter (
&ipsec_spd_policy_counters, thread_index, pi0, 1,
clib_net_to_host_u16 (ip0->length));
+ next[0] = IPSEC_INPUT_NEXT_DROP;
goto trace1;
}
else
@@ -404,29 +690,17 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
pi0 = ~0;
};
- p0 = ipsec_input_policy_match (spd0,
- clib_net_to_host_u32
- (ip0->src_address.as_u32),
- clib_net_to_host_u32
- (ip0->dst_address.as_u32),
- IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD);
- if (PREDICT_TRUE ((p0 != NULL)))
+ // flow cache search failed, retry with linear search
+ if (search_flow_cache && p0 == NULL)
{
- ipsec_dropped += 1;
+ search_flow_cache = false;
+ goto ah;
+ }
- pi0 = p0 - im->policies;
- vlib_increment_combined_counter (
- &ipsec_spd_policy_counters, thread_index, pi0, 1,
- clib_net_to_host_u16 (ip0->length));
+ /* Drop by default if no match on PROTECT, BYPASS or DISCARD */
+ ipsec_unprocessed += 1;
+ next[0] = IPSEC_INPUT_NEXT_DROP;
- next[0] = IPSEC_INPUT_NEXT_DROP;
- goto trace1;
- }
- else
- {
- p0 = 0;
- pi0 = ~0;
- };
trace1:
if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
@@ -444,6 +718,7 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
}
else
{
+ ipsec_bypassed:
ipsec_unprocessed += 1;
}
n_left_from -= 1;
@@ -475,8 +750,6 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ipsec4_input_node) = {
.name = "ipsec4-input-feature",
.vector_size = sizeof (u32),
@@ -491,7 +764,6 @@ VLIB_REGISTER_NODE (ipsec4_input_node) = {
#undef _
},
};
-/* *INDENT-ON* */
extern vlib_node_registration_t ipsec6_input_node;
@@ -504,6 +776,9 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
ipsec_main_t *im = &ipsec_main;
u32 ipsec_unprocessed = 0;
u32 ipsec_matched = 0;
+ ipsec_policy_t *policies[1];
+ ipsec_fp_5tuple_t tuples[1];
+ bool ip_v6 = true;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
@@ -519,7 +794,7 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
while (n_left_from > 0 && n_left_to_next > 0)
{
- u32 bi0, next0, pi0;
+ u32 bi0, next0, pi0 = ~0;
vlib_buffer_t *b0;
ip6_header_t *ip0;
esp_header_t *esp0;
@@ -556,11 +831,22 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
clib_net_to_host_u16 (ip0->payload_length) + header_size,
spd0->id);
#endif
- p0 = ipsec6_input_protect_policy_match (spd0,
- &ip0->src_address,
- &ip0->dst_address,
- clib_net_to_host_u32
- (esp0->spi));
+ if (im->fp_spd_ipv6_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID !=
+ spd0->fp_spd.ip6_in_lookup_hash_idx))
+ {
+ ipsec_fp_in_5tuple_from_ip6_range (
+ &tuples[0], &ip0->src_address, &ip0->dst_address,
+ clib_net_to_host_u32 (esp0->spi),
+ IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT);
+ ipsec_fp_in_policy_match_n (&spd0->fp_spd, ip_v6, tuples,
+ policies, 1);
+ p0 = policies[0];
+ }
+ else
+ p0 = ipsec6_input_protect_policy_match (
+ spd0, &ip0->src_address, &ip0->dst_address,
+ clib_net_to_host_u32 (esp0->spi));
if (PREDICT_TRUE (p0 != 0))
{
@@ -576,11 +862,15 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
vnet_buffer (b0)->ipsec.sad_index = p0->sa_index;
next0 = im->esp6_decrypt_next_index;
vlib_buffer_advance (b0, header_size);
+ /* TODO Add policy matching for bypass and discard policy
+ * type */
goto trace0;
}
else
{
pi0 = ~0;
+ ipsec_unprocessed += 1;
+ next0 = IPSEC_INPUT_NEXT_DROP;
}
}
else if (ip0->protocol == IP_PROTOCOL_IPSEC_AH)
@@ -608,6 +898,8 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
else
{
pi0 = ~0;
+ ipsec_unprocessed += 1;
+ next0 = IPSEC_INPUT_NEXT_DROP;
}
}
else
@@ -623,11 +915,16 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
vlib_add_trace (vm, node, b0, sizeof (*tr));
if (p0)
- tr->sa_id = p0->sa_id;
+ {
+ tr->sa_id = p0->sa_id;
+ tr->policy_type = p0->type;
+ }
+
tr->proto = ip0->protocol;
tr->spi = clib_net_to_host_u32 (esp0->spi);
tr->seq = clib_net_to_host_u32 (esp0->seq);
tr->spd = spd0->id;
+ tr->policy_index = pi0;
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
@@ -648,7 +945,6 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ipsec6_input_node) = {
.name = "ipsec6-input-feature",
.vector_size = sizeof (u32),
@@ -663,7 +959,6 @@ VLIB_REGISTER_NODE (ipsec6_input_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
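
The inbound node above settles on a fixed lookup order: PROTECT first (hand the packet to the decrypt node), then BYPASS, then DISCARD, with an unconditional drop when nothing matches; the flow cache, when enabled, is only a fast first attempt that falls back to the linear SPD walk on a miss. A minimal stand-alone sketch of that ordering, using hypothetical types rather than the VPP API:

    #include <stddef.h>
    #include <stdio.h>

    typedef enum { POL_PROTECT, POL_BYPASS, POL_DISCARD } pol_type_t;
    typedef struct { pol_type_t type; unsigned spi; } policy_t;

    /* first policy of the requested type that matches; PROTECT also keys on SPI */
    static const policy_t *
    match (const policy_t *tbl, size_t n, pol_type_t t, unsigned spi)
    {
      for (size_t i = 0; i < n; i++)
        if (tbl[i].type == t && (t != POL_PROTECT || tbl[i].spi == spi))
          return &tbl[i];
      return NULL;
    }

    int
    main (void)
    {
      policy_t spd[] = { { POL_DISCARD, 0 }, { POL_PROTECT, 1000 } };
      const policy_t *p;

      if ((p = match (spd, 2, POL_PROTECT, 1000)))
        puts ("decrypt");              /* next = esp4-decrypt / ah4-decrypt */
      else if ((p = match (spd, 2, POL_BYPASS, 0)))
        puts ("bypass");               /* forward in the clear */
      else if ((p = match (spd, 2, POL_DISCARD, 0)))
        puts ("drop (discard policy)");
      else
        puts ("drop (no match)");      /* drop by default, as above */
      return 0;
    }
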
diff --git a/src/vnet/ipsec/ipsec_itf.c b/src/vnet/ipsec/ipsec_itf.c
index 532d5be4c07..b86bf6a110c 100644
--- a/src/vnet/ipsec/ipsec_itf.c
+++ b/src/vnet/ipsec/ipsec_itf.c
@@ -21,6 +21,7 @@
#include <vnet/ipsec/ipsec.h>
#include <vnet/adj/adj_midchain.h>
#include <vnet/ethernet/mac_address.h>
+#include <vnet/mpls/mpls.h>
/* bitmap of Allocated IPSEC_ITF instances */
static uword *ipsec_itf_instances;
@@ -36,6 +37,12 @@ ipsec_itf_get (index_t ii)
return (pool_elt_at_index (ipsec_itf_pool, ii));
}
+u32
+ipsec_itf_count (void)
+{
+ return (pool_elts (ipsec_itf_pool));
+}
+
static ipsec_itf_t *
ipsec_itf_find_by_sw_if_index (u32 sw_if_index)
{
@@ -181,7 +188,6 @@ ipsec_itf_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
(ai, NULL, NULL, ADJ_FLAG_MIDCHAIN_IP_STACK, ipsec_itf_build_rewrite ());
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (ipsec_itf_device_class) = {
.name = "IPSEC Tunnel",
.format_device_name = format_ipsec_itf_name,
@@ -201,7 +207,6 @@ VNET_HW_INTERFACE_CLASS(ipsec_p2mp_hw_interface_class) = {
.update_adjacency = ipsec_itf_update_adj,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA,
};
-/* *INDENT-ON* */
/*
* Maintain a bitmap of allocated ipsec_itf instance numbers.
@@ -268,6 +273,20 @@ ipsec_itf_instance_free (u32 instance)
return 0;
}
+void
+ipsec_itf_reset_tx_nodes (u32 sw_if_index)
+{
+ vnet_feature_modify_end_node (
+ ip4_main.lookup_main.output_feature_arc_index, sw_if_index,
+ vlib_get_node_by_name (vlib_get_main (), (u8 *) "ip4-drop")->index);
+ vnet_feature_modify_end_node (
+ ip6_main.lookup_main.output_feature_arc_index, sw_if_index,
+ vlib_get_node_by_name (vlib_get_main (), (u8 *) "ip6-drop")->index);
+ vnet_feature_modify_end_node (
+ mpls_main.output_feature_arc_index, sw_if_index,
+ vlib_get_node_by_name (vlib_get_main (), (u8 *) "mpls-drop")->index);
+}
+
int
ipsec_itf_create (u32 user_instance, tunnel_mode_t mode, u32 * sw_if_indexp)
{
@@ -312,6 +331,7 @@ ipsec_itf_create (u32 user_instance, tunnel_mode_t mode, u32 * sw_if_indexp)
ipsec_itf_index_by_sw_if_index[hi->sw_if_index] = t_idx;
ipsec_itf->ii_sw_if_index = *sw_if_indexp = hi->sw_if_index;
+ ipsec_itf_reset_tx_nodes (hi->sw_if_index);
return 0;
}
@@ -336,6 +356,8 @@ ipsec_itf_delete (u32 sw_if_index)
if (ipsec_itf_instance_free (hw->dev_instance) < 0)
return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ vnet_reset_interface_l3_output_node (vnm->vlib_main, sw_if_index);
+
vnet_delete_hw_interface (vnm, hw->hw_if_index);
pool_put (ipsec_itf_pool, ipsec_itf);
@@ -359,6 +381,7 @@ ipsec_itf_create_cli (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
+ tunnel_mode_t mode = TUNNEL_MODE_P2P;
u32 instance, sw_if_index;
clib_error_t *error;
mac_address_t mac;
@@ -374,6 +397,8 @@ ipsec_itf_create_cli (vlib_main_t * vm,
{
if (unformat (line_input, "instance %d", &instance))
;
+ else if (unformat (line_input, "p2mp"))
+ mode = TUNNEL_MODE_MP;
else
{
error = clib_error_return (0, "unknown input: %U",
@@ -388,7 +413,7 @@ ipsec_itf_create_cli (vlib_main_t * vm,
return error;
}
- rv = ipsec_itf_create (instance, TUNNEL_MODE_P2P, &sw_if_index);
+ rv = ipsec_itf_create (instance, mode, &sw_if_index);
if (rv)
return clib_error_return (0, "iPSec interface create failed");
@@ -403,17 +428,15 @@ ipsec_itf_create_cli (vlib_main_t * vm,
*
* @cliexpar
* The following two command syntaxes are equivalent:
- * @cliexcmd{ipsec itf create [instance <instance>]}
+ * @cliexcmd{ipsec itf create [instance <instance>] [p2mp]}
 * Example of how to create an ipsec interface:
* @cliexcmd{ipsec itf create}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_itf_create_command, static) = {
.path = "ipsec itf create",
- .short_help = "ipsec itf create [instance <instance>]",
+ .short_help = "ipsec itf create [instance <instance>] [p2mp]",
.function = ipsec_itf_create_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
ipsec_itf_delete_cli (vlib_main_t * vm,
@@ -458,13 +481,11 @@ ipsec_itf_delete_cli (vlib_main_t * vm,
 * Example of how to delete an ipsec_itf interface:
* @cliexcmd{ipsec itf delete ipsec0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_itf_delete_command, static) = {
.path = "ipsec itf delete",
.short_help = "ipsec itf delete <interface>",
.function = ipsec_itf_delete_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
ipsec_interface_show (vlib_main_t * vm,
@@ -472,12 +493,10 @@ ipsec_interface_show (vlib_main_t * vm,
{
index_t ii;
- /* *INDENT-OFF* */
pool_foreach_index (ii, ipsec_itf_pool)
{
vlib_cli_output (vm, "%U", format_ipsec_itf, ii);
}
- /* *INDENT-ON* */
return NULL;
}
@@ -485,14 +504,12 @@ ipsec_interface_show (vlib_main_t * vm,
/**
* show IPSEC tunnel protection hash tables
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_interface_show_node, static) =
{
.path = "show ipsec interface",
.function = ipsec_interface_show,
.short_help = "show ipsec interface",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
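
With the p2mp keyword added above, both tunnel modes are reachable from the CLI. A hypothetical session (the interface name and show output are illustrative, not captured from a running VPP):

    vpp# ipsec itf create instance 0 p2mp
    ipsec0
    vpp# show ipsec interface
    ipsec0 instance:0
    vpp# ipsec itf delete ipsec0
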
diff --git a/src/vnet/ipsec/ipsec_itf.h b/src/vnet/ipsec/ipsec_itf.h
index 4958d102b65..bf13096ed8f 100644
--- a/src/vnet/ipsec/ipsec_itf.h
+++ b/src/vnet/ipsec/ipsec_itf.h
@@ -102,6 +102,7 @@ typedef struct ipsec_itf_t_
extern int ipsec_itf_create (u32 user_instance,
tunnel_mode_t mode, u32 * sw_if_indexp);
extern int ipsec_itf_delete (u32 sw_if_index);
+extern void ipsec_itf_reset_tx_nodes (u32 sw_if_index);
extern void ipsec_itf_adj_stack (adj_index_t ai, u32 sai);
extern void ipsec_itf_adj_unstack (adj_index_t ai);
@@ -109,6 +110,7 @@ extern void ipsec_itf_adj_unstack (adj_index_t ai);
extern u8 *format_ipsec_itf (u8 * s, va_list * a);
extern ipsec_itf_t *ipsec_itf_get (index_t ii);
+extern u32 ipsec_itf_count (void);
typedef walk_rc_t (*ipsec_itf_walk_cb_t) (ipsec_itf_t *itf, void *ctx);
extern void ipsec_itf_walk (ipsec_itf_walk_cb_t cd, void *ctx);
diff --git a/src/vnet/ipsec/ipsec_output.c b/src/vnet/ipsec/ipsec_output.c
index 84927debaca..787da9359e0 100644
--- a/src/vnet/ipsec/ipsec_output.c
+++ b/src/vnet/ipsec/ipsec_output.c
@@ -21,6 +21,7 @@
#include <vnet/ipsec/ipsec.h>
#include <vnet/ipsec/ipsec_io.h>
+#include <vnet/ipsec/ipsec_output.h>
#define foreach_ipsec_output_error \
_(RX_PKTS, "IPSec pkts received") \
@@ -63,207 +64,6 @@ format_ipsec_output_trace (u8 * s, va_list * args)
return s;
}
-always_inline void
-ipsec4_out_spd_add_flow_cache_entry (ipsec_main_t *im, u8 pr, u32 la, u32 ra,
- u16 lp, u16 rp, u32 pol_id)
-{
- u64 hash;
- u8 overwrite = 0, stale_overwrite = 0;
- ipsec4_spd_5tuple_t ip4_5tuple = { .ip4_addr = { (ip4_address_t) la,
- (ip4_address_t) ra },
- .port = { lp, rp },
- .proto = pr };
-
- ip4_5tuple.kv_16_8.value = (((u64) pol_id) << 32) | ((u64) im->epoch_count);
-
- hash = ipsec4_hash_16_8 (&ip4_5tuple.kv_16_8);
- hash &= (im->ipsec4_out_spd_hash_num_buckets - 1);
-
- ipsec_spinlock_lock (&im->ipsec4_out_spd_hash_tbl[hash].bucket_lock);
- /* Check if we are overwriting an existing entry so we know
- whether to increment the flow cache counter. Since flow
- cache counter is reset on any policy add/remove, but
- hash table values are not, we also need to check if the entry
- we are overwriting is stale or not. If it's a stale entry
- overwrite, we still want to increment flow cache counter */
- overwrite = (im->ipsec4_out_spd_hash_tbl[hash].value != 0);
- /* Check for stale entry by comparing with current epoch count */
- if (PREDICT_FALSE (overwrite))
- stale_overwrite =
- (im->epoch_count !=
- ((u32) (im->ipsec4_out_spd_hash_tbl[hash].value & 0xFFFFFFFF)));
- clib_memcpy_fast (&im->ipsec4_out_spd_hash_tbl[hash], &ip4_5tuple.kv_16_8,
- sizeof (ip4_5tuple.kv_16_8));
- ipsec_spinlock_unlock (&im->ipsec4_out_spd_hash_tbl[hash].bucket_lock);
-
- /* Increment the counter to track active flow cache entries
- when entering a fresh entry or overwriting a stale one */
- if (!overwrite || stale_overwrite)
- clib_atomic_fetch_add_relax (&im->ipsec4_out_spd_flow_cache_entries, 1);
-
- return;
-}
-
-always_inline ipsec_policy_t *
-ipsec4_out_spd_find_flow_cache_entry (ipsec_main_t *im, u8 pr, u32 la, u32 ra,
- u16 lp, u16 rp)
-{
- ipsec_policy_t *p = NULL;
- ipsec4_hash_kv_16_8_t kv_result;
- u64 hash;
-
- if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP) &&
- (pr != IP_PROTOCOL_SCTP)))
- {
- lp = 0;
- rp = 0;
- }
- ipsec4_spd_5tuple_t ip4_5tuple = { .ip4_addr = { (ip4_address_t) la,
- (ip4_address_t) ra },
- .port = { lp, rp },
- .proto = pr };
-
- hash = ipsec4_hash_16_8 (&ip4_5tuple.kv_16_8);
- hash &= (im->ipsec4_out_spd_hash_num_buckets - 1);
-
- ipsec_spinlock_lock (&im->ipsec4_out_spd_hash_tbl[hash].bucket_lock);
- kv_result = im->ipsec4_out_spd_hash_tbl[hash];
- ipsec_spinlock_unlock (&im->ipsec4_out_spd_hash_tbl[hash].bucket_lock);
-
- if (ipsec4_hash_key_compare_16_8 ((u64 *) &ip4_5tuple.kv_16_8,
- (u64 *) &kv_result))
- {
- if (im->epoch_count == ((u32) (kv_result.value & 0xFFFFFFFF)))
- {
- /* Get the policy based on the index */
- p =
- pool_elt_at_index (im->policies, ((u32) (kv_result.value >> 32)));
- }
- }
-
- return p;
-}
-
-always_inline ipsec_policy_t *
-ipsec_output_policy_match (ipsec_spd_t *spd, u8 pr, u32 la, u32 ra, u16 lp,
- u16 rp, u8 flow_cache_enabled)
-{
- ipsec_main_t *im = &ipsec_main;
- ipsec_policy_t *p;
- u32 *i;
-
- if (!spd)
- return 0;
-
- vec_foreach (i, spd->policies[IPSEC_SPD_POLICY_IP4_OUTBOUND])
- {
- p = pool_elt_at_index (im->policies, *i);
- if (PREDICT_FALSE (p->protocol && (p->protocol != pr)))
- continue;
-
- if (ra < clib_net_to_host_u32 (p->raddr.start.ip4.as_u32))
- continue;
-
- if (ra > clib_net_to_host_u32 (p->raddr.stop.ip4.as_u32))
- continue;
-
- if (la < clib_net_to_host_u32 (p->laddr.start.ip4.as_u32))
- continue;
-
- if (la > clib_net_to_host_u32 (p->laddr.stop.ip4.as_u32))
- continue;
-
- if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP) &&
- (pr != IP_PROTOCOL_SCTP)))
- {
- lp = 0;
- rp = 0;
- goto add_flow_cache;
- }
-
- if (lp < p->lport.start)
- continue;
-
- if (lp > p->lport.stop)
- continue;
-
- if (rp < p->rport.start)
- continue;
-
- if (rp > p->rport.stop)
- continue;
-
- add_flow_cache:
- if (flow_cache_enabled)
- {
- /* Add an Entry in Flow cache */
- ipsec4_out_spd_add_flow_cache_entry (
- im, pr, clib_host_to_net_u32 (la), clib_host_to_net_u32 (ra),
- clib_host_to_net_u16 (lp), clib_host_to_net_u16 (rp), *i);
- }
-
- return p;
- }
- return 0;
-}
-
-always_inline uword
-ip6_addr_match_range (ip6_address_t * a, ip6_address_t * la,
- ip6_address_t * ua)
-{
- if ((memcmp (a->as_u64, la->as_u64, 2 * sizeof (u64)) >= 0) &&
- (memcmp (a->as_u64, ua->as_u64, 2 * sizeof (u64)) <= 0))
- return 1;
- return 0;
-}
-
-always_inline ipsec_policy_t *
-ipsec6_output_policy_match (ipsec_spd_t * spd,
- ip6_address_t * la,
- ip6_address_t * ra, u16 lp, u16 rp, u8 pr)
-{
- ipsec_main_t *im = &ipsec_main;
- ipsec_policy_t *p;
- u32 *i;
-
- if (!spd)
- return 0;
-
- vec_foreach (i, spd->policies[IPSEC_SPD_POLICY_IP6_OUTBOUND])
- {
- p = pool_elt_at_index (im->policies, *i);
- if (PREDICT_FALSE (p->protocol && (p->protocol != pr)))
- continue;
-
- if (!ip6_addr_match_range (ra, &p->raddr.start.ip6, &p->raddr.stop.ip6))
- continue;
-
- if (!ip6_addr_match_range (la, &p->laddr.start.ip6, &p->laddr.stop.ip6))
- continue;
-
- if (PREDICT_FALSE
- ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP)
- && (pr != IP_PROTOCOL_SCTP)))
- return p;
-
- if (lp < p->lport.start)
- continue;
-
- if (lp > p->lport.stop)
- continue;
-
- if (rp < p->rport.start)
- continue;
-
- if (rp > p->rport.stop)
- continue;
-
- return p;
- }
-
- return 0;
-}
-
static inline uword
ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * from_frame, int is_ipv6)
@@ -278,7 +78,7 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
ipsec_spd_t *spd0 = 0;
int bogus;
u64 nc_protect = 0, nc_bypass = 0, nc_discard = 0, nc_nomatch = 0;
- u8 flow_cache_enabled = im->flow_cache_flag;
+ u8 flow_cache_enabled = im->output_flow_cache_flag;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
@@ -535,7 +335,6 @@ VLIB_NODE_FN (ipsec4_output_node) (vlib_main_t * vm,
return ipsec_output_inline (vm, node, frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ipsec4_output_node) = {
.name = "ipsec4-output-feature",
.vector_size = sizeof (u32),
@@ -552,7 +351,6 @@ VLIB_REGISTER_NODE (ipsec4_output_node) = {
#undef _
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ipsec6_output_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
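
The matchers removed above are not dropped from the tree: they move, along with new multi-packet variants, into the ipsec_output.h header added below so that other nodes can inline them. One rule both the old and the new copies share: for protocols that carry no ports (anything other than TCP, UDP, or SCTP), the match zeroes both ports before comparing and before seeding the flow cache, so all port ranges collapse to a single entry per address pair. A stand-alone sketch of that rule with illustrative names:

    #include <stdint.h>
    #include <stdio.h>

    enum { PROTO_TCP = 6, PROTO_UDP = 17, PROTO_ESP = 50, PROTO_SCTP = 132 };

    static int
    proto_has_ports (uint8_t pr)
    {
      return pr == PROTO_TCP || pr == PROTO_UDP || pr == PROTO_SCTP;
    }

    int
    main (void)
    {
      uint8_t pr = PROTO_ESP;
      uint16_t lp = 1234, rp = 4321;

      if (!proto_has_ports (pr))
        lp = rp = 0;                /* ports are meaningless: normalize to 0 */

      printf ("lookup key ports: %u %u\n", lp, rp); /* 0 0 for ESP */
      return 0;
    }
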
diff --git a/src/vnet/ipsec/ipsec_output.h b/src/vnet/ipsec/ipsec_output.h
new file mode 100644
index 00000000000..30f4ebedeb7
--- /dev/null
+++ b/src/vnet/ipsec/ipsec_output.h
@@ -0,0 +1,489 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2021 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef IPSEC_OUTPUT_H
+#define IPSEC_OUTPUT_H
+
+#include <vppinfra/types.h>
+#include <vnet/ipsec/ipsec_spd.h>
+#include <vnet/ipsec/ipsec_spd_fp_lookup.h>
+
+always_inline void
+ipsec4_out_spd_add_flow_cache_entry (ipsec_main_t *im, u8 pr, u32 la, u32 ra,
+ u16 lp, u16 rp, u32 pol_id)
+{
+ u64 hash;
+ u8 overwrite = 0, stale_overwrite = 0;
+ ipsec4_spd_5tuple_t ip4_5tuple = { .ip4_addr = { (ip4_address_t) la,
+ (ip4_address_t) ra },
+ .port = { lp, rp },
+ .proto = pr };
+
+ ip4_5tuple.kv_16_8.value = (((u64) pol_id) << 32) | ((u64) im->epoch_count);
+
+ hash = ipsec4_hash_16_8 (&ip4_5tuple.kv_16_8);
+ hash &= (im->ipsec4_out_spd_hash_num_buckets - 1);
+
+ ipsec_spinlock_lock (&im->ipsec4_out_spd_hash_tbl[hash].bucket_lock);
+  /* Check if we are overwriting an existing entry so we know
+     whether to increment the flow cache counter. Since the flow
+     cache counter is reset on any policy add/remove, but the hash
+     table values are not, we also need to check whether the entry
+     being overwritten is stale. If we are overwriting a stale
+     entry, we still want to increment the flow cache counter */
+ overwrite = (im->ipsec4_out_spd_hash_tbl[hash].value != 0);
+ /* Check for stale entry by comparing with current epoch count */
+ if (PREDICT_FALSE (overwrite))
+ stale_overwrite =
+ (im->epoch_count !=
+ ((u32) (im->ipsec4_out_spd_hash_tbl[hash].value & 0xFFFFFFFF)));
+ clib_memcpy_fast (&im->ipsec4_out_spd_hash_tbl[hash], &ip4_5tuple.kv_16_8,
+ sizeof (ip4_5tuple.kv_16_8));
+ ipsec_spinlock_unlock (&im->ipsec4_out_spd_hash_tbl[hash].bucket_lock);
+
+ /* Increment the counter to track active flow cache entries
+ when entering a fresh entry or overwriting a stale one */
+ if (!overwrite || stale_overwrite)
+ clib_atomic_fetch_add_relax (&im->ipsec4_out_spd_flow_cache_entries, 1);
+
+ return;
+}
+
+always_inline void
+ipsec4_out_spd_add_flow_cache_entry_n (ipsec_main_t *im,
+ ipsec4_spd_5tuple_t *ip4_5tuple,
+ u32 pol_id)
+{
+ u64 hash;
+ u8 overwrite = 0, stale_overwrite = 0;
+
+ ip4_5tuple->kv_16_8.value = (((u64) pol_id) << 32) | ((u64) im->epoch_count);
+
+ hash = ipsec4_hash_16_8 (&ip4_5tuple->kv_16_8);
+ hash &= (im->ipsec4_out_spd_hash_num_buckets - 1);
+
+ ipsec_spinlock_lock (&im->ipsec4_out_spd_hash_tbl[hash].bucket_lock);
+  /* Check if we are overwriting an existing entry so we know
+     whether to increment the flow cache counter. Since the flow
+     cache counter is reset on any policy add/remove, but the hash
+     table values are not, we also need to check whether the entry
+     being overwritten is stale. If we are overwriting a stale
+     entry, we still want to increment the flow cache counter */
+ overwrite = (im->ipsec4_out_spd_hash_tbl[hash].value != 0);
+ /* Check for stale entry by comparing with current epoch count */
+ if (PREDICT_FALSE (overwrite))
+ stale_overwrite =
+ (im->epoch_count !=
+ ((u32) (im->ipsec4_out_spd_hash_tbl[hash].value & 0xFFFFFFFF)));
+ clib_memcpy_fast (&im->ipsec4_out_spd_hash_tbl[hash], &ip4_5tuple->kv_16_8,
+ sizeof (ip4_5tuple->kv_16_8));
+ ipsec_spinlock_unlock (&im->ipsec4_out_spd_hash_tbl[hash].bucket_lock);
+
+ /* Increment the counter to track active flow cache entries
+ when entering a fresh entry or overwriting a stale one */
+ if (!overwrite || stale_overwrite)
+ clib_atomic_fetch_add_relax (&im->ipsec4_out_spd_flow_cache_entries, 1);
+
+ return;
+}
+
+always_inline void
+ipsec_fp_5tuple_from_ip4_range (ipsec_fp_5tuple_t *tuple, u32 la, u32 ra,
+ u16 lp, u16 rp, u8 pr)
+{
+ clib_memset (tuple->l3_zero_pad, 0, sizeof (tuple->l3_zero_pad));
+ tuple->laddr.as_u32 = clib_host_to_net_u32 (la);
+ tuple->raddr.as_u32 = clib_host_to_net_u32 (ra);
+
+ if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP) &&
+ (pr != IP_PROTOCOL_SCTP)))
+ {
+ tuple->lport = 0;
+ tuple->rport = 0;
+ }
+ else
+ {
+ tuple->lport = lp;
+ tuple->rport = rp;
+ }
+
+ tuple->protocol = pr;
+ tuple->is_ipv6 = 0;
+}
+
+always_inline void
+ipsec_fp_5tuple_from_ip4_range_n (ipsec_fp_5tuple_t *tuples,
+ ipsec4_spd_5tuple_t *ip4_5tuple, u32 n)
+{
+ u32 n_left = n;
+ ipsec_fp_5tuple_t *tuple = tuples;
+
+ while (n_left)
+ {
+ clib_memset (tuple->l3_zero_pad, 0, sizeof (tuple->l3_zero_pad));
+ tuple->laddr.as_u32 =
+ clib_host_to_net_u32 (ip4_5tuple->ip4_addr[0].as_u32);
+ tuple->raddr.as_u32 =
+ clib_host_to_net_u32 (ip4_5tuple->ip4_addr[1].as_u32);
+ if (PREDICT_FALSE ((ip4_5tuple->proto != IP_PROTOCOL_TCP) &&
+ (ip4_5tuple->proto != IP_PROTOCOL_UDP) &&
+ (ip4_5tuple->proto != IP_PROTOCOL_SCTP)))
+ {
+ tuple->lport = 0;
+ tuple->rport = 0;
+ }
+ else
+ {
+ tuple->lport = ip4_5tuple->port[0];
+ tuple->rport = ip4_5tuple->port[1];
+ }
+ tuple->protocol = ip4_5tuple->proto;
+ tuple->is_ipv6 = 0;
+ n_left--;
+ tuple++;
+ }
+}
+
+always_inline int
+ipsec_output_policy_match_n (ipsec_spd_t *spd,
+ ipsec4_spd_5tuple_t *ip4_5tuples,
+ ipsec_policy_t **policies, u32 n,
+ u8 flow_cache_enabled)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_policy_t *p;
+ ipsec_policy_t **pp = policies;
+ u32 n_left = n;
+ ipsec4_spd_5tuple_t *ip4_5tuple = ip4_5tuples;
+ u32 policy_ids[n], *policy_id = policy_ids;
+ ipsec_fp_5tuple_t tuples[n];
+ u32 *i;
+ u32 counter = 0;
+
+ if (!spd)
+ return 0;
+
+ clib_memset (policies, 0, n * sizeof (ipsec_policy_t *));
+
+ if (im->fp_spd_ipv4_out_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd->fp_spd.ip4_out_lookup_hash_idx))
+ {
+ ipsec_fp_5tuple_from_ip4_range_n (tuples, ip4_5tuples, n);
+ counter += ipsec_fp_out_policy_match_n (&spd->fp_spd, 0, tuples,
+ policies, policy_ids, n);
+ }
+
+ while (n_left)
+ {
+ if (*pp != 0)
+ goto next;
+
+ vec_foreach (i, spd->policies[IPSEC_SPD_POLICY_IP4_OUTBOUND])
+ {
+ p = pool_elt_at_index (im->policies, *i);
+ if (PREDICT_FALSE (p->protocol &&
+ (p->protocol != ip4_5tuple->proto)))
+ continue;
+
+ if (ip4_5tuple->ip4_addr[0].as_u32 <
+ clib_net_to_host_u32 (p->raddr.start.ip4.as_u32))
+ continue;
+
+ if (ip4_5tuple->ip4_addr[1].as_u32 >
+ clib_net_to_host_u32 (p->raddr.stop.ip4.as_u32))
+ continue;
+
+ if (ip4_5tuple->ip4_addr[0].as_u32 <
+ clib_net_to_host_u32 (p->laddr.start.ip4.as_u32))
+ continue;
+
+ if (ip4_5tuple->ip4_addr[1].as_u32 >
+ clib_net_to_host_u32 (p->laddr.stop.ip4.as_u32))
+ continue;
+
+ if (PREDICT_FALSE ((ip4_5tuple->proto != IP_PROTOCOL_TCP) &&
+ (ip4_5tuple->proto != IP_PROTOCOL_UDP) &&
+ (ip4_5tuple->proto != IP_PROTOCOL_SCTP)))
+ {
+ ip4_5tuple->port[0] = 0;
+ ip4_5tuple->port[1] = 0;
+ goto add_policy;
+ }
+
+ if (ip4_5tuple->port[0] < p->lport.start)
+ continue;
+
+ if (ip4_5tuple->port[0] > p->lport.stop)
+ continue;
+
+ if (ip4_5tuple->port[1] < p->rport.start)
+ continue;
+
+ if (ip4_5tuple->port[1] > p->rport.stop)
+ continue;
+
+ add_policy:
+ *pp = p;
+ *policy_id = *i;
+ counter++;
+ break;
+ }
+
+ next:
+ n_left--;
+ pp++;
+ ip4_5tuple++;
+ policy_id++;
+ }
+
+ if (flow_cache_enabled)
+ {
+ n_left = n;
+ policy_id = policy_ids;
+ ip4_5tuple = ip4_5tuples;
+ pp = policies;
+
+ while (n_left)
+ {
+ if (*pp != NULL)
+ {
+ /* Add an Entry in Flow cache */
+ ipsec4_out_spd_add_flow_cache_entry_n (im, ip4_5tuple,
+ *policy_id);
+ }
+
+ n_left--;
+ policy_id++;
+ ip4_5tuple++;
+ pp++;
+ }
+ }
+
+ return counter;
+}
+
+always_inline ipsec_policy_t *
+ipsec4_out_spd_find_flow_cache_entry (ipsec_main_t *im, u8 pr, u32 la, u32 ra,
+ u16 lp, u16 rp)
+{
+ ipsec_policy_t *p = NULL;
+ ipsec4_hash_kv_16_8_t kv_result;
+ u64 hash;
+
+ if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP) &&
+ (pr != IP_PROTOCOL_SCTP)))
+ {
+ lp = 0;
+ rp = 0;
+ }
+ ipsec4_spd_5tuple_t ip4_5tuple = { .ip4_addr = { (ip4_address_t) la,
+ (ip4_address_t) ra },
+ .port = { lp, rp },
+ .proto = pr };
+
+ hash = ipsec4_hash_16_8 (&ip4_5tuple.kv_16_8);
+ hash &= (im->ipsec4_out_spd_hash_num_buckets - 1);
+
+ ipsec_spinlock_lock (&im->ipsec4_out_spd_hash_tbl[hash].bucket_lock);
+ kv_result = im->ipsec4_out_spd_hash_tbl[hash];
+ ipsec_spinlock_unlock (&im->ipsec4_out_spd_hash_tbl[hash].bucket_lock);
+
+ if (ipsec4_hash_key_compare_16_8 ((u64 *) &ip4_5tuple.kv_16_8,
+ (u64 *) &kv_result))
+ {
+ if (im->epoch_count == ((u32) (kv_result.value & 0xFFFFFFFF)))
+ {
+ /* Get the policy based on the index */
+ p =
+ pool_elt_at_index (im->policies, ((u32) (kv_result.value >> 32)));
+ }
+ }
+
+ return p;
+}
+
+always_inline ipsec_policy_t *
+ipsec_output_policy_match (ipsec_spd_t *spd, u8 pr, u32 la, u32 ra, u16 lp,
+ u16 rp, u8 flow_cache_enabled)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_policy_t *p;
+ ipsec_policy_t *policies[1];
+ ipsec_fp_5tuple_t tuples[1];
+ u32 fp_policy_ids[1];
+
+ u32 *i;
+
+ if (!spd)
+ return 0;
+
+ if (im->fp_spd_ipv4_out_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd->fp_spd.ip4_out_lookup_hash_idx))
+ {
+ ipsec_fp_5tuple_from_ip4_range (&tuples[0], la, ra, lp, rp, pr);
+ ipsec_fp_out_policy_match_n (&spd->fp_spd, 0, tuples, policies,
+ fp_policy_ids, 1);
+ p = policies[0];
+ i = fp_policy_ids;
+ if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP) &&
+ (pr != IP_PROTOCOL_SCTP)))
+ {
+ lp = 0;
+ rp = 0;
+ }
+ goto add_flow_cache;
+ }
+
+ vec_foreach (i, spd->policies[IPSEC_SPD_POLICY_IP4_OUTBOUND])
+ {
+ p = pool_elt_at_index (im->policies, *i);
+ if (PREDICT_FALSE ((p->protocol != IPSEC_POLICY_PROTOCOL_ANY) &&
+ (p->protocol != pr)))
+ continue;
+
+ if (ra < clib_net_to_host_u32 (p->raddr.start.ip4.as_u32))
+ continue;
+
+ if (ra > clib_net_to_host_u32 (p->raddr.stop.ip4.as_u32))
+ continue;
+
+ if (la < clib_net_to_host_u32 (p->laddr.start.ip4.as_u32))
+ continue;
+
+ if (la > clib_net_to_host_u32 (p->laddr.stop.ip4.as_u32))
+ continue;
+
+ if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP) &&
+ (pr != IP_PROTOCOL_SCTP)))
+ {
+ lp = 0;
+ rp = 0;
+ goto add_flow_cache;
+ }
+
+ if (lp < p->lport.start)
+ continue;
+
+ if (lp > p->lport.stop)
+ continue;
+
+ if (rp < p->rport.start)
+ continue;
+
+ if (rp > p->rport.stop)
+ continue;
+
+ add_flow_cache:
+ if (flow_cache_enabled)
+ {
+ /* Add an Entry in Flow cache */
+ ipsec4_out_spd_add_flow_cache_entry (
+ im, pr, clib_host_to_net_u32 (la), clib_host_to_net_u32 (ra),
+ clib_host_to_net_u16 (lp), clib_host_to_net_u16 (rp), *i);
+ }
+
+ return p;
+ }
+ return 0;
+}
+
+always_inline uword
+ip6_addr_match_range (ip6_address_t *a, ip6_address_t *la, ip6_address_t *ua)
+{
+ if ((memcmp (a->as_u64, la->as_u64, 2 * sizeof (u64)) >= 0) &&
+ (memcmp (a->as_u64, ua->as_u64, 2 * sizeof (u64)) <= 0))
+ return 1;
+ return 0;
+}
+
+always_inline void
+ipsec_fp_5tuple_from_ip6_range (ipsec_fp_5tuple_t *tuple, ip6_address_t *la,
+ ip6_address_t *ra, u16 lp, u16 rp, u8 pr)
+
+{
+ clib_memcpy (&tuple->ip6_laddr, la, sizeof (ip6_address_t));
+ clib_memcpy (&tuple->ip6_raddr, ra, sizeof (ip6_address_t));
+
+ tuple->lport = lp;
+ tuple->rport = rp;
+ tuple->protocol = pr;
+ tuple->is_ipv6 = 1;
+}
+
+always_inline ipsec_policy_t *
+ipsec6_output_policy_match (ipsec_spd_t *spd, ip6_address_t *la,
+ ip6_address_t *ra, u16 lp, u16 rp, u8 pr)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_policy_t *p;
+ ipsec_policy_t *policies[1];
+ ipsec_fp_5tuple_t tuples[1];
+ u32 fp_policy_ids[1];
+
+ u32 *i;
+
+ if (!spd)
+ return 0;
+
+ if (im->fp_spd_ipv6_out_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd->fp_spd.ip6_out_lookup_hash_idx))
+ {
+
+ ipsec_fp_5tuple_from_ip6_range (&tuples[0], la, ra, lp, rp, pr);
+ ipsec_fp_out_policy_match_n (&spd->fp_spd, 1, tuples, policies,
+ fp_policy_ids, 1);
+ p = policies[0];
+ i = fp_policy_ids;
+ return p;
+ }
+
+ vec_foreach (i, spd->policies[IPSEC_SPD_POLICY_IP6_OUTBOUND])
+ {
+ p = pool_elt_at_index (im->policies, *i);
+ if (PREDICT_FALSE ((p->protocol != IPSEC_POLICY_PROTOCOL_ANY) &&
+ (p->protocol != pr)))
+ continue;
+
+ if (!ip6_addr_match_range (ra, &p->raddr.start.ip6, &p->raddr.stop.ip6))
+ continue;
+
+ if (!ip6_addr_match_range (la, &p->laddr.start.ip6, &p->laddr.stop.ip6))
+ continue;
+
+ if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP) &&
+ (pr != IP_PROTOCOL_SCTP)))
+ return p;
+
+ if (lp < p->lport.start)
+ continue;
+
+ if (lp > p->lport.stop)
+ continue;
+
+ if (rp < p->rport.start)
+ continue;
+
+ if (rp > p->rport.stop)
+ continue;
+
+ return p;
+ }
+
+ return 0;
+}
+
+#endif /* !IPSEC_OUTPUT_H */
diff --git a/src/vnet/ipsec/ipsec_punt.h b/src/vnet/ipsec/ipsec_punt.h
index afed908bffb..9b9fc803391 100644
--- a/src/vnet/ipsec/ipsec_punt.h
+++ b/src/vnet/ipsec/ipsec_punt.h
@@ -20,7 +20,8 @@
#define foreach_ipsec_punt_reason \
_ (IP4_SPI_UDP_0, "ipsec4-spi-o-udp-0", IP4_PACKET) \
_ (IP4_NO_SUCH_TUNNEL, "ipsec4-no-such-tunnel", IP4_PACKET) \
- _ (IP6_NO_SUCH_TUNNEL, "ipsec6-no-such-tunnel", IP6_PACKET)
+ _ (IP6_NO_SUCH_TUNNEL, "ipsec6-no-such-tunnel", IP6_PACKET) \
+ _ (IP6_SPI_UDP_0, "ipsec6-spi-o-udp-0", IP6_PACKET)
typedef enum ipsec_punt_reason_t_
{
diff --git a/src/vnet/ipsec/ipsec_sa.c b/src/vnet/ipsec/ipsec_sa.c
index 387d8a747a3..1d5195ec793 100644
--- a/src/vnet/ipsec/ipsec_sa.c
+++ b/src/vnet/ipsec/ipsec_sa.c
@@ -13,12 +13,14 @@
* limitations under the License.
*/
+#include <sys/random.h>
#include <vnet/ipsec/ipsec.h>
#include <vnet/ipsec/esp.h>
#include <vnet/udp/udp_local.h>
#include <vnet/fib/fib_table.h>
#include <vnet/fib/fib_entry_track.h>
#include <vnet/ipsec/ipsec_tun.h>
+#include <vnet/ipsec/ipsec.api_enum.h>
/**
* @brief
@@ -28,10 +30,8 @@ vlib_combined_counter_main_t ipsec_sa_counters = {
.name = "SA",
.stat_segment_name = "/net/ipsec/sa",
};
-vlib_simple_counter_main_t ipsec_sa_lost_counters = {
- .name = "SA-lost",
- .stat_segment_name = "/net/ipsec/sa/lost",
-};
+/* Per-SA error counters */
+vlib_simple_counter_main_t ipsec_sa_err_counters[IPSEC_SA_N_ERRORS];
ipsec_sa_t *ipsec_sa_pool;
@@ -93,18 +93,40 @@ ipsec_sa_stack (ipsec_sa_t * sa)
}
void
+ipsec_sa_set_async_mode (ipsec_sa_t *sa, int is_enabled)
+{
+ if (is_enabled)
+ {
+ sa->crypto_key_index = sa->crypto_async_key_index;
+ sa->crypto_enc_op_id = sa->crypto_async_enc_op_id;
+ sa->crypto_dec_op_id = sa->crypto_async_dec_op_id;
+ sa->integ_key_index = ~0;
+ sa->integ_op_id = ~0;
+ }
+ else
+ {
+ sa->crypto_key_index = sa->crypto_sync_key_index;
+ sa->crypto_enc_op_id = sa->crypto_sync_enc_op_id;
+ sa->crypto_dec_op_id = sa->crypto_sync_dec_op_id;
+ sa->integ_key_index = sa->integ_sync_key_index;
+ sa->integ_op_id = sa->integ_sync_op_id;
+ }
+}
+
+void
ipsec_sa_set_crypto_alg (ipsec_sa_t * sa, ipsec_crypto_alg_t crypto_alg)
{
ipsec_main_t *im = &ipsec_main;
sa->crypto_alg = crypto_alg;
sa->crypto_iv_size = im->crypto_algs[crypto_alg].iv_size;
sa->esp_block_align = clib_max (4, im->crypto_algs[crypto_alg].block_align);
- sa->sync_op_data.crypto_enc_op_id = im->crypto_algs[crypto_alg].enc_op_id;
- sa->sync_op_data.crypto_dec_op_id = im->crypto_algs[crypto_alg].dec_op_id;
+ sa->crypto_sync_enc_op_id = im->crypto_algs[crypto_alg].enc_op_id;
+ sa->crypto_sync_dec_op_id = im->crypto_algs[crypto_alg].dec_op_id;
sa->crypto_calg = im->crypto_algs[crypto_alg].alg;
ASSERT (sa->crypto_iv_size <= ESP_MAX_IV_SIZE);
ASSERT (sa->esp_block_align <= ESP_MAX_BLOCK_SIZE);
- if (IPSEC_CRYPTO_ALG_IS_GCM (crypto_alg))
+ if (IPSEC_CRYPTO_ALG_IS_GCM (crypto_alg) ||
+ IPSEC_CRYPTO_ALG_CTR_AEAD_OTHERS (crypto_alg))
{
sa->integ_icv_size = im->crypto_algs[crypto_alg].icv_size;
ipsec_sa_set_IS_CTR (sa);
@@ -114,6 +136,13 @@ ipsec_sa_set_crypto_alg (ipsec_sa_t * sa, ipsec_crypto_alg_t crypto_alg)
{
ipsec_sa_set_IS_CTR (sa);
}
+ else if (IPSEC_CRYPTO_ALG_IS_NULL_GMAC (crypto_alg))
+ {
+ sa->integ_icv_size = im->crypto_algs[crypto_alg].icv_size;
+ ipsec_sa_set_IS_CTR (sa);
+ ipsec_sa_set_IS_AEAD (sa);
+ ipsec_sa_set_IS_NULL_GMAC (sa);
+ }
}
void
@@ -122,7 +151,7 @@ ipsec_sa_set_integ_alg (ipsec_sa_t * sa, ipsec_integ_alg_t integ_alg)
ipsec_main_t *im = &ipsec_main;
sa->integ_alg = integ_alg;
sa->integ_icv_size = im->integ_algs[integ_alg].icv_size;
- sa->sync_op_data.integ_op_id = im->integ_algs[integ_alg].op_id;
+ sa->integ_sync_op_id = im->integ_algs[integ_alg].op_id;
sa->integ_calg = im->integ_algs[integ_alg].alg;
ASSERT (sa->integ_icv_size <= ESP_MAX_ICV_SIZE);
}
@@ -130,44 +159,167 @@ ipsec_sa_set_integ_alg (ipsec_sa_t * sa, ipsec_integ_alg_t integ_alg)
void
ipsec_sa_set_async_op_ids (ipsec_sa_t * sa)
{
- /* *INDENT-OFF* */
if (ipsec_sa_is_set_USE_ESN (sa))
{
-#define _(n, s, k) \
- if( sa->sync_op_data.crypto_enc_op_id == VNET_CRYPTO_OP_##n##_ENC ) \
- sa->async_op_data.crypto_async_enc_op_id = \
- VNET_CRYPTO_OP_##n##_TAG16_AAD12_ENC; \
- if( sa->sync_op_data.crypto_dec_op_id == VNET_CRYPTO_OP_##n##_DEC ) \
- sa->async_op_data.crypto_async_dec_op_id = \
- VNET_CRYPTO_OP_##n##_TAG16_AAD12_DEC;
- foreach_crypto_aead_alg
+#define _(n, s, k) \
+ if (sa->crypto_sync_enc_op_id == VNET_CRYPTO_OP_##n##_ENC) \
+ sa->crypto_async_enc_op_id = VNET_CRYPTO_OP_##n##_TAG16_AAD12_ENC; \
+ if (sa->crypto_sync_dec_op_id == VNET_CRYPTO_OP_##n##_DEC) \
+ sa->crypto_async_dec_op_id = VNET_CRYPTO_OP_##n##_TAG16_AAD12_DEC;
+ foreach_crypto_aead_alg
#undef _
}
else
{
-#define _(n, s, k) \
- if( sa->sync_op_data.crypto_enc_op_id == VNET_CRYPTO_OP_##n##_ENC ) \
- sa->async_op_data.crypto_async_enc_op_id = \
- VNET_CRYPTO_OP_##n##_TAG16_AAD8_ENC; \
- if( sa->sync_op_data.crypto_dec_op_id == VNET_CRYPTO_OP_##n##_DEC ) \
- sa->async_op_data.crypto_async_dec_op_id = \
- VNET_CRYPTO_OP_##n##_TAG16_AAD8_DEC;
- foreach_crypto_aead_alg
+#define _(n, s, k) \
+ if (sa->crypto_sync_enc_op_id == VNET_CRYPTO_OP_##n##_ENC) \
+ sa->crypto_async_enc_op_id = VNET_CRYPTO_OP_##n##_TAG16_AAD8_ENC; \
+ if (sa->crypto_sync_dec_op_id == VNET_CRYPTO_OP_##n##_DEC) \
+ sa->crypto_async_dec_op_id = VNET_CRYPTO_OP_##n##_TAG16_AAD8_DEC;
+ foreach_crypto_aead_alg
#undef _
}
-#define _(c, h, s, k ,d) \
- if( sa->sync_op_data.crypto_enc_op_id == VNET_CRYPTO_OP_##c##_ENC && \
- sa->sync_op_data.integ_op_id == VNET_CRYPTO_OP_##h##_HMAC) \
- sa->async_op_data.crypto_async_enc_op_id = \
- VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC; \
- if( sa->sync_op_data.crypto_dec_op_id == VNET_CRYPTO_OP_##c##_DEC && \
- sa->sync_op_data.integ_op_id == VNET_CRYPTO_OP_##h##_HMAC) \
- sa->async_op_data.crypto_async_dec_op_id = \
- VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC;
+#define _(c, h, s, k, d) \
+ if (sa->crypto_sync_enc_op_id == VNET_CRYPTO_OP_##c##_ENC && \
+ sa->integ_sync_op_id == VNET_CRYPTO_OP_##h##_HMAC) \
+ sa->crypto_async_enc_op_id = VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC; \
+ if (sa->crypto_sync_dec_op_id == VNET_CRYPTO_OP_##c##_DEC && \
+ sa->integ_sync_op_id == VNET_CRYPTO_OP_##h##_HMAC) \
+ sa->crypto_async_dec_op_id = VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC;
foreach_crypto_link_async_alg
#undef _
- /* *INDENT-ON* */
+}
+
+int
+ipsec_sa_update (u32 id, u16 src_port, u16 dst_port, const tunnel_t *tun,
+ bool is_tun)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_sa_t *sa;
+ u32 sa_index;
+ uword *p;
+ int rv;
+
+ p = hash_get (im->sa_index_by_sa_id, id);
+ if (!p)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ sa = ipsec_sa_get (p[0]);
+ sa_index = sa - ipsec_sa_pool;
+
+ if (is_tun && ipsec_sa_is_set_IS_TUNNEL (sa) &&
+ (ip_address_cmp (&tun->t_src, &sa->tunnel.t_src) != 0 ||
+ ip_address_cmp (&tun->t_dst, &sa->tunnel.t_dst) != 0))
+ {
+ /* if the source IP is updated for an inbound SA under a tunnel protect,
+ we need to update the tun_protect DB with the new src IP */
+ if (ipsec_sa_is_set_IS_INBOUND (sa) &&
+ ip_address_cmp (&tun->t_src, &sa->tunnel.t_src) != 0 &&
+ !ip46_address_is_zero (&tun->t_src.ip))
+ {
+ if (ip46_address_is_ip4 (&sa->tunnel.t_src.ip))
+ {
+ ipsec4_tunnel_kv_t old_key, new_key;
+ clib_bihash_kv_8_16_t res,
+ *bkey = (clib_bihash_kv_8_16_t *) &old_key;
+
+ ipsec4_tunnel_mk_key (&old_key, &sa->tunnel.t_src.ip.ip4,
+ clib_host_to_net_u32 (sa->spi));
+ ipsec4_tunnel_mk_key (&new_key, &tun->t_src.ip.ip4,
+ clib_host_to_net_u32 (sa->spi));
+
+ if (!clib_bihash_search_8_16 (&im->tun4_protect_by_key, bkey,
+ &res))
+ {
+ clib_bihash_add_del_8_16 (&im->tun4_protect_by_key, &res, 0);
+ res.key = new_key.key;
+ clib_bihash_add_del_8_16 (&im->tun4_protect_by_key, &res, 1);
+ }
+ }
+ else
+ {
+ ipsec6_tunnel_kv_t old_key = {
+ .key = {
+ .remote_ip = sa->tunnel.t_src.ip.ip6,
+ .spi = clib_host_to_net_u32 (sa->spi),
+ },
+ }, new_key = {
+ .key = {
+ .remote_ip = tun->t_src.ip.ip6,
+ .spi = clib_host_to_net_u32 (sa->spi),
+ }};
+ clib_bihash_kv_24_16_t res,
+ *bkey = (clib_bihash_kv_24_16_t *) &old_key;
+
+ if (!clib_bihash_search_24_16 (&im->tun6_protect_by_key, bkey,
+ &res))
+ {
+ clib_bihash_add_del_24_16 (&im->tun6_protect_by_key, &res,
+ 0);
+		  clib_memcpy (&res.key, &new_key.key, sizeof (res.key));
+ clib_bihash_add_del_24_16 (&im->tun6_protect_by_key, &res,
+ 1);
+ }
+ }
+ }
+ tunnel_unresolve (&sa->tunnel);
+ tunnel_copy (tun, &sa->tunnel);
+ if (!ipsec_sa_is_set_IS_INBOUND (sa))
+ {
+ dpo_reset (&sa->dpo);
+
+ sa->tunnel_flags = sa->tunnel.t_encap_decap_flags;
+
+ rv = tunnel_resolve (&sa->tunnel, FIB_NODE_TYPE_IPSEC_SA, sa_index);
+
+ if (rv)
+ {
+ hash_unset (im->sa_index_by_sa_id, sa->id);
+ pool_put (ipsec_sa_pool, sa);
+ return rv;
+ }
+ ipsec_sa_stack (sa);
+ /* generate header templates */
+ if (ipsec_sa_is_set_IS_TUNNEL_V6 (sa))
+ {
+ tunnel_build_v6_hdr (&sa->tunnel,
+ (ipsec_sa_is_set_UDP_ENCAP (sa) ?
+ IP_PROTOCOL_UDP :
+ IP_PROTOCOL_IPSEC_ESP),
+ &sa->ip6_hdr);
+ }
+ else
+ {
+ tunnel_build_v4_hdr (&sa->tunnel,
+ (ipsec_sa_is_set_UDP_ENCAP (sa) ?
+ IP_PROTOCOL_UDP :
+ IP_PROTOCOL_IPSEC_ESP),
+ &sa->ip4_hdr);
+ }
+ }
+ }
+
+ if (ipsec_sa_is_set_UDP_ENCAP (sa))
+ {
+ if (dst_port != IPSEC_UDP_PORT_NONE &&
+ dst_port != clib_net_to_host_u16 (sa->udp_hdr.dst_port))
+ {
+ if (ipsec_sa_is_set_IS_INBOUND (sa))
+ {
+ ipsec_unregister_udp_port (
+ clib_net_to_host_u16 (sa->udp_hdr.dst_port),
+ !ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
+ ipsec_register_udp_port (dst_port,
+ !ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
+ }
+ sa->udp_hdr.dst_port = clib_host_to_net_u16 (dst_port);
+ }
+ if (src_port != IPSEC_UDP_PORT_NONE &&
+ src_port != clib_net_to_host_u16 (sa->udp_hdr.src_port))
+ sa->udp_hdr.src_port = clib_host_to_net_u16 (src_port);
+ }
+ return (0);
}
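
ipsec_sa_update above deliberately avoids a destroy-and-recreate cycle: tunnel endpoints are re-keyed in the protect DB in place, and the UDP encapsulation ports are swapped with a port re-registration only where one is needed, namely the destination port of an inbound SA. A stand-alone sketch of the port rule, with the all-ones port meaning "leave unchanged" as IPSEC_UDP_PORT_NONE does above (helper names are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define UDP_PORT_NONE ((uint16_t) ~0)

    static uint16_t stored_net_port;   /* kept in network byte order */

    static uint16_t
    bswap16 (uint16_t v)               /* net <-> host on little endian */
    {
      return (uint16_t) ((v >> 8) | (v << 8));
    }

    static void
    update_dst_port (uint16_t dst_port, int is_inbound)
    {
      if (dst_port == UDP_PORT_NONE || dst_port == bswap16 (stored_net_port))
        return;                        /* unset, or unchanged: nothing to do */
      if (is_inbound)                  /* only inbound SAs own a UDP listener */
        printf ("re-register %u -> %u\n",
                (unsigned) bswap16 (stored_net_port), (unsigned) dst_port);
      stored_net_port = bswap16 (dst_port);
    }

    int
    main (void)
    {
      stored_net_port = bswap16 (4500);
      update_dst_port (UDP_PORT_NONE, 1); /* no-op */
      update_dst_port (4501, 1);          /* prints: re-register 4500 -> 4501 */
      return 0;
    }
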
int
@@ -175,13 +327,15 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
ipsec_crypto_alg_t crypto_alg, const ipsec_key_t *ck,
ipsec_integ_alg_t integ_alg, const ipsec_key_t *ik,
ipsec_sa_flags_t flags, u32 salt, u16 src_port,
- u16 dst_port, const tunnel_t *tun, u32 *sa_out_index)
+ u16 dst_port, u32 anti_replay_window_size,
+ const tunnel_t *tun, u32 *sa_out_index)
{
vlib_main_t *vm = vlib_get_main ();
ipsec_main_t *im = &ipsec_main;
clib_error_t *err;
ipsec_sa_t *sa;
u32 sa_index;
+ u64 rand[2];
uword *p;
int rv;
@@ -189,16 +343,24 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
if (p)
return VNET_API_ERROR_ENTRY_ALREADY_EXISTS;
+ if (getrandom (rand, sizeof (rand), 0) != sizeof (rand))
+ return VNET_API_ERROR_INIT_FAILED;
+
pool_get_aligned_zero (ipsec_sa_pool, sa, CLIB_CACHE_LINE_BYTES);
+ clib_pcg64i_srandom_r (&sa->iv_prng, rand[0], rand[1]);
+
fib_node_init (&sa->node, FIB_NODE_TYPE_IPSEC_SA);
fib_node_lock (&sa->node);
sa_index = sa - ipsec_sa_pool;
vlib_validate_combined_counter (&ipsec_sa_counters, sa_index);
vlib_zero_combined_counter (&ipsec_sa_counters, sa_index);
- vlib_validate_simple_counter (&ipsec_sa_lost_counters, sa_index);
- vlib_zero_simple_counter (&ipsec_sa_lost_counters, sa_index);
+ for (int i = 0; i < IPSEC_SA_N_ERRORS; i++)
+ {
+ vlib_validate_simple_counter (&ipsec_sa_err_counters[i], sa_index);
+ vlib_zero_simple_counter (&ipsec_sa_err_counters[i], sa_index);
+ }
tunnel_copy (tun, &sa->tunnel);
sa->id = id;
@@ -216,12 +378,14 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
ipsec_sa_set_crypto_alg (sa, crypto_alg);
ipsec_sa_set_async_op_ids (sa);
+ if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && anti_replay_window_size > 64)
+ ipsec_sa_set_ANTI_REPLAY_HUGE (sa);
+
clib_memcpy (&sa->crypto_key, ck, sizeof (sa->crypto_key));
- sa->crypto_key_index = vnet_crypto_key_add (vm,
- im->crypto_algs[crypto_alg].alg,
- (u8 *) ck->data, ck->len);
- if (~0 == sa->crypto_key_index)
+ sa->crypto_sync_key_index = vnet_crypto_key_add (
+ vm, im->crypto_algs[crypto_alg].alg, (u8 *) ck->data, ck->len);
+ if (~0 == sa->crypto_sync_key_index)
{
pool_put (ipsec_sa_pool, sa);
return VNET_API_ERROR_KEY_LENGTH;
@@ -229,42 +393,39 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
if (integ_alg != IPSEC_INTEG_ALG_NONE)
{
- sa->integ_key_index = vnet_crypto_key_add (vm,
- im->
- integ_algs[integ_alg].alg,
- (u8 *) ik->data, ik->len);
- if (~0 == sa->integ_key_index)
+ sa->integ_sync_key_index = vnet_crypto_key_add (
+ vm, im->integ_algs[integ_alg].alg, (u8 *) ik->data, ik->len);
+ if (~0 == sa->integ_sync_key_index)
{
pool_put (ipsec_sa_pool, sa);
return VNET_API_ERROR_KEY_LENGTH;
}
}
- if (sa->async_op_data.crypto_async_enc_op_id &&
- !ipsec_sa_is_set_IS_AEAD (sa))
- { //AES-CBC & HMAC
- sa->async_op_data.linked_key_index =
- vnet_crypto_key_add_linked (vm, sa->crypto_key_index,
- sa->integ_key_index);
- }
+ if (sa->crypto_async_enc_op_id && !ipsec_sa_is_set_IS_AEAD (sa))
+ sa->crypto_async_key_index =
+ vnet_crypto_key_add_linked (vm, sa->crypto_sync_key_index,
+ sa->integ_sync_key_index); // AES-CBC & HMAC
+ else
+ sa->crypto_async_key_index = sa->crypto_sync_key_index;
if (im->async_mode)
- sa->crypto_op_data = sa->async_op_data.data;
+ {
+ ipsec_sa_set_async_mode (sa, 1);
+ }
+ else if (ipsec_sa_is_set_IS_ASYNC (sa))
+ {
+ ipsec_sa_set_async_mode (sa, 1 /* is_enabled */);
+ }
else
{
- if (ipsec_sa_is_set_IS_ASYNC (sa))
- {
- vnet_crypto_request_async_mode (1);
- sa->crypto_op_data = sa->async_op_data.data;
- }
- else
- sa->crypto_op_data = sa->sync_op_data.data;
+ ipsec_sa_set_async_mode (sa, 0 /* is_enabled */);
}
err = ipsec_check_support_cb (im, sa);
if (err)
{
- clib_warning ("%s", err->what);
+ clib_warning ("%v", err->what);
pool_put (ipsec_sa_pool, sa);
return VNET_API_ERROR_UNIMPLEMENTED;
}
@@ -325,7 +486,20 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
sa->udp_hdr.src_port = clib_host_to_net_u16 (src_port);
if (ipsec_sa_is_set_IS_INBOUND (sa))
- ipsec_register_udp_port (clib_host_to_net_u16 (sa->udp_hdr.dst_port));
+ ipsec_register_udp_port (clib_host_to_net_u16 (sa->udp_hdr.dst_port),
+ !ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
+ }
+
+ /* window size rounded up to next power of 2 */
+ if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa))
+ {
+ anti_replay_window_size = 1 << max_log2 (anti_replay_window_size);
+ sa->replay_window_huge =
+ clib_bitmap_set_region (0, 0, 1, anti_replay_window_size);
+ }
+ else
+ {
+ sa->replay_window = ~0;
}
hash_set (im->sa_index_by_sa_id, sa->id, sa_index);
@@ -351,18 +525,51 @@ ipsec_sa_del (ipsec_sa_t * sa)
(void) ipsec_call_add_del_callbacks (im, sa, sa_index, 0);
if (ipsec_sa_is_set_IS_ASYNC (sa))
- vnet_crypto_request_async_mode (0);
+ {
+ if (!ipsec_sa_is_set_IS_AEAD (sa))
+ vnet_crypto_key_del (vm, sa->crypto_async_key_index);
+ }
+
if (ipsec_sa_is_set_UDP_ENCAP (sa) && ipsec_sa_is_set_IS_INBOUND (sa))
- ipsec_unregister_udp_port (clib_net_to_host_u16 (sa->udp_hdr.dst_port));
+ ipsec_unregister_udp_port (clib_net_to_host_u16 (sa->udp_hdr.dst_port),
+ !ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
if (ipsec_sa_is_set_IS_TUNNEL (sa) && !ipsec_sa_is_set_IS_INBOUND (sa))
dpo_reset (&sa->dpo);
- vnet_crypto_key_del (vm, sa->crypto_key_index);
+ vnet_crypto_key_del (vm, sa->crypto_sync_key_index);
if (sa->integ_alg != IPSEC_INTEG_ALG_NONE)
- vnet_crypto_key_del (vm, sa->integ_key_index);
+ vnet_crypto_key_del (vm, sa->integ_sync_key_index);
+ if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa))
+ clib_bitmap_free (sa->replay_window_huge);
pool_put (ipsec_sa_pool, sa);
}
+int
+ipsec_sa_bind (u32 id, u32 worker, bool bind)
+{
+ ipsec_main_t *im = &ipsec_main;
+ uword *p;
+ ipsec_sa_t *sa;
+
+ p = hash_get (im->sa_index_by_sa_id, id);
+ if (!p)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ sa = ipsec_sa_get (p[0]);
+
+ if (!bind)
+ {
+ sa->thread_index = ~0;
+ return 0;
+ }
+
+ if (worker >= vlib_num_workers ())
+ return VNET_API_ERROR_INVALID_WORKER;
+
+ sa->thread_index = vlib_get_worker_thread_index (worker);
+ return 0;
+}
+
void
ipsec_sa_unlock (index_t sai)
{
@@ -428,7 +635,8 @@ void
ipsec_sa_clear (index_t sai)
{
vlib_zero_combined_counter (&ipsec_sa_counters, sai);
- vlib_zero_simple_counter (&ipsec_sa_lost_counters, sai);
+ for (int i = 0; i < IPSEC_SA_N_ERRORS; i++)
+ vlib_zero_simple_counter (&ipsec_sa_err_counters[i], sai);
}
void
@@ -436,13 +644,11 @@ ipsec_sa_walk (ipsec_sa_walk_cb_t cb, void *ctx)
{
ipsec_sa_t *sa;
- /* *INDENT-OFF* */
pool_foreach (sa, ipsec_sa_pool)
{
if (WALK_CONTINUE != cb (sa, ctx))
break;
}
- /* *INDENT-ON* */
}
/**
@@ -459,19 +665,18 @@ ipsec_sa_fib_node_get (fib_node_index_t index)
}
static ipsec_sa_t *
-ipsec_sa_from_fib_node (fib_node_t * node)
+ipsec_sa_from_fib_node (fib_node_t *node)
{
ASSERT (FIB_NODE_TYPE_IPSEC_SA == node->fn_type);
- return ((ipsec_sa_t *) (((char *) node) -
- STRUCT_OFFSET_OF (ipsec_sa_t, node)));
-
+ return (
+ (ipsec_sa_t *) (((char *) node) - STRUCT_OFFSET_OF (ipsec_sa_t, node)));
}
/**
* Function definition to inform the FIB node that its last lock has gone.
*/
static void
-ipsec_sa_last_lock_gone (fib_node_t * node)
+ipsec_sa_last_lock_gone (fib_node_t *node)
{
/*
* The ipsec SA is a root of the graph. As such
@@ -484,7 +689,7 @@ ipsec_sa_last_lock_gone (fib_node_t * node)
* Function definition to backwalk a FIB node
*/
static fib_node_back_walk_rc_t
-ipsec_sa_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx)
+ipsec_sa_back_walk (fib_node_t *node, fib_node_back_walk_ctx_t *ctx)
{
ipsec_sa_stack (ipsec_sa_from_fib_node (node));
@@ -501,16 +706,24 @@ const static fib_node_vft_t ipsec_sa_vft = {
.fnv_back_walk = ipsec_sa_back_walk,
};
-/* force inclusion from application's main.c */
+/* Init per-SA error counters and node type */
clib_error_t *
-ipsec_sa_interface_init (vlib_main_t * vm)
+ipsec_sa_init (vlib_main_t *vm)
{
fib_node_register_type (FIB_NODE_TYPE_IPSEC_SA, &ipsec_sa_vft);
- return 0;
+#define _(index, val, err, desc) \
+ ipsec_sa_err_counters[index].name = \
+ (char *) format (0, "SA-" #err "%c", 0); \
+ ipsec_sa_err_counters[index].stat_segment_name = \
+ (char *) format (0, "/net/ipsec/sa/err/" #err "%c", 0); \
+ ipsec_sa_err_counters[index].counters = 0;
+ foreach_ipsec_sa_err
+#undef _
+ return 0;
}
-VLIB_INIT_FUNCTION (ipsec_sa_interface_init);
+VLIB_INIT_FUNCTION (ipsec_sa_init);
/*
* fd.io coding-style-patch-verification: ON
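
The single SA-lost counter becomes an array indexed by foreach_ipsec_sa_err, and ipsec_sa_init expands that X-macro into one stat-segment path per error, of the form /net/ipsec/sa/err/<name>. A stand-alone model of the expansion (scaffolding only, not vlib):

    #include <stdio.h>

    #define foreach_demo_sa_err                                               \
      _ (0, LOST, lost, "packets lost")                                       \
      _ (5, REPLAY, replay, "SA replayed packet")

    int
    main (void)
    {
    #define _(index, val, err, desc)                                          \
      printf ("counter %d: /net/ipsec/sa/err/%s (%s)\n", index, #err, desc);
      foreach_demo_sa_err
    #undef _
      return 0;
    }
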
diff --git a/src/vnet/ipsec/ipsec_sa.h b/src/vnet/ipsec/ipsec_sa.h
index 2cc64e19546..4f73f1eab0f 100644
--- a/src/vnet/ipsec/ipsec_sa.h
+++ b/src/vnet/ipsec/ipsec_sa.h
@@ -16,24 +16,33 @@
#define __IPSEC_SPD_SA_H__
#include <vlib/vlib.h>
+#include <vppinfra/pcg.h>
#include <vnet/crypto/crypto.h>
#include <vnet/ip/ip.h>
#include <vnet/fib/fib_node.h>
#include <vnet/tunnel/tunnel.h>
-#define foreach_ipsec_crypto_alg \
- _ (0, NONE, "none") \
- _ (1, AES_CBC_128, "aes-cbc-128") \
- _ (2, AES_CBC_192, "aes-cbc-192") \
- _ (3, AES_CBC_256, "aes-cbc-256") \
- _ (4, AES_CTR_128, "aes-ctr-128") \
- _ (5, AES_CTR_192, "aes-ctr-192") \
- _ (6, AES_CTR_256, "aes-ctr-256") \
- _ (7, AES_GCM_128, "aes-gcm-128") \
- _ (8, AES_GCM_192, "aes-gcm-192") \
- _ (9, AES_GCM_256, "aes-gcm-256") \
- _ (10, DES_CBC, "des-cbc") \
- _ (11, 3DES_CBC, "3des-cbc")
+#define ESP_MAX_ICV_SIZE (32)
+#define ESP_MAX_IV_SIZE (16)
+#define ESP_MAX_BLOCK_SIZE (16)
+
+#define foreach_ipsec_crypto_alg \
+ _ (0, NONE, "none") \
+ _ (1, AES_CBC_128, "aes-cbc-128") \
+ _ (2, AES_CBC_192, "aes-cbc-192") \
+ _ (3, AES_CBC_256, "aes-cbc-256") \
+ _ (4, AES_CTR_128, "aes-ctr-128") \
+ _ (5, AES_CTR_192, "aes-ctr-192") \
+ _ (6, AES_CTR_256, "aes-ctr-256") \
+ _ (7, AES_GCM_128, "aes-gcm-128") \
+ _ (8, AES_GCM_192, "aes-gcm-192") \
+ _ (9, AES_GCM_256, "aes-gcm-256") \
+ _ (10, DES_CBC, "des-cbc") \
+ _ (11, 3DES_CBC, "3des-cbc") \
+ _ (12, CHACHA20_POLY1305, "chacha20-poly1305") \
+ _ (13, AES_NULL_GMAC_128, "aes-null-gmac-128") \
+ _ (14, AES_NULL_GMAC_192, "aes-null-gmac-192") \
+ _ (15, AES_NULL_GMAC_256, "aes-null-gmac-256")
typedef enum
{
@@ -43,6 +52,11 @@ typedef enum
IPSEC_CRYPTO_N_ALG,
} __clib_packed ipsec_crypto_alg_t;
+#define IPSEC_CRYPTO_ALG_IS_NULL_GMAC(_alg) \
+ ((_alg == IPSEC_CRYPTO_ALG_AES_NULL_GMAC_128) || \
+ (_alg == IPSEC_CRYPTO_ALG_AES_NULL_GMAC_192) || \
+ (_alg == IPSEC_CRYPTO_ALG_AES_NULL_GMAC_256))
+
#define IPSEC_CRYPTO_ALG_IS_GCM(_alg) \
(((_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) || \
(_alg == IPSEC_CRYPTO_ALG_AES_GCM_192) || \
@@ -53,6 +67,9 @@ typedef enum
(_alg == IPSEC_CRYPTO_ALG_AES_CTR_192) || \
(_alg == IPSEC_CRYPTO_ALG_AES_CTR_256)))
+#define IPSEC_CRYPTO_ALG_CTR_AEAD_OTHERS(_alg) \
+ (_alg == IPSEC_CRYPTO_ALG_CHACHA20_POLY1305)
+
#define foreach_ipsec_integ_alg \
_ (0, NONE, "none") \
_ (1, MD5_96, "md5-96") /* RFC2403 */ \
@@ -102,7 +119,10 @@ typedef struct ipsec_key_t_
_ (64, IS_INBOUND, "inbound") \
_ (128, IS_AEAD, "aead") \
_ (256, IS_CTR, "ctr") \
- _ (512, IS_ASYNC, "async")
+ _ (512, IS_ASYNC, "async") \
+ _ (1024, NO_ALGO_NO_DROP, "no-algo-no-drop") \
+ _ (2048, IS_NULL_GMAC, "null-gmac") \
+ _ (4096, ANTI_REPLAY_HUGE, "anti-replay-huge")
typedef enum ipsec_sad_flags_t_
{
@@ -113,51 +133,64 @@ typedef enum ipsec_sad_flags_t_
STATIC_ASSERT (sizeof (ipsec_sa_flags_t) == 2, "IPSEC SA flags != 2 byte");
+#define foreach_ipsec_sa_err \
+ _ (0, LOST, lost, "packets lost") \
+ _ (1, HANDOFF, handoff, "hand-off") \
+ _ (2, INTEG_ERROR, integ_error, "Integrity check failed") \
+ _ (3, DECRYPTION_FAILED, decryption_failed, "Decryption failed") \
+ _ (4, CRYPTO_ENGINE_ERROR, crypto_engine_error, \
+ "crypto engine error (dropped)") \
+ _ (5, REPLAY, replay, "SA replayed packet") \
+ _ (6, RUNT, runt, "undersized packet") \
+ _ (7, NO_BUFFERS, no_buffers, "no buffers (dropped)") \
+ _ (8, OVERSIZED_HEADER, oversized_header, \
+ "buffer with oversized header (dropped)") \
+ _ (9, NO_TAIL_SPACE, no_tail_space, \
+ "no enough buffer tail space (dropped)") \
+ _ (10, TUN_NO_PROTO, tun_no_proto, "no tunnel protocol") \
+ _ (11, UNSUP_PAYLOAD, unsup_payload, "unsupported payload") \
+ _ (12, SEQ_CYCLED, seq_cycled, "sequence number cycled (dropped)") \
+ _ (13, CRYPTO_QUEUE_FULL, crypto_queue_full, "crypto queue full (dropped)") \
+ _ (14, NO_ENCRYPTION, no_encryption, "no Encrypting SA (dropped)") \
+ _ (15, DROP_FRAGMENTS, drop_fragments, "IP fragments drop")
+
+typedef enum
+{
+#define _(v, f, s, d) IPSEC_SA_ERROR_##f = v,
+ foreach_ipsec_sa_err
+#undef _
+ IPSEC_SA_N_ERRORS,
+} __clib_packed ipsec_sa_err_t;
+
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- /* flags */
- ipsec_sa_flags_t flags;
-
- u8 crypto_iv_size;
- u8 esp_block_align;
- u8 integ_icv_size;
-
- u8 __pad1[3];
-
- u32 thread_index;
+ clib_pcg64i_random_t iv_prng;
- u32 spi;
- u32 seq;
- u32 seq_hi;
- u64 replay_window;
- u64 ctr_iv_counter;
+ union
+ {
+ u64 replay_window;
+ clib_bitmap_t *replay_window_huge;
+ };
dpo_id_t dpo;
vnet_crypto_key_index_t crypto_key_index;
vnet_crypto_key_index_t integ_key_index;
- /* Union data shared by sync and async ops, updated when mode is
- * changed. */
- union
- {
- struct
- {
- vnet_crypto_op_id_t crypto_enc_op_id:16;
- vnet_crypto_op_id_t crypto_dec_op_id:16;
- vnet_crypto_op_id_t integ_op_id:16;
- };
+ u32 spi;
+ u32 seq;
+ u32 seq_hi;
- struct
- {
- vnet_crypto_async_op_id_t crypto_async_enc_op_id:16;
- vnet_crypto_async_op_id_t crypto_async_dec_op_id:16;
- vnet_crypto_key_index_t linked_key_index;
- };
+ u16 crypto_enc_op_id;
+ u16 crypto_dec_op_id;
+ u16 integ_op_id;
+ ipsec_sa_flags_t flags;
+ u16 thread_index;
- u64 crypto_op_data;
- };
+ u16 integ_icv_size : 6;
+ u16 crypto_iv_size : 5;
+ u16 esp_block_align : 5;
CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
@@ -179,30 +212,7 @@ typedef struct
CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
/* Elements with u64 size multiples */
- union
- {
- struct
- {
- vnet_crypto_op_id_t crypto_enc_op_id:16;
- vnet_crypto_op_id_t crypto_dec_op_id:16;
- vnet_crypto_op_id_t integ_op_id:16;
- };
- u64 data;
- } sync_op_data;
-
- union
- {
- struct
- {
- vnet_crypto_async_op_id_t crypto_async_enc_op_id:16;
- vnet_crypto_async_op_id_t crypto_async_dec_op_id:16;
- vnet_crypto_key_index_t linked_key_index;
- };
- u64 data;
- } async_op_data;
-
tunnel_t tunnel;
-
fib_node_t node;
/* elements with u32 size */
@@ -210,6 +220,16 @@ typedef struct
u32 stat_index;
vnet_crypto_alg_t integ_calg;
vnet_crypto_alg_t crypto_calg;
+ u32 crypto_sync_key_index;
+ u32 integ_sync_key_index;
+ u32 crypto_async_key_index;
+
+ /* elements with u16 size */
+ u16 crypto_sync_enc_op_id;
+ u16 crypto_sync_dec_op_id;
+ u16 integ_sync_op_id;
+ u16 crypto_async_enc_op_id;
+ u16 crypto_async_dec_op_id;
/* else u8 packed */
ipsec_crypto_alg_t crypto_alg;
@@ -219,6 +239,10 @@ typedef struct
ipsec_key_t crypto_key;
} ipsec_sa_t;
+STATIC_ASSERT (VNET_CRYPTO_N_OP_IDS < (1 << 16), "crypto ops overflow");
+STATIC_ASSERT (ESP_MAX_ICV_SIZE < (1 << 6), "integer icv overflow");
+STATIC_ASSERT (ESP_MAX_IV_SIZE < (1 << 5), "esp iv overflow");
+STATIC_ASSERT (ESP_MAX_BLOCK_SIZE < (1 << 5), "esp alignment overflow");
STATIC_ASSERT_OFFSET_OF (ipsec_sa_t, cacheline1, CLIB_CACHE_LINE_BYTES);
STATIC_ASSERT_OFFSET_OF (ipsec_sa_t, cacheline2, 2 * CLIB_CACHE_LINE_BYTES);
@@ -235,90 +259,149 @@ STATIC_ASSERT (STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ipsec.sad_index) ==
STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ip.save_protocol),
"IPSec data is overlapping with IP data");
-#define _(a,v,s) \
- always_inline int \
- ipsec_sa_is_set_##v (const ipsec_sa_t *sa) { \
- return (sa->flags & IPSEC_SA_FLAG_##v); \
+#define _(a, v, s) \
+ always_inline bool ipsec_sa_is_set_##v (const ipsec_sa_t *sa) \
+ { \
+ return (sa->flags & IPSEC_SA_FLAG_##v); \
}
foreach_ipsec_sa_flags
#undef _
-#define _(a,v,s) \
- always_inline int \
- ipsec_sa_set_##v (ipsec_sa_t *sa) { \
- return (sa->flags |= IPSEC_SA_FLAG_##v); \
+#define _(a, v, s) \
+ always_inline void ipsec_sa_set_##v (ipsec_sa_t *sa) \
+ { \
+ sa->flags |= IPSEC_SA_FLAG_##v; \
}
foreach_ipsec_sa_flags
#undef _
-#define _(a,v,s) \
- always_inline int \
- ipsec_sa_unset_##v (ipsec_sa_t *sa) { \
- return (sa->flags &= ~IPSEC_SA_FLAG_##v); \
+#define _(a, v, s) \
+ always_inline int ipsec_sa_unset_##v (ipsec_sa_t *sa) \
+ { \
+ return (sa->flags &= ~IPSEC_SA_FLAG_##v); \
}
- foreach_ipsec_sa_flags
+ foreach_ipsec_sa_flags
#undef _
-/**
- * @brief
- * SA packet & bytes counters
- */
-extern vlib_combined_counter_main_t ipsec_sa_counters;
-extern vlib_simple_counter_main_t ipsec_sa_lost_counters;
-
-extern void ipsec_mk_key (ipsec_key_t * key, const u8 * data, u8 len);
-
-extern int
-ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
- ipsec_crypto_alg_t crypto_alg, const ipsec_key_t *ck,
- ipsec_integ_alg_t integ_alg, const ipsec_key_t *ik,
- ipsec_sa_flags_t flags, u32 salt, u16 src_port,
- u16 dst_port, const tunnel_t *tun, u32 *sa_out_index);
+ /**
+ * @brief
+ * SA packet & bytes counters
+ */
+ extern vlib_combined_counter_main_t ipsec_sa_counters;
+extern vlib_simple_counter_main_t ipsec_sa_err_counters[IPSEC_SA_N_ERRORS];
+
+extern void ipsec_mk_key (ipsec_key_t *key, const u8 *data, u8 len);
+
+extern int ipsec_sa_update (u32 id, u16 src_port, u16 dst_port,
+ const tunnel_t *tun, bool is_tun);
+extern int ipsec_sa_add_and_lock (
+ u32 id, u32 spi, ipsec_protocol_t proto, ipsec_crypto_alg_t crypto_alg,
+ const ipsec_key_t *ck, ipsec_integ_alg_t integ_alg, const ipsec_key_t *ik,
+ ipsec_sa_flags_t flags, u32 salt, u16 src_port, u16 dst_port,
+ u32 anti_replay_window_size, const tunnel_t *tun, u32 *sa_out_index);
+extern int ipsec_sa_bind (u32 id, u32 worker, bool bind);
extern index_t ipsec_sa_find_and_lock (u32 id);
extern int ipsec_sa_unlock_id (u32 id);
extern void ipsec_sa_unlock (index_t sai);
extern void ipsec_sa_lock (index_t sai);
extern void ipsec_sa_clear (index_t sai);
-extern void ipsec_sa_set_crypto_alg (ipsec_sa_t * sa,
+extern void ipsec_sa_set_crypto_alg (ipsec_sa_t *sa,
ipsec_crypto_alg_t crypto_alg);
-extern void ipsec_sa_set_integ_alg (ipsec_sa_t * sa,
+extern void ipsec_sa_set_integ_alg (ipsec_sa_t *sa,
ipsec_integ_alg_t integ_alg);
+extern void ipsec_sa_set_async_mode (ipsec_sa_t *sa, int is_enabled);
-typedef walk_rc_t (*ipsec_sa_walk_cb_t) (ipsec_sa_t * sa, void *ctx);
+typedef walk_rc_t (*ipsec_sa_walk_cb_t) (ipsec_sa_t *sa, void *ctx);
extern void ipsec_sa_walk (ipsec_sa_walk_cb_t cd, void *ctx);
extern u8 *format_ipsec_replay_window (u8 *s, va_list *args);
-extern u8 *format_ipsec_crypto_alg (u8 * s, va_list * args);
-extern u8 *format_ipsec_integ_alg (u8 * s, va_list * args);
-extern u8 *format_ipsec_sa (u8 * s, va_list * args);
-extern u8 *format_ipsec_key (u8 * s, va_list * args);
-extern uword unformat_ipsec_crypto_alg (unformat_input_t * input,
- va_list * args);
-extern uword unformat_ipsec_integ_alg (unformat_input_t * input,
- va_list * args);
-extern uword unformat_ipsec_key (unformat_input_t * input, va_list * args);
-
-#define IPSEC_UDP_PORT_NONE ((u16)~0)
+extern u8 *format_ipsec_crypto_alg (u8 *s, va_list *args);
+extern u8 *format_ipsec_integ_alg (u8 *s, va_list *args);
+extern u8 *format_ipsec_sa (u8 *s, va_list *args);
+extern u8 *format_ipsec_key (u8 *s, va_list *args);
+extern uword unformat_ipsec_crypto_alg (unformat_input_t *input,
+ va_list *args);
+extern uword unformat_ipsec_integ_alg (unformat_input_t *input, va_list *args);
+extern uword unformat_ipsec_key (unformat_input_t *input, va_list *args);
+
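The declaration block above shows that ipsec_sa_add_and_lock now takes an anti_replay_window_size argument. A hedged call sketch follows; the id, spi, algorithms, and window size are hypothetical values for illustration, and the sketch assumes the VPP headers are available (whether the huge-window flag is derived internally from a size above 64 is an implementation detail of this patch):

#include <vnet/ipsec/ipsec.h>

static int
demo_add_sa_with_large_window (void)
{
  ipsec_key_t ck = { 0 }, ik = { 0 }; /* fill with real key material */
  u32 sa_index = ~0;

  /* hypothetical values throughout; 1024 exercises the new
   * anti_replay_window_size parameter */
  return ipsec_sa_add_and_lock (10 /* id */, 1000 /* spi */,
                                IPSEC_PROTOCOL_ESP,
                                IPSEC_CRYPTO_ALG_AES_GCM_128, &ck,
                                IPSEC_INTEG_ALG_NONE, &ik,
                                IPSEC_SA_FLAG_USE_ANTI_REPLAY, 0 /* salt */,
                                IPSEC_UDP_PORT_NONE, IPSEC_UDP_PORT_NONE,
                                1024 /* anti-replay window size */,
                                NULL /* no tunnel */, &sa_index);
}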
+#define IPSEC_UDP_PORT_NONE ((u16) ~0)
/*
* Anti Replay definitions
*/
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (64)
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE-1)
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE(_sa) \
+ (u32) (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (_sa)) ? \
+ clib_bitmap_bytes (_sa->replay_window_huge) * 8 : \
+ BITS (_sa->replay_window))
+
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN(_sa, _is_huge) \
+ (u32) (_is_huge ? clib_bitmap_bytes (_sa->replay_window_huge) * 8 : \
+ BITS (_sa->replay_window))
+
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN(_sa) \
+ (u64) (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (_sa)) ? \
+ clib_bitmap_count_set_bits (_sa->replay_window_huge) : \
+ count_set_bits (_sa->replay_window))
+
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN_KNOWN_WIN(_sa, _is_huge) \
+ (u64) (_is_huge ? clib_bitmap_count_set_bits (_sa->replay_window_huge) : \
+ count_set_bits (_sa->replay_window))
+
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX(_sa) \
+ (u32) (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa) - 1)
+
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX_KNOWN_WIN(_sa, _is_huge) \
+ (u32) (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa, _is_huge) - 1)
/*
 * sequence numbers less than the lower bound are outside of the window
* From RFC4303 Appendix A:
* Bl = Tl - W + 1
*/
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND(_tl) (_tl - IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE + 1)
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND(_sa) \
+ (u32) (_sa->seq - IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa) + 1)
+
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND_KNOWN_WIN(_sa, _is_huge) \
+ (u32) (_sa->seq - \
+ IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (_sa, _is_huge) + 1)
+
+always_inline u64
+ipsec_sa_anti_replay_get_64b_window (const ipsec_sa_t *sa)
+{
+ if (!ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa))
+ return sa->replay_window;
+
+ u64 w;
+ u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa);
+ u32 tl_win_index = sa->seq & (window_size - 1);
+
+ if (PREDICT_TRUE (tl_win_index >= 63))
+ return clib_bitmap_get_multiple (sa->replay_window_huge, tl_win_index - 63,
+ 64);
+
+ w = clib_bitmap_get_multiple_no_check (sa->replay_window_huge, 0,
+ tl_win_index + 1)
+ << (63 - tl_win_index);
+ w |= clib_bitmap_get_multiple_no_check (sa->replay_window_huge,
+ window_size - 63 + tl_win_index,
+ 63 - tl_win_index);
+
+ return w;
+}
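The helper above returns the 64 most recent window bits even when the window is a huge circular bitmap. A minimal, unoptimized sketch of the same layout (plain uint64_t words instead of clib_bitmap_t; window_size assumed to be a power of two, with bit i tracking sequence numbers where seq & (window_size - 1) == i):

#include <stdint.h>

static uint64_t
demo_last_64_bits (const uint64_t *words, uint32_t window_size, uint32_t tl)
{
  uint32_t tl_index = tl & (window_size - 1);
  uint64_t w = 0;

  /* collect the 64 positions ending at tl; bit 63 of the result is tl
   * itself, matching both paths of the function above */
  for (uint32_t k = 0; k < 64; k++)
    {
      uint32_t pos = (tl_index - 63 + k) & (window_size - 1);
      w |= ((words[pos >> 6] >> (pos & 63)) & 1ULL) << k;
    }
  return w;
}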
always_inline int
-ipsec_sa_anti_replay_check (const ipsec_sa_t *sa, u32 seq)
+ipsec_sa_anti_replay_check (const ipsec_sa_t *sa, u32 seq, bool ar_huge)
{
- if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) &&
- sa->replay_window & (1ULL << (sa->seq - seq)))
- return 1;
+ u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge);
+
+  /* We assume the packet is within the window.
+   * If the packet falls to the left of the window
+   * (sa->seq - seq >= window size), the result is wrong. */
+
+ if (ar_huge)
+ return clib_bitmap_get (sa->replay_window_huge, seq & (window_size - 1));
else
- return 0;
+ return (sa->replay_window >> (window_size + seq - sa->seq - 1)) & 1;
+
+ return 0;
}
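For the non-huge branch above, window_size is 64 and bit 63 of replay_window tracks the newest sequence number sa->seq, so the shift (window_size + seq - sa->seq - 1) selects the bit for seq. A tiny self-check with assumed numbers:

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint64_t replay_window = 1ULL << 63; /* only tl itself marked seen */
  uint32_t tl = 100;                   /* sa->seq */
  uint32_t seq = 98;                   /* two behind tl */

  assert (((replay_window >> (64 + seq - tl - 1)) & 1) == 0); /* 98 unseen */
  assert (((replay_window >> (64 + tl - tl - 1)) & 1) == 1);  /* tl seen */
  return 0;
}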
/*
@@ -338,10 +421,14 @@ ipsec_sa_anti_replay_check (const ipsec_sa_t *sa, u32 seq)
always_inline int
ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
u32 hi_seq_used, bool post_decrypt,
- u32 *hi_seq_req)
+ u32 *hi_seq_req, bool ar_huge)
{
ASSERT ((post_decrypt == false) == (hi_seq_req != 0));
+ u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge);
+ u32 window_lower_bound =
+ IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND_KNOWN_WIN (sa, ar_huge);
+
if (!ipsec_sa_is_set_USE_ESN (sa))
{
if (hi_seq_req)
@@ -354,14 +441,11 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
if (PREDICT_TRUE (seq > sa->seq))
return 0;
- u32 diff = sa->seq - seq;
-
- if (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE > diff)
- return ((sa->replay_window & (1ULL << diff)) ? 1 : 0);
- else
+      /* does the packet fall off the left edge of the window? */
+ if (sa->seq >= seq + window_size)
return 1;
- return 0;
+ return ipsec_sa_anti_replay_check (sa, seq, ar_huge);
}
if (!ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
@@ -401,14 +485,15 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
*/
return 0;
}
- if (PREDICT_TRUE (sa->seq >= (IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX)))
+
+ if (PREDICT_TRUE (sa->seq >= window_size - 1))
{
/*
- * the last sequence number VPP recieved is more than one
+ * the last sequence number VPP received is more than one
* window size greater than zero.
* Case A from RFC4303 Appendix A.
*/
- if (seq < IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND (sa->seq))
+ if (seq < window_lower_bound)
{
/*
* the received sequence number is lower than the lower bound
@@ -420,7 +505,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
{
if (hi_seq_used == sa->seq_hi)
	      /* the high sequence number used to successfully decrypt this
- * packet is the same as the last-sequnence number of the SA.
+ * packet is the same as the last-sequence number of the SA.
* that means this packet did not cause a wrap.
* this packet is thus out of window and should be dropped */
return 1;
@@ -432,8 +517,8 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
}
else
{
- /* pre-decrypt it might be the might that casues a wrap, we
- * need to decrpyt to find out */
+ /* pre-decrypt it might be the packet that causes a wrap, we
+ * need to decrypt it to find out */
if (hi_seq_req)
*hi_seq_req = sa->seq_hi + 1;
return 0;
@@ -442,17 +527,17 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
else
{
/*
- * the recieved sequence number greater than the low
+	   * the received sequence number is greater than the low
* end of the window.
*/
if (hi_seq_req)
*hi_seq_req = sa->seq_hi;
if (seq <= sa->seq)
/*
- * The recieved seq number is within bounds of the window
+ * The received seq number is within bounds of the window
* check if it's a duplicate
*/
- return (ipsec_sa_anti_replay_check (sa, seq));
+ return ipsec_sa_anti_replay_check (sa, seq, ar_huge);
else
/*
* The received sequence number is greater than the window
@@ -465,14 +550,14 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
else
{
/*
- * the last sequence number VPP recieved is within one window
+ * the last sequence number VPP received is within one window
* size of zero, i.e. 0 < TL < WINDOW_SIZE, the lower bound is thus a
* large sequence number.
- * Note that the check below uses unsiged integer arthimetic, so the
+ * Note that the check below uses unsigned integer arithmetic, so the
* RHS will be a larger number.
* Case B from RFC4303 Appendix A.
*/
- if (seq < IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND (sa->seq))
+ if (seq < window_lower_bound)
{
/*
* the sequence number is less than the lower bound.
@@ -485,7 +570,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
*/
if (hi_seq_req)
*hi_seq_req = sa->seq_hi;
- return (ipsec_sa_anti_replay_check (sa, seq));
+ return ipsec_sa_anti_replay_check (sa, seq, ar_huge);
}
else
{
@@ -493,7 +578,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
	   * the packet is less than the window lower bound or greater than
* the higher bound, depending on how you look at it...
* We're assuming, given that the last sequence number received,
- * TL < WINDOW_SIZE, that a largeer seq num is more likely to be
+ * TL < WINDOW_SIZE, that a larger seq num is more likely to be
* a packet that moves the window forward, than a packet that has
* wrapped the high sequence again. If it were the latter then
* we've lost close to 2^32 packets.
@@ -506,15 +591,14 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
else
{
/*
- * the packet seq number is between the lower bound (a large nubmer)
- * and MAX_SEQ_NUM. This is in the window since the window upper bound
- * tl > 0.
- * However, since TL is the other side of 0 to the received
- * packet, the SA has moved on to a higher sequence number.
+ * the packet seq number is between the lower bound (a large number)
+ * and MAX_SEQ_NUM. This is in the window since the window upper
+ * bound tl > 0. However, since TL is the other side of 0 to the
+ * received packet, the SA has moved on to a higher sequence number.
*/
if (hi_seq_req)
*hi_seq_req = sa->seq_hi - 1;
- return (ipsec_sa_anti_replay_check (sa, seq));
+ return ipsec_sa_anti_replay_check (sa, seq, ar_huge);
}
}
@@ -524,45 +608,149 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
}
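In Case B above the lower bound Bl = Tl - W + 1 underflows, and that is exactly what makes the "seq < window_lower_bound" test work: it classifies the mid-range sequence numbers as out of window. A small illustration with assumed values:

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint32_t tl = 10, window_size = 64;
  uint32_t bl = tl - window_size + 1; /* wraps to 4294967243 */

  assert (bl == 4294967243u);
  assert (2000000000u < bl);  /* mid-range seq: outside the window */
  assert (4294967250u >= bl); /* just behind the wrap: inside, check bitmap */
  return 0;
}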
always_inline u32
-ipsec_sa_anti_replay_window_shift (ipsec_sa_t *sa, u32 inc)
+ipsec_sa_anti_replay_window_shift (ipsec_sa_t *sa, u32 inc, bool ar_huge)
{
u32 n_lost = 0;
+ u32 seen = 0;
+ u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge);
- if (inc < IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE)
+ if (inc < window_size)
{
- if (sa->seq > IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE)
+ if (ar_huge)
+ {
+ /* the number of packets we saw in this section of the window */
+ clib_bitmap_t *window = sa->replay_window_huge;
+ u32 window_lower_bound = (sa->seq + 1) & (window_size - 1);
+ u32 window_next_lower_bound =
+ (window_lower_bound + inc) & (window_size - 1);
+
+ uword i_block, i_word_start, i_word_end, full_words;
+ uword n_blocks = window_size >> log2_uword_bits;
+ uword mask;
+
+ i_block = window_lower_bound >> log2_uword_bits;
+
+ i_word_start = window_lower_bound & (uword_bits - 1);
+ i_word_end = window_next_lower_bound & (uword_bits - 1);
+
+ /* We stay in the same word */
+ if (i_word_start + inc <= uword_bits)
+ {
+ mask = pow2_mask (inc) << i_word_start;
+ seen += count_set_bits (window[i_block] & mask);
+ window[i_block] &= ~mask;
+ }
+ else
+ {
+ full_words = (inc + i_word_start - uword_bits - i_word_end) >>
+ log2_uword_bits;
+
+ /* count set bits in the first word */
+ mask = (uword) ~0 << i_word_start;
+ seen += count_set_bits (window[i_block] & mask);
+ window[i_block] &= ~mask;
+ i_block = (i_block + 1) & (n_blocks - 1);
+
+ /* count set bits in the next full words */
+	      /* even if the last word needs to be fully counted, we treat
+	       * it separately */
+ while (full_words >= 8)
+ {
+ if (full_words >= 16)
+ {
+		      /* prefetch the next 8 blocks (64 bytes) */
+ clib_prefetch_store (
+ &window[(i_block + 8) & (n_blocks - 1)]);
+ }
+
+ seen += count_set_bits (window[i_block]);
+ seen +=
+ count_set_bits (window[(i_block + 1) & (n_blocks - 1)]);
+ seen +=
+ count_set_bits (window[(i_block + 2) & (n_blocks - 1)]);
+ seen +=
+ count_set_bits (window[(i_block + 3) & (n_blocks - 1)]);
+ seen +=
+ count_set_bits (window[(i_block + 4) & (n_blocks - 1)]);
+ seen +=
+ count_set_bits (window[(i_block + 5) & (n_blocks - 1)]);
+ seen +=
+ count_set_bits (window[(i_block + 6) & (n_blocks - 1)]);
+ seen +=
+ count_set_bits (window[(i_block + 7) & (n_blocks - 1)]);
+ window[i_block] = 0;
+ window[(i_block + 1) & (n_blocks - 1)] = 0;
+ window[(i_block + 2) & (n_blocks - 1)] = 0;
+ window[(i_block + 3) & (n_blocks - 1)] = 0;
+ window[(i_block + 4) & (n_blocks - 1)] = 0;
+ window[(i_block + 5) & (n_blocks - 1)] = 0;
+ window[(i_block + 6) & (n_blocks - 1)] = 0;
+ window[(i_block + 7) & (n_blocks - 1)] = 0;
+
+ i_block = (i_block + 8) & (n_blocks - 1);
+ full_words -= 8;
+ }
+ while (full_words > 0)
+ {
+ // last word is treated after the loop
+ seen += count_set_bits (window[i_block]);
+ window[i_block] = 0;
+ i_block = (i_block + 1) & (n_blocks - 1);
+ full_words--;
+ }
+
+ /* the last word */
+ mask = pow2_mask (i_word_end);
+ seen += count_set_bits (window[i_block] & mask);
+ window[i_block] &= ~mask;
+ }
+
+ clib_bitmap_set_no_check (window,
+ (sa->seq + inc) & (window_size - 1), 1);
+ }
+ else
{
/*
* count how many holes there are in the portion
* of the window that we will right shift of the end
* as a result of this increments
*/
- u64 mask = (((u64) 1 << inc) - 1) << (BITS (u64) - inc);
- u64 old = sa->replay_window & mask;
+ u64 old = sa->replay_window & pow2_mask (inc);
/* the number of packets we saw in this section of the window */
- u64 seen = count_set_bits (old);
-
- /*
- * the number we missed is the size of the window section
- * minus the number we saw.
- */
- n_lost = inc - seen;
+ seen = count_set_bits (old);
+ sa->replay_window =
+ ((sa->replay_window) >> inc) | (1ULL << (window_size - 1));
}
- sa->replay_window = ((sa->replay_window) << inc) | 1;
+
+ /*
+ * the number we missed is the size of the window section
+ * minus the number we saw.
+ */
+ n_lost = inc - seen;
}
else
{
/* holes in the replay window are lost packets */
- n_lost = BITS (u64) - count_set_bits (sa->replay_window);
+ n_lost = window_size -
+ IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN_KNOWN_WIN (sa, ar_huge);
/* any sequence numbers that now fall outside the window
* are forever lost */
- n_lost += inc - IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE;
+ n_lost += inc - window_size;
- sa->replay_window = 1;
+ if (PREDICT_FALSE (ar_huge))
+ {
+ clib_bitmap_zero (sa->replay_window_huge);
+ clib_bitmap_set_no_check (sa->replay_window_huge,
+ (sa->seq + inc) & (window_size - 1), 1);
+ }
+ else
+ {
+ sa->replay_window = 1ULL << (window_size - 1);
+ }
}
- return (n_lost);
+ return n_lost;
}
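A condensed sketch of the non-huge branch of the shift above (assumptions: 64-bit window, newest bit at position 63, 1 <= inc). Shifting right by inc discards the inc oldest bits; unset bits among them were never received and count as lost:

#include <stdint.h>

static uint32_t
demo_window_shift_64 (uint64_t *window, uint32_t inc)
{
  uint32_t n_lost;

  if (inc < 64)
    {
      uint64_t oldest = *window & (~0ULL >> (64 - inc)); /* pow2_mask (inc) */
      n_lost = inc - (uint32_t) __builtin_popcountll (oldest);
      *window = (*window >> inc) | (1ULL << 63); /* mark the new tl seen */
    }
  else
    {
      /* every hole still in the window is lost, plus everything that now
       * falls outside it */
      n_lost = 64 - (uint32_t) __builtin_popcountll (*window);
      n_lost += inc - 64;
      *window = 1ULL << 63;
    }
  return n_lost;
}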
/*
@@ -576,9 +764,10 @@ ipsec_sa_anti_replay_window_shift (ipsec_sa_t *sa, u32 inc)
*/
always_inline u64
ipsec_sa_anti_replay_advance (ipsec_sa_t *sa, u32 thread_index, u32 seq,
- u32 hi_seq)
+ u32 hi_seq, bool ar_huge)
{
u64 n_lost = 0;
+ u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge);
u32 pos;
if (ipsec_sa_is_set_USE_ESN (sa))
@@ -588,25 +777,33 @@ ipsec_sa_anti_replay_advance (ipsec_sa_t *sa, u32 thread_index, u32 seq,
if (wrap == 0 && seq > sa->seq)
{
pos = seq - sa->seq;
- n_lost = ipsec_sa_anti_replay_window_shift (sa, pos);
+ n_lost = ipsec_sa_anti_replay_window_shift (sa, pos, ar_huge);
sa->seq = seq;
}
else if (wrap > 0)
{
- pos = ~seq + sa->seq + 1;
- n_lost = ipsec_sa_anti_replay_window_shift (sa, pos);
+ pos = seq + ~sa->seq + 1;
+ n_lost = ipsec_sa_anti_replay_window_shift (sa, pos, ar_huge);
sa->seq = seq;
sa->seq_hi = hi_seq;
}
else if (wrap < 0)
{
pos = ~seq + sa->seq + 1;
- sa->replay_window |= (1ULL << pos);
+ if (ar_huge)
+ clib_bitmap_set_no_check (sa->replay_window_huge,
+ seq & (window_size - 1), 1);
+ else
+ sa->replay_window |= (1ULL << (window_size - 1 - pos));
}
else
{
pos = sa->seq - seq;
- sa->replay_window |= (1ULL << pos);
+ if (ar_huge)
+ clib_bitmap_set_no_check (sa->replay_window_huge,
+ seq & (window_size - 1), 1);
+ else
+ sa->replay_window |= (1ULL << (window_size - 1 - pos));
}
}
else
@@ -614,13 +811,17 @@ ipsec_sa_anti_replay_advance (ipsec_sa_t *sa, u32 thread_index, u32 seq,
if (seq > sa->seq)
{
pos = seq - sa->seq;
- n_lost = ipsec_sa_anti_replay_window_shift (sa, pos);
+ n_lost = ipsec_sa_anti_replay_window_shift (sa, pos, ar_huge);
sa->seq = seq;
}
else
{
pos = sa->seq - seq;
- sa->replay_window |= (1ULL << pos);
+ if (ar_huge)
+ clib_bitmap_set_no_check (sa->replay_window_huge,
+ seq & (window_size - 1), 1);
+ else
+ sa->replay_window |= (1ULL << (window_size - 1 - pos));
}
}
@@ -632,8 +833,8 @@ ipsec_sa_anti_replay_advance (ipsec_sa_t *sa, u32 thread_index, u32 seq,
 * Chooses which thread_id should be assigned.
 * If the input is ~0, picks a pseudo-random worker_id based on
 * unix_time_now_nsec.
*/
-always_inline u32
-ipsec_sa_assign_thread (u32 thread_id)
+always_inline u16
+ipsec_sa_assign_thread (u16 thread_id)
{
return ((thread_id) ? thread_id
: (unix_time_now_nsec () % vlib_num_workers ()) + 1);
diff --git a/src/vnet/ipsec/ipsec_spd.c b/src/vnet/ipsec/ipsec_spd.c
index 4e8017c35ff..7b9a0aea8ed 100644
--- a/src/vnet/ipsec/ipsec_spd.c
+++ b/src/vnet/ipsec/ipsec_spd.c
@@ -21,6 +21,7 @@ ipsec_add_del_spd (vlib_main_t * vm, u32 spd_id, int is_add)
{
ipsec_main_t *im = &ipsec_main;
ipsec_spd_t *spd = 0;
+ ipsec_spd_fp_t *fp_spd = 0;
uword *p;
u32 spd_index, k, v;
@@ -36,25 +37,160 @@ ipsec_add_del_spd (vlib_main_t * vm, u32 spd_id, int is_add)
spd = pool_elt_at_index (im->spds, spd_index);
if (!spd)
return VNET_API_ERROR_INVALID_VALUE;
- /* *INDENT-OFF* */
+
hash_foreach (k, v, im->spd_index_by_sw_if_index, ({
if (v == spd_index)
ipsec_set_interface_spd(vm, k, spd_id, 0);
}));
- /* *INDENT-ON* */
hash_unset (im->spd_index_by_spd_id, spd_id);
#define _(s,v) vec_free(spd->policies[IPSEC_SPD_POLICY_##s]);
foreach_ipsec_spd_policy_type
#undef _
- pool_put (im->spds, spd);
+
+ fp_spd = &spd->fp_spd;
+
+ if (im->fp_spd_ipv4_out_is_enabled)
+ {
+ if (fp_spd->ip4_out_lookup_hash_idx != INDEX_INVALID)
+ {
+ clib_bihash_16_8_t *bihash_table =
+ pool_elt_at_index (im->fp_ip4_lookup_hashes_pool,
+ fp_spd->ip4_out_lookup_hash_idx);
+
+ clib_bihash_free_16_8 (bihash_table);
+ vec_free (fp_spd->name4_out);
+ pool_put_index (im->fp_ip4_lookup_hashes_pool,
+ fp_spd->ip4_out_lookup_hash_idx);
+ }
+ }
+
+ if (im->fp_spd_ipv4_in_is_enabled)
+ {
+ if (fp_spd->ip4_in_lookup_hash_idx != INDEX_INVALID)
+ {
+ clib_bihash_16_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip4_lookup_hashes_pool, fp_spd->ip4_in_lookup_hash_idx);
+
+ clib_bihash_free_16_8 (bihash_table);
+ vec_free (fp_spd->name4_in);
+ pool_put_index (im->fp_ip4_lookup_hashes_pool,
+ fp_spd->ip4_in_lookup_hash_idx);
+ }
+ }
+
+ if (im->fp_spd_ipv6_out_is_enabled)
+ {
+ if (fp_spd->ip6_out_lookup_hash_idx != INDEX_INVALID)
+ {
+ clib_bihash_40_8_t *bihash_table =
+ pool_elt_at_index (im->fp_ip6_lookup_hashes_pool,
+ fp_spd->ip6_out_lookup_hash_idx);
+
+ clib_bihash_free_40_8 (bihash_table);
+ vec_free (fp_spd->name6_out);
+ pool_put_index (im->fp_ip6_lookup_hashes_pool,
+ fp_spd->ip6_out_lookup_hash_idx);
+ }
+ }
+ if (im->fp_spd_ipv6_in_is_enabled)
+ {
+ if (fp_spd->ip6_in_lookup_hash_idx != INDEX_INVALID)
+ {
+ clib_bihash_40_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip6_lookup_hashes_pool, fp_spd->ip6_in_lookup_hash_idx);
+
+ clib_bihash_free_40_8 (bihash_table);
+ vec_free (fp_spd->name6_in);
+ pool_put_index (im->fp_ip6_lookup_hashes_pool,
+ fp_spd->ip6_in_lookup_hash_idx);
+ }
+ }
+
+ pool_put (im->spds, spd);
}
- else /* create new SPD */
+ else /* create new SPD */
{
pool_get (im->spds, spd);
clib_memset (spd, 0, sizeof (*spd));
spd_index = spd - im->spds;
spd->id = spd_id;
hash_set (im->spd_index_by_spd_id, spd_id, spd_index);
+
+ fp_spd = &spd->fp_spd;
+ fp_spd->ip4_out_lookup_hash_idx = INDEX_INVALID;
+ fp_spd->ip4_in_lookup_hash_idx = INDEX_INVALID;
+ fp_spd->ip6_out_lookup_hash_idx = INDEX_INVALID;
+ fp_spd->ip6_in_lookup_hash_idx = INDEX_INVALID;
+
+ if (im->fp_spd_ipv4_out_is_enabled)
+ {
+ if (pool_elts (im->fp_ip4_lookup_hashes_pool) <
+ pool_max_len (im->fp_ip4_lookup_hashes_pool))
+ {
+ clib_bihash_16_8_t *bihash_table;
+ fp_spd->name4_out = format (0, "spd_%u_fp_ip4_out", spd_id);
+
+ pool_get (im->fp_ip4_lookup_hashes_pool, bihash_table);
+ fp_spd->ip4_out_lookup_hash_idx =
+ bihash_table - im->fp_ip4_lookup_hashes_pool;
+ clib_bihash_init_16_8 (bihash_table, (char *) fp_spd->name4_out,
+ im->fp_lookup_hash_buckets,
+ im->fp_lookup_hash_buckets *
+ IPSEC_FP_IP4_HASH_MEM_PER_BUCKET);
+ }
+ }
+
+ if (im->fp_spd_ipv4_in_is_enabled)
+ {
+ if (pool_elts (im->fp_ip4_lookup_hashes_pool) <
+ pool_max_len (im->fp_ip4_lookup_hashes_pool))
+ {
+ clib_bihash_16_8_t *bihash_table;
+ fp_spd->name4_in = format (0, "spd_%u_fp_ip4_in", spd_id);
+
+ pool_get (im->fp_ip4_lookup_hashes_pool, bihash_table);
+ fp_spd->ip4_in_lookup_hash_idx =
+ bihash_table - im->fp_ip4_lookup_hashes_pool;
+ clib_bihash_init_16_8 (bihash_table, (char *) fp_spd->name4_in,
+ im->fp_lookup_hash_buckets,
+ im->fp_lookup_hash_buckets *
+ IPSEC_FP_IP4_HASH_MEM_PER_BUCKET);
+ }
+ }
+ if (im->fp_spd_ipv6_out_is_enabled)
+ {
+ if (pool_elts (im->fp_ip6_lookup_hashes_pool) <
+ pool_max_len (im->fp_ip6_lookup_hashes_pool))
+ {
+ clib_bihash_40_8_t *bihash_table;
+
+ fp_spd->name6_out = format (0, "spd_%u_fp_ip6_out", spd_id);
+ pool_get (im->fp_ip6_lookup_hashes_pool, bihash_table);
+ fp_spd->ip6_out_lookup_hash_idx =
+ bihash_table - im->fp_ip6_lookup_hashes_pool;
+ clib_bihash_init_40_8 (bihash_table, (char *) fp_spd->name6_out,
+ im->fp_lookup_hash_buckets,
+ im->fp_lookup_hash_buckets *
+ IPSEC_FP_IP6_HASH_MEM_PER_BUCKET);
+ }
+ }
+ if (im->fp_spd_ipv6_in_is_enabled)
+ {
+ if (pool_elts (im->fp_ip6_lookup_hashes_pool) <
+ pool_max_len (im->fp_ip6_lookup_hashes_pool))
+ {
+ clib_bihash_40_8_t *bihash_table;
+
+ fp_spd->name6_in = format (0, "spd_%u_fp_ip6_in", spd_id);
+ pool_get (im->fp_ip6_lookup_hashes_pool, bihash_table);
+ fp_spd->ip6_in_lookup_hash_idx =
+ bihash_table - im->fp_ip6_lookup_hashes_pool;
+ clib_bihash_init_40_8 (bihash_table, (char *) fp_spd->name6_in,
+ im->fp_lookup_hash_buckets,
+ im->fp_lookup_hash_buckets *
+ IPSEC_FP_IP6_HASH_MEM_PER_BUCKET);
+ }
+ }
}
return 0;
}
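Note that the SPD stores bihash pool indices (ip4_out_lookup_hash_idx and friends) rather than raw pointers. VPP pools can move when they grow, so a pointer captured before a later pool_get may dangle while an index stays valid. A standalone sketch of the idea, with plain realloc standing in for pool growth:

#include <stdint.h>
#include <stdlib.h>

typedef struct
{
  uint64_t opaque[8]; /* stand-in for clib_bihash_16_8_t */
} demo_table_t;

int
main (void)
{
  demo_table_t *pool = calloc (4, sizeof (demo_table_t));
  uint32_t idx = 2;                    /* like ip4_out_lookup_hash_idx */
  demo_table_t *captured = &pool[idx]; /* raw pointer into the pool */

  pool = realloc (pool, 1024 * sizeof (demo_table_t)); /* pool grows */

  demo_table_t *current = &pool[idx]; /* index still resolves correctly */
  (void) captured;                    /* may now dangle; the index does not */
  (void) current;
  free (pool);
  return 0;
}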
diff --git a/src/vnet/ipsec/ipsec_spd.h b/src/vnet/ipsec/ipsec_spd.h
index 5bfc6ae56f6..3b1e4b40747 100644
--- a/src/vnet/ipsec/ipsec_spd.h
+++ b/src/vnet/ipsec/ipsec_spd.h
@@ -15,6 +15,8 @@
#ifndef __IPSEC_SPD_H__
#define __IPSEC_SPD_H__
+#include <vppinfra/bihash_40_8.h>
+#include <vppinfra/bihash_16_8.h>
#include <vlib/vlib.h>
#define foreach_ipsec_spd_policy_type \
@@ -40,8 +42,33 @@ typedef enum ipsec_spd_policy_t_
extern u8 *format_ipsec_policy_type (u8 * s, va_list * args);
+typedef struct
+{
+ /* index in the mask types pool */
+ u32 mask_type_idx;
+  /* counts references corresponding to the given mask type index */
+ u32 refcount;
+} ipsec_fp_mask_id_t;
+
+/**
+ * @brief A fast path Security Policy Database
+ */
+typedef struct
+{
+ ipsec_fp_mask_id_t *fp_mask_ids[IPSEC_SPD_POLICY_N_TYPES];
+ /* names of bihash tables */
+ u8 *name4_out;
+ u8 *name4_in;
+ u8 *name6_out;
+ u8 *name6_in;
+ u32 ip6_out_lookup_hash_idx; /* fp ip6 lookup hash out index in the pool */
+ u32 ip4_out_lookup_hash_idx; /* fp ip4 lookup hash out index in the pool */
+ u32 ip6_in_lookup_hash_idx; /* fp ip6 lookup hash in index in the pool */
+ u32 ip4_in_lookup_hash_idx; /* fp ip4 lookup hash in index in the pool */
+} ipsec_spd_fp_t;
+
/**
- * @brief A Secruity Policy Database
+ * @brief A Security Policy Database
*/
typedef struct
{
@@ -49,6 +76,7 @@ typedef struct
u32 id;
/** vectors for each of the policy types */
u32 *policies[IPSEC_SPD_POLICY_N_TYPES];
+ ipsec_spd_fp_t fp_spd;
} ipsec_spd_t;
/**
@@ -64,7 +92,8 @@ extern int ipsec_set_interface_spd (vlib_main_t * vm,
extern u8 *format_ipsec_spd (u8 * s, va_list * args);
-extern u8 *format_ipsec_spd_flow_cache (u8 *s, va_list *args);
+extern u8 *format_ipsec_out_spd_flow_cache (u8 *s, va_list *args);
+extern u8 *format_ipsec_in_spd_flow_cache (u8 *s, va_list *args);
#endif /* __IPSEC_SPD_H__ */
diff --git a/src/vnet/ipsec/ipsec_spd_fp_lookup.h b/src/vnet/ipsec/ipsec_spd_fp_lookup.h
new file mode 100644
index 00000000000..2bbd7c664f9
--- /dev/null
+++ b/src/vnet/ipsec/ipsec_spd_fp_lookup.h
@@ -0,0 +1,579 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef IPSEC_SPD_FP_LOOKUP_H
+#define IPSEC_SPD_FP_LOOKUP_H
+
+#include <vnet/ipsec/ipsec.h>
+
+static_always_inline int
+single_rule_out_match_5tuple (ipsec_policy_t *policy, ipsec_fp_5tuple_t *match)
+{
+ if (PREDICT_FALSE (policy->is_ipv6 != match->is_ipv6))
+ return (0);
+
+ if (PREDICT_FALSE (policy->protocol != IPSEC_POLICY_PROTOCOL_ANY &&
+ (policy->protocol != match->protocol)))
+ return (0);
+
+ if (!policy->is_ipv6)
+ {
+ if (PREDICT_FALSE (
+ clib_net_to_host_u32 (match->laddr.as_u32) <
+ clib_net_to_host_u32 (policy->laddr.start.ip4.as_u32)))
+ return (0);
+
+ if (PREDICT_FALSE (clib_net_to_host_u32 (match->laddr.as_u32) >
+ clib_net_to_host_u32 (policy->laddr.stop.ip4.as_u32)))
+ return (0);
+
+ if (PREDICT_FALSE (
+ clib_net_to_host_u32 (match->raddr.as_u32) <
+ clib_net_to_host_u32 (policy->raddr.start.ip4.as_u32)))
+ return (0);
+
+ if (PREDICT_FALSE (clib_net_to_host_u32 (match->raddr.as_u32) >
+ clib_net_to_host_u32 (policy->raddr.stop.ip4.as_u32)))
+ return (0);
+ }
+ else
+ {
+      if (ip6_address_compare (&match->ip6_laddr, &policy->laddr.start.ip6) <
+	  0)
+	return (0);
+
+      if (ip6_address_compare (&policy->laddr.stop.ip6, &match->ip6_laddr) < 0)
+	return (0);
+
+      if (ip6_address_compare (&match->ip6_raddr, &policy->raddr.start.ip6) <
+	  0)
+	return (0);
+
+      if (ip6_address_compare (&policy->raddr.stop.ip6, &match->ip6_raddr) < 0)
+	return (0);
+ }
+
+ if (PREDICT_FALSE ((match->protocol != IP_PROTOCOL_TCP) &&
+ (match->protocol != IP_PROTOCOL_UDP) &&
+ (match->protocol != IP_PROTOCOL_SCTP)))
+ {
+ return (1);
+ }
+
+ if (match->lport < policy->lport.start)
+ return (0);
+
+ if (match->lport > policy->lport.stop)
+ return (0);
+
+ if (match->rport < policy->rport.start)
+ return (0);
+
+ if (match->rport > policy->rport.stop)
+ return (0);
+
+ return (1);
+}
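The IPv4 bounds tests above all follow one pattern: convert the big-endian addresses to host order, then compare as integers. Distilled into a helper (an assumed standalone sketch, not VPP API):

#include <arpa/inet.h>
#include <stdint.h>

static int
demo_ip4_in_range (uint32_t addr_be, uint32_t start_be, uint32_t stop_be)
{
  uint32_t a = ntohl (addr_be);

  return a >= ntohl (start_be) && a <= ntohl (stop_be);
}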
+
+static_always_inline int
+single_rule_in_match_5tuple (ipsec_policy_t *policy, ipsec_fp_5tuple_t *match)
+{
+
+ u32 da = clib_net_to_host_u32 (match->laddr.as_u32);
+ u32 sa = clib_net_to_host_u32 (match->raddr.as_u32);
+
+ if (policy->policy == IPSEC_POLICY_ACTION_PROTECT)
+ {
+ ipsec_sa_t *s = ipsec_sa_get (policy->sa_index);
+
+ if (match->spi != s->spi)
+ return (0);
+
+ if (ipsec_sa_is_set_IS_TUNNEL (s))
+ {
+ if (da != clib_net_to_host_u32 (s->tunnel.t_dst.ip.ip4.as_u32))
+ return (0);
+
+ if (sa != clib_net_to_host_u32 (s->tunnel.t_src.ip.ip4.as_u32))
+ return (0);
+ }
+ }
+ else
+ {
+ if (sa < clib_net_to_host_u32 (policy->raddr.start.ip4.as_u32))
+ return (0);
+
+ if (sa > clib_net_to_host_u32 (policy->raddr.stop.ip4.as_u32))
+ return (0);
+
+ if (da < clib_net_to_host_u32 (policy->laddr.start.ip4.as_u32))
+ return (0);
+
+ if (da > clib_net_to_host_u32 (policy->laddr.stop.ip4.as_u32))
+ return (0);
+ }
+ return (1);
+}
+
+static_always_inline u32
+ipsec_fp_in_ip6_policy_match_n (void *spd_fp, ipsec_fp_5tuple_t *tuples,
+ ipsec_policy_t **policies, u32 n)
+{
+ u32 last_priority[n];
+ u32 i = 0;
+ u32 counter = 0;
+ ipsec_fp_mask_type_entry_t *mte;
+ ipsec_fp_mask_id_t *mti;
+ ipsec_fp_5tuple_t *match = tuples;
+ ipsec_policy_t *policy;
+ u32 n_left = n;
+ clib_bihash_kv_40_8_t kv;
+ /* result of the lookup */
+ clib_bihash_kv_40_8_t result;
+ ipsec_fp_lookup_value_t *result_val =
+ (ipsec_fp_lookup_value_t *) &result.value;
+ u64 *pkey, *pmatch, *pmask;
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_spd_fp_t *pspd_fp = (ipsec_spd_fp_t *) spd_fp;
+ ipsec_fp_mask_id_t *mask_type_ids = pspd_fp->fp_mask_ids[match->action];
+ clib_bihash_40_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip6_lookup_hashes_pool, pspd_fp->ip6_in_lookup_hash_idx);
+
+  /* clear the list of matched policy pointers */
+ clib_memset (policies, 0, n * sizeof (*policies));
+ clib_memset (last_priority, 0, n * sizeof (u32));
+ n_left = n;
+ while (n_left)
+ {
+ vec_foreach (mti, mask_type_ids)
+ {
+ mte = im->fp_mask_types + mti->mask_type_idx;
+ if (mte->mask.action == 0)
+ continue;
+
+ pmatch = (u64 *) match->kv_40_8.key;
+ pmask = (u64 *) mte->mask.kv_40_8.key;
+ pkey = (u64 *) kv.key;
+
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey = *pmatch & *pmask;
+
+ int res =
+ clib_bihash_search_inline_2_40_8 (bihash_table, &kv, &result);
+	  /* look up the hash for each packet in the burst with this mask. */
+
+ if (res == 0)
+ {
+ /* There is a hit in the hash table. */
+ /* Find the policy with highest priority. */
+ /* Store the lookup results in a dedicated array. */
+
+ if (vec_len (result_val->fp_policies_ids) > 1)
+ {
+ u32 *policy_id;
+ vec_foreach (policy_id, result_val->fp_policies_ids)
+ {
+ policy = im->policies + *policy_id;
+
+ if (single_rule_in_match_5tuple (policy, match))
+ {
+ if (last_priority[i] < policy->priority)
+ {
+ last_priority[i] = policy->priority;
+ if (policies[i] == 0)
+ counter++;
+ policies[i] = policy;
+ }
+ break;
+ }
+ }
+ }
+ else
+ {
+ u32 *policy_id;
+ ASSERT (vec_len (result_val->fp_policies_ids) == 1);
+ policy_id = result_val->fp_policies_ids;
+ policy = im->policies + *policy_id;
+ if ((last_priority[i] < policy->priority) &&
+ (single_rule_in_match_5tuple (policy, match)))
+ {
+ last_priority[i] = policy->priority;
+ if (policies[i] == 0)
+ counter++;
+ policies[i] = policy;
+ }
+ }
+ }
+ }
+
+ i++;
+ n_left--;
+ match++;
+ }
+ return counter;
+}
+
+static_always_inline u32
+ipsec_fp_in_ip4_policy_match_n (void *spd_fp, ipsec_fp_5tuple_t *tuples,
+ ipsec_policy_t **policies, u32 n)
+{
+ u32 last_priority[n];
+ u32 i = 0;
+ u32 counter = 0;
+ ipsec_fp_mask_type_entry_t *mte;
+ ipsec_fp_mask_id_t *mti;
+ ipsec_fp_5tuple_t *match = tuples;
+ ipsec_policy_t *policy;
+ u32 n_left = n;
+ clib_bihash_kv_16_8_t kv;
+ /* result of the lookup */
+ clib_bihash_kv_16_8_t result;
+ ipsec_fp_lookup_value_t *result_val =
+ (ipsec_fp_lookup_value_t *) &result.value;
+ u64 *pkey, *pmatch, *pmask;
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_spd_fp_t *pspd_fp = (ipsec_spd_fp_t *) spd_fp;
+ ipsec_fp_mask_id_t *mask_type_ids = pspd_fp->fp_mask_ids[match->action];
+ clib_bihash_16_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip4_lookup_hashes_pool, pspd_fp->ip4_in_lookup_hash_idx);
+
+  /* clear the list of matched policy pointers */
+ clib_memset (policies, 0, n * sizeof (*policies));
+ clib_memset (last_priority, 0, n * sizeof (u32));
+ n_left = n;
+ while (n_left)
+ {
+ vec_foreach (mti, mask_type_ids)
+ {
+ mte = im->fp_mask_types + mti->mask_type_idx;
+ if (mte->mask.action == 0)
+ continue;
+ pmatch = (u64 *) match->kv_16_8.key;
+ pmask = (u64 *) mte->mask.kv_16_8.key;
+ pkey = (u64 *) kv.key;
+
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey = *pmatch & *pmask;
+
+ int res =
+ clib_bihash_search_inline_2_16_8 (bihash_table, &kv, &result);
+	  /* look up the hash for each packet in the burst with this mask. */
+
+ if (res == 0)
+ {
+ /* There is a hit in the hash table. */
+ /* Find the policy with highest priority. */
+ /* Store the lookup results in a dedicated array. */
+
+ if (vec_len (result_val->fp_policies_ids) > 1)
+ {
+ u32 *policy_id;
+ vec_foreach (policy_id, result_val->fp_policies_ids)
+ {
+ policy = im->policies + *policy_id;
+
+ if (single_rule_in_match_5tuple (policy, match))
+ {
+ if (last_priority[i] < policy->priority)
+ {
+ last_priority[i] = policy->priority;
+ if (policies[i] == 0)
+ counter++;
+ policies[i] = policy;
+ }
+ break;
+ }
+ }
+ }
+ else
+ {
+ u32 *policy_id;
+ ASSERT (vec_len (result_val->fp_policies_ids) == 1);
+ policy_id = result_val->fp_policies_ids;
+ policy = im->policies + *policy_id;
+ if ((last_priority[i] < policy->priority) &&
+ (single_rule_in_match_5tuple (policy, match)))
+ {
+ last_priority[i] = policy->priority;
+ if (policies[i] == 0)
+ counter++;
+ policies[i] = policy;
+ }
+ }
+ }
+ }
+
+ i++;
+ n_left--;
+ match++;
+ }
+ return counter;
+}
+
+/**
+ * @brief function handler to perform a fast path SPD lookup
+ * for a burst of n inbound packets
+ **/
+
+static_always_inline u32
+ipsec_fp_in_policy_match_n (void *spd_fp, u8 is_ipv6,
+ ipsec_fp_5tuple_t *tuples,
+ ipsec_policy_t **policies, u32 n)
+{
+ if (is_ipv6)
+ return ipsec_fp_in_ip6_policy_match_n (spd_fp, tuples, policies, n);
+ else
+ return ipsec_fp_in_ip4_policy_match_n (spd_fp, tuples, policies, n);
+}
+
+static_always_inline u32
+ipsec_fp_out_ip6_policy_match_n (void *spd_fp, ipsec_fp_5tuple_t *tuples,
+ ipsec_policy_t **policies, u32 *ids, u32 n)
+
+{
+ u32 i = 0;
+ u32 counter = 0;
+ ipsec_fp_mask_type_entry_t *mte;
+ ipsec_fp_mask_id_t *mti;
+ ipsec_fp_5tuple_t *match = tuples;
+ ipsec_policy_t *policy;
+
+ u32 n_left = n;
+ clib_bihash_kv_40_8_t kv;
+ /* result of the lookup */
+ clib_bihash_kv_40_8_t result;
+ ipsec_fp_lookup_value_t *result_val =
+ (ipsec_fp_lookup_value_t *) &result.value;
+ u64 *pkey, *pmatch, *pmask;
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_spd_fp_t *pspd_fp = (ipsec_spd_fp_t *) spd_fp;
+ ipsec_fp_mask_id_t *mask_type_ids =
+ pspd_fp->fp_mask_ids[IPSEC_SPD_POLICY_IP6_OUTBOUND];
+ clib_bihash_40_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip6_lookup_hashes_pool, pspd_fp->ip6_out_lookup_hash_idx);
+
+  /* clear the list of matched policy pointers */
+ clib_memset (policies, 0, n * sizeof (*policies));
+ clib_memset (last_priority, 0, n * sizeof (u32));
+ n_left = n;
+ while (n_left)
+ {
+ vec_foreach (mti, mask_type_ids)
+ {
+ mte = im->fp_mask_types + mti->mask_type_idx;
+ if (mte->mask.action != 0)
+ continue;
+
+ pmatch = (u64 *) match->kv_40_8.key;
+ pmask = (u64 *) mte->mask.kv_40_8.key;
+ pkey = (u64 *) kv.key;
+
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey = *pmatch & *pmask;
+
+ int res =
+ clib_bihash_search_inline_2_40_8 (bihash_table, &kv, &result);
+	  /* look up the hash for each packet in the burst with this mask. */
+
+ if (res == 0)
+ {
+ /* There is a hit in the hash table. */
+ /* Find the policy with highest priority. */
+ /* Store the lookup results in a dedicated array. */
+
+ if (vec_len (result_val->fp_policies_ids) > 1)
+ {
+ u32 *policy_id;
+ vec_foreach (policy_id, result_val->fp_policies_ids)
+ {
+ policy = im->policies + *policy_id;
+
+ if (single_rule_out_match_5tuple (policy, match))
+ {
+ if (last_priority[i] < policy->priority)
+ {
+ last_priority[i] = policy->priority;
+ if (policies[i] == 0)
+ counter++;
+ policies[i] = policy;
+ ids[i] = *policy_id;
+ }
+ break;
+ }
+ }
+ }
+ else
+ {
+ u32 *policy_id;
+ ASSERT (vec_len (result_val->fp_policies_ids) == 1);
+ policy_id = result_val->fp_policies_ids;
+ policy = im->policies + *policy_id;
+ if (single_rule_out_match_5tuple (policy, match))
+ {
+ if (last_priority[i] < policy->priority)
+ {
+ last_priority[i] = policy->priority;
+ if (policies[i] == 0)
+ counter++;
+ policies[i] = policy;
+ ids[i] = *policy_id;
+ }
+ }
+ }
+ }
+ }
+ n_left--;
+ match++;
+ i++;
+ }
+ return counter;
+}
+
+static_always_inline u32
+ipsec_fp_out_ip4_policy_match_n (void *spd_fp, ipsec_fp_5tuple_t *tuples,
+ ipsec_policy_t **policies, u32 *ids, u32 n)
+{
+ u32 last_priority[n];
+ u32 i = 0;
+ u32 counter = 0;
+ ipsec_fp_mask_type_entry_t *mte;
+ ipsec_fp_mask_id_t *mti;
+ ipsec_fp_5tuple_t *match = tuples;
+ ipsec_policy_t *policy;
+
+ u32 n_left = n;
+ clib_bihash_kv_16_8_t kv;
+ /* result of the lookup */
+ clib_bihash_kv_16_8_t result;
+ ipsec_fp_lookup_value_t *result_val =
+ (ipsec_fp_lookup_value_t *) &result.value;
+ u64 *pkey, *pmatch, *pmask;
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_spd_fp_t *pspd_fp = (ipsec_spd_fp_t *) spd_fp;
+ ipsec_fp_mask_id_t *mask_type_ids =
+ pspd_fp->fp_mask_ids[IPSEC_SPD_POLICY_IP4_OUTBOUND];
+ clib_bihash_16_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip4_lookup_hashes_pool, pspd_fp->ip4_out_lookup_hash_idx);
+
+  /* clear the list of matched policy pointers */
+ clib_memset (policies, 0, n * sizeof (*policies));
+ clib_memset (last_priority, 0, n * sizeof (u32));
+ n_left = n;
+ while (n_left)
+ {
+ vec_foreach (mti, mask_type_ids)
+ {
+ mte = im->fp_mask_types + mti->mask_type_idx;
+ if (mte->mask.action != 0)
+ continue;
+
+ pmatch = (u64 *) match->kv_16_8.key;
+ pmask = (u64 *) mte->mask.kv_16_8.key;
+ pkey = (u64 *) kv.key;
+
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey = *pmatch & *pmask;
+
+ int res =
+ clib_bihash_search_inline_2_16_8 (bihash_table, &kv, &result);
+	  /* look up the hash for each packet in the burst with this mask. */
+
+ if (res == 0)
+ {
+ /* There is a hit in the hash table. */
+ /* Find the policy with highest priority. */
+ /* Store the lookup results in a dedicated array. */
+
+ if (vec_len (result_val->fp_policies_ids) > 1)
+ {
+ u32 *policy_id;
+ vec_foreach (policy_id, result_val->fp_policies_ids)
+ {
+ policy = im->policies + *policy_id;
+
+ if (single_rule_out_match_5tuple (policy, match))
+ {
+ if (last_priority[i] < policy->priority)
+ {
+ last_priority[i] = policy->priority;
+ if (policies[i] == 0)
+ counter++;
+ policies[i] = policy;
+ ids[i] = *policy_id;
+ }
+ break;
+ }
+ }
+ }
+ else
+ {
+ u32 *policy_id;
+ ASSERT (vec_len (result_val->fp_policies_ids) == 1);
+ policy_id = result_val->fp_policies_ids;
+ policy = im->policies + *policy_id;
+ if ((last_priority[i] < policy->priority) &&
+ (single_rule_out_match_5tuple (policy, match)))
+ {
+ last_priority[i] = policy->priority;
+ if (policies[i] == 0)
+ counter++;
+ policies[i] = policy;
+ ids[i] = *policy_id;
+ }
+ }
+ }
+ }
+
+ i++;
+ n_left--;
+ match++;
+ }
+ return counter;
+}
+
+/**
+ * @brief function handler to perform a fast path SPD lookup
+ * for a burst of n outbound packets;
+ * returns the number of successfully matched policies
+ **/
+
+static_always_inline u32
+ipsec_fp_out_policy_match_n (void *spd_fp, u8 is_ipv6,
+ ipsec_fp_5tuple_t *tuples,
+ ipsec_policy_t **policies, u32 *ids, u32 n)
+{
+ if (is_ipv6)
+ return ipsec_fp_out_ip6_policy_match_n (spd_fp, tuples, policies, ids, n);
+ else
+ return ipsec_fp_out_ip4_policy_match_n (spd_fp, tuples, policies, ids, n);
+}
+
+#endif /* !IPSEC_SPD_FP_LOOKUP_H */
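All four match_n routines in this new header implement the same tuple-space search: for every mask type registered for the policy kind, AND the packet 5-tuple with the mask to form an exact-match key, probe one bihash, then filter the resulting policy list by priority. The key construction, reduced to its essence as a hedged sketch (16-byte key case, hash lookup stubbed out):

#include <stdint.h>

typedef struct
{
  uint64_t key[2]; /* like the key part of clib_bihash_kv_16_8_t */
} demo_key_t;

/* masking collapses a range policy into a prefix, which is what lets an
 * exact-match hash table answer range queries */
static void
demo_make_masked_key (const uint64_t match[2], const uint64_t mask[2],
                      demo_key_t *out)
{
  out->key[0] = match[0] & mask[0];
  out->key[1] = match[1] & mask[1];
}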
diff --git a/src/vnet/ipsec/ipsec_spd_policy.c b/src/vnet/ipsec/ipsec_spd_policy.c
index 85acf7aea7b..af087689941 100644
--- a/src/vnet/ipsec/ipsec_spd_policy.c
+++ b/src/vnet/ipsec/ipsec_spd_policy.c
@@ -24,78 +24,6 @@ vlib_combined_counter_main_t ipsec_spd_policy_counters = {
.stat_segment_name = "/net/ipsec/policy",
};
-static int
-ipsec_policy_is_equal (ipsec_policy_t * p1, ipsec_policy_t * p2)
-{
- if (p1->priority != p2->priority)
- return 0;
- if (p1->type != p2->type)
- return (0);
- if (p1->policy != p2->policy)
- return (0);
- if (p1->sa_id != p2->sa_id)
- return (0);
- if (p1->protocol != p2->protocol)
- return (0);
- if (p1->lport.start != p2->lport.start)
- return (0);
- if (p1->lport.stop != p2->lport.stop)
- return (0);
- if (p1->rport.start != p2->rport.start)
- return (0);
- if (p1->rport.stop != p2->rport.stop)
- return (0);
- if (p1->is_ipv6 != p2->is_ipv6)
- return (0);
- if (p2->is_ipv6)
- {
- if (p1->laddr.start.ip6.as_u64[0] != p2->laddr.start.ip6.as_u64[0])
- return (0);
- if (p1->laddr.start.ip6.as_u64[1] != p2->laddr.start.ip6.as_u64[1])
- return (0);
- if (p1->laddr.stop.ip6.as_u64[0] != p2->laddr.stop.ip6.as_u64[0])
- return (0);
- if (p1->laddr.stop.ip6.as_u64[1] != p2->laddr.stop.ip6.as_u64[1])
- return (0);
- if (p1->raddr.start.ip6.as_u64[0] != p2->raddr.start.ip6.as_u64[0])
- return (0);
- if (p1->raddr.start.ip6.as_u64[1] != p2->raddr.start.ip6.as_u64[1])
- return (0);
- if (p1->raddr.stop.ip6.as_u64[0] != p2->raddr.stop.ip6.as_u64[0])
- return (0);
- if (p1->laddr.stop.ip6.as_u64[1] != p2->laddr.stop.ip6.as_u64[1])
- return (0);
- }
- else
- {
- if (p1->laddr.start.ip4.as_u32 != p2->laddr.start.ip4.as_u32)
- return (0);
- if (p1->laddr.stop.ip4.as_u32 != p2->laddr.stop.ip4.as_u32)
- return (0);
- if (p1->raddr.start.ip4.as_u32 != p2->raddr.start.ip4.as_u32)
- return (0);
- if (p1->raddr.stop.ip4.as_u32 != p2->raddr.stop.ip4.as_u32)
- return (0);
- }
- return (1);
-}
-
-static int
-ipsec_spd_entry_sort (void *a1, void *a2)
-{
- ipsec_main_t *im = &ipsec_main;
- u32 *id1 = a1;
- u32 *id2 = a2;
- ipsec_policy_t *p1, *p2;
-
- p1 = pool_elt_at_index (im->policies, *id1);
- p2 = pool_elt_at_index (im->policies, *id2);
- if (p1 && p2)
- return p2->priority - p1->priority;
-
- return 0;
-}
-
int
ipsec_policy_mk_type (bool is_outbound,
bool is_ipv6,
@@ -136,6 +64,44 @@ ipsec_policy_mk_type (bool is_outbound,
return (-1);
}
+static_always_inline int
+ipsec_is_policy_inbound (ipsec_policy_t *policy)
+{
+ if (policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT ||
+ policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS ||
+ policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD ||
+ policy->type == IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT ||
+ policy->type == IPSEC_SPD_POLICY_IP6_INBOUND_BYPASS ||
+ policy->type == IPSEC_SPD_POLICY_IP6_INBOUND_DISCARD)
+ return 1;
+
+ return 0;
+}
+
+static_always_inline int
+ipsec_is_fp_enabled (ipsec_main_t *im, ipsec_spd_t *spd,
+ ipsec_policy_t *policy)
+{
+ if ((im->fp_spd_ipv4_out_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd->fp_spd.ip4_out_lookup_hash_idx) &&
+ policy->type == IPSEC_SPD_POLICY_IP4_OUTBOUND) ||
+ (im->fp_spd_ipv4_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd->fp_spd.ip4_in_lookup_hash_idx) &&
+ (policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT ||
+ policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS ||
+ policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD)) ||
+ (im->fp_spd_ipv6_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd->fp_spd.ip6_in_lookup_hash_idx) &&
+ (policy->type == IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT ||
+ policy->type == IPSEC_SPD_POLICY_IP6_INBOUND_BYPASS ||
+ policy->type == IPSEC_SPD_POLICY_IP6_INBOUND_DISCARD)) ||
+ (im->fp_spd_ipv6_out_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd->fp_spd.ip6_out_lookup_hash_idx) &&
+ policy->type == IPSEC_SPD_POLICY_IP6_OUTBOUND))
+ return 1;
+ return 0;
+}
+
int
ipsec_add_del_policy (vlib_main_t * vm,
ipsec_policy_t * policy, int is_add, u32 * stat_index)
@@ -156,7 +122,7 @@ ipsec_add_del_policy (vlib_main_t * vm,
if (!spd)
return VNET_API_ERROR_SYSCALL_ERROR_1;
- if (im->flow_cache_flag && !policy->is_ipv6 &&
+ if (im->output_flow_cache_flag && !policy->is_ipv6 &&
policy->type == IPSEC_SPD_POLICY_IP4_OUTBOUND)
{
/*
@@ -179,9 +145,35 @@ ipsec_add_del_policy (vlib_main_t * vm,
clib_atomic_store_relax_n (&im->ipsec4_out_spd_flow_cache_entries, 0);
}
+ if ((policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT ||
+ policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS ||
+ policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD) &&
+ im->input_flow_cache_flag && !policy->is_ipv6)
+ {
+ /*
+ * Flow cache entry is valid only when input_epoch_count value in control
+ * plane and data plane match. Otherwise, flow cache entry is considered
+ * stale. To avoid the race condition of using old input_epoch_count
+ * value in data plane after the roll over of input_epoch_count in
+ * control plane, entire flow cache is reset.
+ */
+ if (im->input_epoch_count == 0xFFFFFFFF)
+ {
+ /* Reset all the entries in flow cache */
+ clib_memset_u8 (im->ipsec4_in_spd_hash_tbl, 0,
+ im->ipsec4_in_spd_hash_num_buckets *
+ (sizeof (*(im->ipsec4_in_spd_hash_tbl))));
+ }
+ /* Increment epoch counter by 1 */
+ clib_atomic_fetch_add_relax (&im->input_epoch_count, 1);
+ /* Reset spd flow cache counter since all old entries are stale */
+ im->ipsec4_in_spd_flow_cache_entries = 0;
+ }
+
if (is_add)
{
u32 policy_index;
+ u32 i;
if (policy->policy == IPSEC_POLICY_ACTION_PROTECT)
{
@@ -194,6 +186,14 @@ ipsec_add_del_policy (vlib_main_t * vm,
else
policy->sa_index = INDEX_INVALID;
+ /**
+       * Try adding the policy to the fast path SPD first. Only add it to
+       * the traditional SPD when that fails.
+ **/
+ if (ipsec_is_fp_enabled (im, spd, policy))
+ return ipsec_fp_add_del_policy ((void *) &spd->fp_spd, policy, 1,
+ stat_index);
+
pool_get (im->policies, vp);
clib_memcpy (vp, policy, sizeof (*vp));
policy_index = vp - im->policies;
@@ -202,22 +202,56 @@ ipsec_add_del_policy (vlib_main_t * vm,
policy_index);
vlib_zero_combined_counter (&ipsec_spd_policy_counters, policy_index);
- vec_add1 (spd->policies[policy->type], policy_index);
- vec_sort_with_function (spd->policies[policy->type],
- ipsec_spd_entry_sort);
+ vec_foreach_index (i, spd->policies[policy->type])
+ {
+ ipsec_policy_t *p =
+ pool_elt_at_index (im->policies, spd->policies[policy->type][i]);
+
+ if (p->priority <= vp->priority)
+ {
+ break;
+ }
+ }
+
+ vec_insert_elts (spd->policies[policy->type], &policy_index, 1, i);
+
*stat_index = policy_index;
}
else
{
u32 ii;
+ /**
+       * Try to delete the policy from the fast path SPD first. Delete from
+       * the traditional SPD when the fast path delete fails.
+ **/
+
+ if (ipsec_is_fp_enabled (im, spd, policy))
+	{
+ if (policy->policy == IPSEC_POLICY_ACTION_PROTECT)
+ {
+ index_t sa_index = ipsec_sa_find_and_lock (policy->sa_id);
+
+ if (INDEX_INVALID == sa_index)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+ policy->sa_index = sa_index;
+ ipsec_sa_unlock_id (policy->sa_id);
+ }
+ else
+ policy->sa_index = INDEX_INVALID;
+
+ return ipsec_fp_add_del_policy ((void *) &spd->fp_spd, policy, 0,
+ stat_index);
+ }
+
vec_foreach_index (ii, (spd->policies[policy->type]))
{
vp = pool_elt_at_index (im->policies,
spd->policies[policy->type][ii]);
if (ipsec_policy_is_equal (vp, policy))
{
- vec_del1 (spd->policies[policy->type], ii);
+ vec_delete (spd->policies[policy->type], 1, ii);
ipsec_sa_unlock (vp->sa_index);
pool_put (im->policies, vp);
break;
@@ -228,6 +262,673 @@ ipsec_add_del_policy (vlib_main_t * vm,
return 0;
}
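The add path above replaces the old sort-after-append (vec_sort_with_function) with an insertion at the first element whose priority is <= the new policy's, keeping spd->policies sorted in descending priority with a single O(n) pass instead of a re-sort. The same idea on a plain array, as a simplified sketch (the caller guarantees capacity here; VPP's vec_insert_elts grows the vector itself):

#include <stdint.h>
#include <string.h>

static void
demo_sorted_insert (uint32_t *prio, uint32_t *n, uint32_t new_prio)
{
  uint32_t i;

  /* find the first element with priority <= the new one */
  for (i = 0; i < *n; i++)
    if (prio[i] <= new_prio)
      break;

  /* shift the tail right and drop the new element in */
  memmove (&prio[i + 1], &prio[i], (*n - i) * sizeof (prio[0]));
  prio[i] = new_prio;
  (*n)++;
}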
+static_always_inline void
+ipsec_fp_release_mask_type (ipsec_main_t *im, u32 mask_type_index)
+{
+ ipsec_fp_mask_type_entry_t *mte =
+ pool_elt_at_index (im->fp_mask_types, mask_type_index);
+ mte->refcount--;
+ if (mte->refcount == 0)
+ {
+ /* this entry is not in use anymore */
+ ASSERT (clib_memset (mte, 0xae, sizeof (*mte)) == EOK);
+ pool_put (im->fp_mask_types, mte);
+ }
+}
+
+static_always_inline u32
+find_mask_type_index (ipsec_main_t *im, ipsec_fp_5tuple_t *mask)
+{
+ ipsec_fp_mask_type_entry_t *mte;
+
+ pool_foreach (mte, im->fp_mask_types)
+ {
+ if (memcmp (&mte->mask, mask, sizeof (*mask)) == 0)
+ return (mte - im->fp_mask_types);
+ }
+
+ return ~0;
+}
+
+static_always_inline void
+fill_ip6_hash_policy_kv (ipsec_fp_5tuple_t *match, ipsec_fp_5tuple_t *mask,
+ clib_bihash_kv_40_8_t *kv)
+{
+ ipsec_fp_lookup_value_t *kv_val = (ipsec_fp_lookup_value_t *) &kv->value;
+ u64 *pmatch = (u64 *) match->kv_40_8.key;
+ u64 *pmask = (u64 *) mask->kv_40_8.key;
+ u64 *pkey = (u64 *) kv->key;
+
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey = *pmatch & *pmask;
+
+ kv_val->as_u64 = 0;
+}
+
+static_always_inline void
+fill_ip4_hash_policy_kv (ipsec_fp_5tuple_t *match, ipsec_fp_5tuple_t *mask,
+ clib_bihash_kv_16_8_t *kv)
+{
+ ipsec_fp_lookup_value_t *kv_val = (ipsec_fp_lookup_value_t *) &kv->value;
+ u64 *pmatch = (u64 *) match->kv_16_8.key;
+ u64 *pmask = (u64 *) mask->kv_16_8.key;
+ u64 *pkey = (u64 *) kv->key;
+
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey = *pmatch & *pmask;
+
+ kv_val->as_u64 = 0;
+}
+
+static_always_inline u16
+mask_out_highest_set_bit_u16 (u16 x)
+{
+ x |= x >> 8;
+ x |= x >> 4;
+ x |= x >> 2;
+ x |= x >> 1;
+ return ~x;
+}
+
+static_always_inline u32
+mask_out_highest_set_bit_u32 (u32 x)
+{
+ x |= x >> 16;
+ x |= x >> 8;
+ x |= x >> 4;
+ x |= x >> 2;
+ x |= x >> 1;
+ return ~x;
+}
+
+static_always_inline u64
+mask_out_highest_set_bit_u64 (u64 x)
+{
+ x |= x >> 32;
+ x |= x >> 16;
+ x |= x >> 8;
+ x |= x >> 4;
+ x |= x >> 2;
+ x |= x >> 1;
+ return ~x;
+}
+
+static_always_inline void
+ipsec_fp_get_policy_ports_mask (ipsec_policy_t *policy,
+ ipsec_fp_5tuple_t *mask)
+{
+ if (PREDICT_TRUE ((policy->protocol == IP_PROTOCOL_TCP) ||
+ (policy->protocol == IP_PROTOCOL_UDP) ||
+ (policy->protocol == IP_PROTOCOL_SCTP)))
+ {
+ mask->lport = policy->lport.start ^ policy->lport.stop;
+ mask->rport = policy->rport.start ^ policy->rport.stop;
+
+ mask->lport = mask_out_highest_set_bit_u16 (mask->lport);
+
+ mask->rport = mask_out_highest_set_bit_u16 (mask->rport);
+ }
+ else
+ {
+ mask->lport = 0;
+ mask->rport = 0;
+ }
+
+ mask->protocol = (policy->protocol == IPSEC_POLICY_PROTOCOL_ANY) ? 0 : ~0;
+}
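mask_out_highest_set_bit_* turns "start XOR stop" into a comparison mask: the XOR keeps exactly the bits where the range bounds differ, and clearing the highest such bit and everything below it leaves the common prefix. Packets are matched on that prefix in the hash, and the residual range is re-checked by single_rule_*_match_5tuple. Worked examples with assumed ranges:

#include <assert.h>
#include <stdint.h>

static uint32_t
demo_mask_u32 (uint32_t x)
{
  x |= x >> 16; x |= x >> 8; x |= x >> 4; x |= x >> 2; x |= x >> 1;
  return ~x;
}

static uint16_t
demo_mask_u16 (uint16_t x)
{
  x |= x >> 8; x |= x >> 4; x |= x >> 2; x |= x >> 1;
  return ~x;
}

int
main (void)
{
  /* address range 192.168.0.0 .. 192.168.0.255 (host byte order here) */
  assert (demo_mask_u32 (0xc0a80000 ^ 0xc0a800ff) == 0xffffff00);
  /* port range 8000 .. 8063: compare only the top 10 bits */
  assert (demo_mask_u16 (8000 ^ 8063) == 0xffc0);
  return 0;
}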
+
+static_always_inline void
+ipsec_fp_ip4_get_policy_mask (ipsec_policy_t *policy, ipsec_fp_5tuple_t *mask,
+ bool inbound)
+{
+ u32 *pladdr_start = (u32 *) &policy->laddr.start.ip4;
+ u32 *pladdr_stop = (u32 *) &policy->laddr.stop.ip4;
+ u32 *plmask = (u32 *) &mask->laddr;
+ u32 *praddr_start = (u32 *) &policy->raddr.start.ip4;
+ u32 *praddr_stop = (u32 *) &policy->raddr.stop.ip4;
+ u32 *prmask = (u32 *) &mask->raddr;
+
+ clib_memset_u8 (mask, 0xff, sizeof (ipsec_fp_5tuple_t));
+ clib_memset_u8 (&mask->l3_zero_pad, 0, sizeof (mask->l3_zero_pad));
+
+ if (inbound && (policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT &&
+ policy->sa_index != INDEX_INVALID))
+ {
+ ipsec_sa_t *s = ipsec_sa_get (policy->sa_index);
+
+ if (ipsec_sa_is_set_IS_TUNNEL (s))
+ goto set_spi_mask;
+ }
+
+ /* find bits where start != stop */
+ *plmask = *pladdr_start ^ *pladdr_stop;
+ *prmask = *praddr_start ^ *praddr_stop;
+ /* Find most significant bit set (that is the first position
+ * start differs from stop). Mask out everything after that bit and
+   * the bit itself. Remember that the policy stores start and stop in
+   * network byte order.
+ */
+ *plmask = clib_host_to_net_u32 (
+ mask_out_highest_set_bit_u32 (clib_net_to_host_u32 (*plmask)));
+
+ *prmask = clib_host_to_net_u32 (
+ mask_out_highest_set_bit_u32 (clib_net_to_host_u32 (*prmask)));
+
+set_spi_mask:
+ if (inbound)
+ {
+ if (policy->type != IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT)
+ mask->spi = 0;
+
+ mask->protocol = 0;
+ }
+ else
+ {
+ mask->action = 0;
+ ipsec_fp_get_policy_ports_mask (policy, mask);
+ }
+}
+
+static_always_inline void
+ipsec_fp_ip6_get_policy_mask (ipsec_policy_t *policy, ipsec_fp_5tuple_t *mask,
+ bool inbound)
+{
+ u64 *pladdr_start = (u64 *) &policy->laddr.start;
+ u64 *pladdr_stop = (u64 *) &policy->laddr.stop;
+ u64 *plmask = (u64 *) &mask->ip6_laddr;
+ u64 *praddr_start = (u64 *) &policy->raddr.start;
+ u64 *praddr_stop = (u64 *) &policy->raddr.stop;
+ u64 *prmask = (u64 *) &mask->ip6_raddr;
+
+ clib_memset_u8 (mask, 0xff, sizeof (ipsec_fp_5tuple_t));
+
+ if (inbound && (policy->type == IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT &&
+ policy->sa_index != INDEX_INVALID))
+ {
+ ipsec_sa_t *s = ipsec_sa_get (policy->sa_index);
+
+ if (ipsec_sa_is_set_IS_TUNNEL (s))
+ goto set_spi_mask;
+ }
+
+ *plmask = (*pladdr_start++ ^ *pladdr_stop++);
+
+ *prmask = (*praddr_start++ ^ *praddr_stop++);
+
+ /* Find most significant bit set (that is the first position
+ * start differs from stop). Mask out everything after that bit and
+   * the bit itself. Remember that the policy stores start and stop in
+   * network byte order.
+ */
+ *plmask = clib_host_to_net_u64 (
+ mask_out_highest_set_bit_u64 (clib_net_to_host_u64 (*plmask)));
+
+ if (*plmask++ & clib_host_to_net_u64 (0x1))
+ {
+ *plmask = (*pladdr_start ^ *pladdr_stop);
+ *plmask = clib_host_to_net_u64 (
+ mask_out_highest_set_bit_u64 (clib_net_to_host_u64 (*plmask)));
+ }
+ else
+ *plmask = 0;
+
+ *prmask = clib_host_to_net_u64 (
+ mask_out_highest_set_bit_u64 (clib_net_to_host_u64 (*prmask)));
+
+ if (*prmask++ & clib_host_to_net_u64 (0x1))
+ {
+ *prmask = (*praddr_start ^ *praddr_stop);
+ *prmask = clib_host_to_net_u64 (
+ mask_out_highest_set_bit_u64 (clib_net_to_host_u64 (*prmask)));
+ }
+ else
+ *prmask = 0;
+set_spi_mask:
+ if (inbound)
+ {
+ if (policy->type != IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT)
+ mask->spi = 0;
+
+ mask->protocol = 0;
+ }
+ else
+ {
+ mask->action = 0;
+ ipsec_fp_get_policy_ports_mask (policy, mask);
+ }
+}
+
+static_always_inline void
+ipsec_fp_get_policy_5tuple (ipsec_policy_t *policy, ipsec_fp_5tuple_t *tuple,
+ bool inbound)
+{
+ memset (tuple, 0, sizeof (*tuple));
+ tuple->is_ipv6 = policy->is_ipv6;
+ if (tuple->is_ipv6)
+ {
+ tuple->ip6_laddr = policy->laddr.start.ip6;
+ tuple->ip6_raddr = policy->raddr.start.ip6;
+ }
+ else
+ {
+ tuple->laddr = policy->laddr.start.ip4;
+ tuple->raddr = policy->raddr.start.ip4;
+ }
+
+ if (inbound)
+ {
+ if ((policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT ||
+ policy->type == IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT) &&
+ policy->sa_index != INDEX_INVALID)
+ {
+ ipsec_sa_t *s = ipsec_sa_get (policy->sa_index);
+
+ tuple->spi = s->spi;
+ if (ipsec_sa_is_set_IS_TUNNEL (s))
+ {
+ if (tuple->is_ipv6)
+ {
+ tuple->ip6_laddr = s->tunnel.t_dst.ip.ip6;
+ tuple->ip6_raddr = s->tunnel.t_src.ip.ip6;
+ }
+ else
+ {
+ tuple->laddr = s->tunnel.t_dst.ip.ip4;
+ tuple->raddr = s->tunnel.t_src.ip.ip4;
+ }
+ }
+ }
+ else
+ tuple->spi = INDEX_INVALID;
+ tuple->action = policy->type;
+ return;
+ }
+
+ tuple->protocol = policy->protocol;
+ tuple->lport = policy->lport.start;
+ tuple->rport = policy->rport.start;
+}
+
+static_always_inline int
+ipsec_fp_mask_type_idx_cmp (ipsec_fp_mask_id_t *mask_id, u32 *idx)
+{
+ return mask_id->mask_type_idx == *idx;
+}
+
+int
+ipsec_fp_ip4_add_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd,
+ ipsec_policy_t *policy, u32 *stat_index)
+{
+ u32 mask_index, searched_idx;
+ ipsec_policy_t *vp;
+ ipsec_fp_mask_type_entry_t *mte;
+ u32 policy_index;
+ clib_bihash_kv_16_8_t kv;
+ clib_bihash_kv_16_8_t result;
+ ipsec_fp_lookup_value_t *result_val =
+ (ipsec_fp_lookup_value_t *) &result.value;
+ ipsec_fp_lookup_value_t *key_val = (ipsec_fp_lookup_value_t *) &kv.value;
+
+ ipsec_fp_5tuple_t mask, policy_5tuple;
+ int res;
+ bool inbound = ipsec_is_policy_inbound (policy);
+ clib_bihash_16_8_t *bihash_table =
+ inbound ? pool_elt_at_index (im->fp_ip4_lookup_hashes_pool,
+ fp_spd->ip4_in_lookup_hash_idx) :
+ pool_elt_at_index (im->fp_ip4_lookup_hashes_pool,
+ fp_spd->ip4_out_lookup_hash_idx);
+
+ ipsec_fp_ip4_get_policy_mask (policy, &mask, inbound);
+ pool_get (im->policies, vp);
+ policy_index = vp - im->policies;
+ vlib_validate_combined_counter (&ipsec_spd_policy_counters, policy_index);
+ vlib_zero_combined_counter (&ipsec_spd_policy_counters, policy_index);
+ *stat_index = policy_index;
+ mask_index = find_mask_type_index (im, &mask);
+
+ if (mask_index == ~0)
+ {
+ /* mask type not found, we need to create a new entry */
+ pool_get (im->fp_mask_types, mte);
+ mask_index = mte - im->fp_mask_types;
+ mte->refcount = 0;
+ }
+ else
+ mte = im->fp_mask_types + mask_index;
+
+ policy->fp_mask_type_id = mask_index;
+ ipsec_fp_get_policy_5tuple (policy, &policy_5tuple, inbound);
+
+ fill_ip4_hash_policy_kv (&policy_5tuple, &mask, &kv);
+
+ res = clib_bihash_search_inline_2_16_8 (bihash_table, &kv, &result);
+ if (res != 0)
+ {
+      /* key was not found, create a new entry */
+ vec_add1 (key_val->fp_policies_ids, policy_index);
+ res = clib_bihash_add_del_16_8 (bihash_table, &kv, 1);
+
+ if (res != 0)
+ goto error;
+ }
+ else
+ {
+ u32 i;
+ u32 *old_fp_policies_ids = result_val->fp_policies_ids;
+
+ vec_foreach_index (i, result_val->fp_policies_ids)
+ {
+ ipsec_policy_t *p =
+ pool_elt_at_index (im->policies, result_val->fp_policies_ids[i]);
+
+ if (p->priority <= policy->priority)
+ {
+ break;
+ }
+ }
+
+ vec_insert_elts (result_val->fp_policies_ids, &policy_index, 1, i);
+
+ if (result_val->fp_policies_ids != old_fp_policies_ids)
+ {
+ res = clib_bihash_add_del_16_8 (bihash_table, &result, 1);
+
+ if (res != 0)
+ goto error;
+ }
+ }
+
+ if (mte->refcount == 0)
+ {
+ clib_memcpy (&mte->mask, &mask, sizeof (mask));
+ mte->refcount = 0;
+ }
+
+ searched_idx =
+ vec_search_with_function (fp_spd->fp_mask_ids[policy->type], &mask_index,
+ ipsec_fp_mask_type_idx_cmp);
+ if (~0 == searched_idx)
+ {
+ ipsec_fp_mask_id_t mask_id = { mask_index, 1 };
+ vec_add1 (fp_spd->fp_mask_ids[policy->type], mask_id);
+ }
+ else
+ (fp_spd->fp_mask_ids[policy->type] + searched_idx)->refcount++;
+
+ mte->refcount++;
+ clib_memcpy (vp, policy, sizeof (*vp));
+
+ return 0;
+
+error:
+ pool_put (im->policies, vp);
+ ipsec_fp_release_mask_type (im, mask_index);
+ return -1;
+}
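Both add paths intern the derived mask in im->fp_mask_types: identical masks share one pool entry whose refcount tracks how many policies use it, while fp_mask_ids keeps a per-policy-type refcounted list so lookups only probe masks actually in use. The intern/release pattern in miniature (flat arrays instead of VPP pools; capacity management omitted):

#include <stdint.h>
#include <string.h>

typedef struct
{
  uint64_t mask;     /* stand-in for a full ipsec_fp_5tuple_t mask */
  uint32_t refcount;
} demo_mte_t;

static uint32_t
demo_intern_mask (demo_mte_t *tab, uint32_t *n, uint64_t mask)
{
  for (uint32_t i = 0; i < *n; i++)
    if (tab[i].refcount && tab[i].mask == mask)
      {
        tab[i].refcount++;
        return i;
      }
  tab[*n].mask = mask; /* caller guarantees capacity */
  tab[*n].refcount = 1;
  return (*n)++;
}

static void
demo_release_mask (demo_mte_t *tab, uint32_t i)
{
  if (--tab[i].refcount == 0)
    memset (&tab[i], 0, sizeof (tab[i])); /* slot can be reused */
}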
+
+int
+ipsec_fp_ip6_add_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd,
+ ipsec_policy_t *policy, u32 *stat_index)
+{
+ u32 mask_index, searched_idx;
+ ipsec_policy_t *vp;
+ ipsec_fp_mask_type_entry_t *mte;
+ u32 policy_index;
+ clib_bihash_kv_40_8_t kv;
+ clib_bihash_kv_40_8_t result;
+ ipsec_fp_lookup_value_t *result_val =
+ (ipsec_fp_lookup_value_t *) &result.value;
+ ipsec_fp_lookup_value_t *key_val = (ipsec_fp_lookup_value_t *) &kv.value;
+
+ ipsec_fp_5tuple_t mask, policy_5tuple;
+ int res;
+ bool inbound = ipsec_is_policy_inbound (policy);
+
+ ipsec_fp_ip6_get_policy_mask (policy, &mask, inbound);
+ pool_get (im->policies, vp);
+ policy_index = vp - im->policies;
+ vlib_validate_combined_counter (&ipsec_spd_policy_counters, policy_index);
+ vlib_zero_combined_counter (&ipsec_spd_policy_counters, policy_index);
+ *stat_index = policy_index;
+ mask_index = find_mask_type_index (im, &mask);
+ clib_bihash_40_8_t *bihash_table =
+ inbound ? pool_elt_at_index (im->fp_ip6_lookup_hashes_pool,
+ fp_spd->ip6_in_lookup_hash_idx) :
+ pool_elt_at_index (im->fp_ip6_lookup_hashes_pool,
+ fp_spd->ip6_out_lookup_hash_idx);
+
+ if (mask_index == ~0)
+ {
+ /* mask type not found, we need to create a new entry */
+ pool_get (im->fp_mask_types, mte);
+ mask_index = mte - im->fp_mask_types;
+ mte->refcount = 0;
+ }
+ else
+ mte = im->fp_mask_types + mask_index;
+
+ policy->fp_mask_type_id = mask_index;
+ ipsec_fp_get_policy_5tuple (policy, &policy_5tuple, inbound);
+
+ fill_ip6_hash_policy_kv (&policy_5tuple, &mask, &kv);
+
+ res = clib_bihash_search_inline_2_40_8 (bihash_table, &kv, &result);
+ if (res != 0)
+ {
+ /* key was not found, create a new entry */
+ vec_add1 (key_val->fp_policies_ids, policy_index);
+ res = clib_bihash_add_del_40_8 (bihash_table, &kv, 1);
+ if (res != 0)
+ goto error;
+ }
+ else
+ {
+ u32 i;
+ u32 *old_fp_policies_ids = result_val->fp_policies_ids;
+
+ vec_foreach_index (i, result_val->fp_policies_ids)
+ {
+ ipsec_policy_t *p =
+ pool_elt_at_index (im->policies, result_val->fp_policies_ids[i]);
+
+ if (p->priority <= policy->priority)
+ {
+ break;
+ }
+ }
+
+ vec_insert_elts (result_val->fp_policies_ids, &policy_index, 1, i);
+
+ if (result_val->fp_policies_ids != old_fp_policies_ids)
+ {
+ res = clib_bihash_add_del_40_8 (bihash_table, &result, 1);
+
+ if (res != 0)
+ goto error;
+ }
+ }
+
+ if (mte->refcount == 0)
+ {
+ clib_memcpy (&mte->mask, &mask, sizeof (mask));
+ mte->refcount = 0;
+ }
+
+ searched_idx =
+ vec_search_with_function (fp_spd->fp_mask_ids[policy->type], &mask_index,
+ ipsec_fp_mask_type_idx_cmp);
+ if (~0 == searched_idx)
+ {
+ ipsec_fp_mask_id_t mask_id = { mask_index, 1 };
+ vec_add1 (fp_spd->fp_mask_ids[policy->type], mask_id);
+ }
+ else
+ (fp_spd->fp_mask_ids[policy->type] + searched_idx)->refcount++;
+
+ mte->refcount++;
+ clib_memcpy (vp, policy, sizeof (*vp));
+
+ return 0;
+
+error:
+ pool_put (im->policies, vp);
+ ipsec_fp_release_mask_type (im, mask_index);
+ return -1;
+}
+
+int
+ipsec_fp_ip6_del_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd,
+ ipsec_policy_t *policy)
+{
+ int res;
+ ipsec_fp_5tuple_t mask = { 0 }, policy_5tuple;
+ clib_bihash_kv_40_8_t kv;
+ clib_bihash_kv_40_8_t result;
+ ipsec_fp_lookup_value_t *result_val =
+ (ipsec_fp_lookup_value_t *) &result.value;
+ bool inbound = ipsec_is_policy_inbound (policy);
+ clib_bihash_40_8_t *bihash_table =
+ inbound ? pool_elt_at_index (im->fp_ip6_lookup_hashes_pool,
+ fp_spd->ip6_in_lookup_hash_idx) :
+ pool_elt_at_index (im->fp_ip6_lookup_hashes_pool,
+ fp_spd->ip6_out_lookup_hash_idx);
+
+ ipsec_policy_t *vp;
+ u32 ii, imt;
+
+ ipsec_fp_ip6_get_policy_mask (policy, &mask, inbound);
+ ipsec_fp_get_policy_5tuple (policy, &policy_5tuple, inbound);
+ fill_ip6_hash_policy_kv (&policy_5tuple, &mask, &kv);
+ res = clib_bihash_search_inline_2_40_8 (bihash_table, &kv, &result);
+ if (res != 0)
+ return -1;
+
+ vec_foreach_index (ii, result_val->fp_policies_ids)
+ {
+ vp =
+ pool_elt_at_index (im->policies, *(result_val->fp_policies_ids + ii));
+ if (ipsec_policy_is_equal (vp, policy))
+ {
+ if (vec_len (result_val->fp_policies_ids) == 1)
+ {
+ vec_free (result_val->fp_policies_ids);
+ clib_bihash_add_del_40_8 (bihash_table, &result, 0);
+ }
+ else
+ vec_delete (result_val->fp_policies_ids, 1, ii);
+
+ vec_foreach_index (imt, fp_spd->fp_mask_ids[policy->type])
+ {
+ if ((fp_spd->fp_mask_ids[policy->type] + imt)->mask_type_idx ==
+ vp->fp_mask_type_id)
+ {
+ if ((fp_spd->fp_mask_ids[policy->type] + imt)->refcount-- ==
+ 1)
+ vec_del1 (fp_spd->fp_mask_ids[policy->type], imt);
+
+ break;
+ }
+ }
+
+ ipsec_fp_release_mask_type (im, vp->fp_mask_type_id);
+ ipsec_sa_unlock (vp->sa_index);
+ pool_put (im->policies, vp);
+ return 0;
+ }
+ }
+ return -1;
+}
+
+int
+ipsec_fp_ip4_del_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd,
+ ipsec_policy_t *policy)
+{
+ int res;
+ ipsec_fp_5tuple_t mask = { 0 }, policy_5tuple;
+ clib_bihash_kv_16_8_t kv;
+ clib_bihash_kv_16_8_t result;
+ ipsec_fp_lookup_value_t *result_val =
+ (ipsec_fp_lookup_value_t *) &result.value;
+ bool inbound = ipsec_is_policy_inbound (policy);
+ ipsec_policy_t *vp;
+ u32 ii, imt;
+ clib_bihash_16_8_t *bihash_table =
+ inbound ? pool_elt_at_index (im->fp_ip4_lookup_hashes_pool,
+ fp_spd->ip4_in_lookup_hash_idx) :
+ pool_elt_at_index (im->fp_ip4_lookup_hashes_pool,
+ fp_spd->ip4_out_lookup_hash_idx);
+
+ ipsec_fp_ip4_get_policy_mask (policy, &mask, inbound);
+ ipsec_fp_get_policy_5tuple (policy, &policy_5tuple, inbound);
+ fill_ip4_hash_policy_kv (&policy_5tuple, &mask, &kv);
+ res = clib_bihash_search_inline_2_16_8 (bihash_table, &kv, &result);
+
+ if (res != 0)
+ return -1;
+
+ vec_foreach_index (ii, result_val->fp_policies_ids)
+ {
+ vp =
+ pool_elt_at_index (im->policies, *(result_val->fp_policies_ids + ii));
+ if (ipsec_policy_is_equal (vp, policy))
+ {
+ if (vec_len (result_val->fp_policies_ids) == 1)
+ {
+ vec_free (result_val->fp_policies_ids);
+ clib_bihash_add_del_16_8 (bihash_table, &result, 0);
+ }
+ else
+ vec_delete (result_val->fp_policies_ids, 1, ii);
+
+ vec_foreach_index (imt, fp_spd->fp_mask_ids[policy->type])
+ {
+ if ((fp_spd->fp_mask_ids[policy->type] + imt)->mask_type_idx ==
+ vp->fp_mask_type_id)
+ {
+ if ((fp_spd->fp_mask_ids[policy->type] + imt)->refcount-- ==
+ 1)
+ vec_del1 (fp_spd->fp_mask_ids[policy->type], imt);
+
+ break;
+ }
+ }
+ ipsec_fp_release_mask_type (im, vp->fp_mask_type_id);
+ ipsec_sa_unlock (vp->sa_index);
+ pool_put (im->policies, vp);
+ return 0;
+ }
+ }
+ return -1;
+}
+
+int
+ipsec_fp_add_del_policy (void *fp_spd, ipsec_policy_t *policy, int is_add,
+ u32 *stat_index)
+{
+ ipsec_main_t *im = &ipsec_main;
+
+ if (is_add)
+ {
+ if (policy->is_ipv6)
+ return ipsec_fp_ip6_add_policy (im, (ipsec_spd_fp_t *) fp_spd, policy,
+ stat_index);
+ else
+ return ipsec_fp_ip4_add_policy (im, (ipsec_spd_fp_t *) fp_spd, policy,
+ stat_index);
+ }
+ else
+ {
+ if (policy->is_ipv6)
+ return ipsec_fp_ip6_del_policy (im, (ipsec_spd_fp_t *) fp_spd, policy);
+ else
+ return ipsec_fp_ip4_del_policy (im, (ipsec_spd_fp_t *) fp_spd, policy);
+ }
+}
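A sketch of how a control-plane caller might use the dispatcher above, assuming the ipsec headers from this patch are included; the helper name and all field values are purely illustrative:

/* Hypothetical caller: adds an IPv4 outbound bypass fast-path policy. */
static int
add_fast_path_bypass (ipsec_spd_fp_t *fp_spd)
{
  ipsec_policy_t p = { 0 };
  u32 stat_index;

  p.type = IPSEC_SPD_POLICY_IP4_OUTBOUND;
  p.priority = 100;
  p.policy = IPSEC_POLICY_ACTION_BYPASS;
  p.laddr.start.ip4.as_u32 = clib_host_to_net_u32 (0x0a000001); /* 10.0.0.1 */
  p.laddr.stop.ip4.as_u32 = clib_host_to_net_u32 (0x0a0000ff);  /* 10.0.0.255 */
  /* remaining selector fields (raddr, ports, protocol) elided */

  return ipsec_fp_add_del_policy ((void *) fp_spd, &p, 1 /* is_add */,
				  &stat_index);
}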
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/ipsec/ipsec_spd_policy.h b/src/vnet/ipsec/ipsec_spd_policy.h
index 6d6b69592b0..34f444efb9c 100644
--- a/src/vnet/ipsec/ipsec_spd_policy.h
+++ b/src/vnet/ipsec/ipsec_spd_policy.h
@@ -15,7 +15,30 @@
#ifndef __IPSEC_SPD_POLICY_H__
#define __IPSEC_SPD_POLICY_H__
+#include <vppinfra/bihash_40_8.h>
+#include <vppinfra/bihash_16_8.h>
#include <vnet/ipsec/ipsec_spd.h>
+/**
+ * calculated as the max number of flows (2^10) divided by BIHASH_KVP_PER_PAGE (4)
+ */
+#define IPSEC_FP_HASH_LOOKUP_HASH_BUCKETS (1 << 8)
+
+#define IPSEC_POLICY_PROTOCOL_ANY IP_PROTOCOL_RESERVED
+
+/**
+ * This number is calculated as the product
+ * sizeof(clib_bihash_kv_16_8_t)=24 * BIHASH_KVP_PER_PAGE=4 * COLLISIONS_NO=8
+ * = 768, rounded up to the next power of 2
+ */
+
+#define IPSEC_FP_IP4_HASH_MEM_PER_BUCKET 1024
+
+/**
+ * This number is calculated as the product
+ * sizeof(clib_bihash_kv_40_8_t)=48 * BIHASH_KVP_PER_PAGE=4 * COLLISIONS_NO=8
+ * = 1536, rounded up to the next power of 2
+ */
+#define IPSEC_FP_IP6_HASH_MEM_PER_BUCKET 2048
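The two memory macros follow directly from the arithmetic in the comments: 24 * 4 * 8 = 768 rounds up to 1024, and 48 * 4 * 8 = 1536 rounds up to 2048. A compile-time sanity sketch using plain C11 _Static_assert (illustrative only, not part of the patch):

/* Illustration: the per-bucket budget must cover one kv page's worth
 * of entries across the assumed number of collisions. */
_Static_assert (IPSEC_FP_IP4_HASH_MEM_PER_BUCKET >= 24 * 4 * 8,
		"ip4 bucket memory must cover kv pages");
_Static_assert (IPSEC_FP_IP6_HASH_MEM_PER_BUCKET >= 48 * 4 * 8,
		"ip6 bucket memory must cover kv pages");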
#define foreach_ipsec_policy_action \
_ (0, BYPASS, "bypass") \
@@ -71,6 +94,7 @@ typedef struct ipsec_policy_t_
ipsec_policy_action_t policy;
u32 sa_id;
u32 sa_index;
+ u32 fp_mask_type_id;
} ipsec_policy_t;
/**
@@ -91,6 +115,135 @@ extern int ipsec_policy_mk_type (bool is_outbound,
ipsec_policy_action_t action,
ipsec_spd_policy_type_t * type);
+/* A 5-tuple used to calculate the bihash entry */
+typedef union
+{
+ struct
+ {
+ union
+ {
+ struct
+ {
+ u32 l3_zero_pad[6];
+ ip4_address_t laddr;
+ ip4_address_t raddr;
+ };
+ struct
+ {
+ ip6_address_t ip6_laddr;
+ ip6_address_t ip6_raddr;
+ };
+ };
+ union
+ {
+ struct
+ {
+ u16 lport;
+ u16 rport;
+ };
+ u32 spi;
+ };
+ u8 protocol;
+ u8 action;
+ u16 is_ipv6;
+ };
+ /* for ipv6 */
+ clib_bihash_kv_40_8_t kv_40_8;
+ /* for ipv4 */
+ struct
+ {
+ u64 padding_for_kv_16_8[3];
+ clib_bihash_kv_16_8_t kv_16_8;
+ };
+} ipsec_fp_5tuple_t;
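The union is laid out so one structure serves both key widths: the full 48-byte 5-tuple aliases clib_bihash_kv_40_8_t for IPv6, while the IPv4 selector fields occupy the trailing 24 bytes so kv_16_8 can alias them (hence l3_zero_pad and padding_for_kv_16_8). A hedged layout check, illustrative only and not part of the patch:

/* Layout sanity sketch: both kv types must fit the union exactly. */
_Static_assert (sizeof (ipsec_fp_5tuple_t) == sizeof (clib_bihash_kv_40_8_t),
		"5-tuple must alias the 40_8 kv exactly");
_Static_assert (3 * sizeof (u64) + sizeof (clib_bihash_kv_16_8_t) ==
		  sizeof (ipsec_fp_5tuple_t),
		"ip4 kv must occupy the tail of the 5-tuple");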
+
+/*
+ * An element describing a particular policy mask,
+ * and a refcount of the policies sharing that mask.
+ */
+typedef struct
+{
+ /** Required for pool_get_aligned */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ ipsec_fp_5tuple_t mask;
+ u32 refcount; /* counts how many policies use this mask */
+} ipsec_fp_mask_type_entry_t;
+
+/*
+ * Bihash lookup value,
+ * contains a vector of policy indices into the policy pool,
+ * kept sorted by descending policy priority.
+ */
+typedef union
+{
+ u64 as_u64;
+ struct
+ {
+ u32 *fp_policies_ids;
+ };
+} ipsec_fp_lookup_value_t;
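The lookup value packs a clib vector pointer directly into the 8-byte bihash value, which is why growing the vector (it may move) forces both add functions above to write the bucket back. A one-line size check, illustrative only:

_Static_assert (sizeof (ipsec_fp_lookup_value_t) == sizeof (u64),
		"lookup value must fit the 8-byte bihash value");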
+
+/**
+ * @brief add or delete a fast path policy
+ */
+int ipsec_fp_add_del_policy (void *fp_spd, ipsec_policy_t *policy, int is_add,
+ u32 *stat_index);
+
+static_always_inline int
+ipsec_policy_is_equal (ipsec_policy_t *p1, ipsec_policy_t *p2)
+{
+ if (p1->priority != p2->priority)
+ return (0);
+ if (p1->type != p2->type)
+ return (0);
+ if (p1->policy != p2->policy)
+ return (0);
+ if (p1->sa_id != p2->sa_id)
+ return (0);
+ if (p1->protocol != p2->protocol)
+ return (0);
+ if (p1->lport.start != p2->lport.start)
+ return (0);
+ if (p1->lport.stop != p2->lport.stop)
+ return (0);
+ if (p1->rport.start != p2->rport.start)
+ return (0);
+ if (p1->rport.stop != p2->rport.stop)
+ return (0);
+ if (p1->is_ipv6 != p2->is_ipv6)
+ return (0);
+ if (p2->is_ipv6)
+ {
+ if (p1->laddr.start.ip6.as_u64[0] != p2->laddr.start.ip6.as_u64[0])
+ return (0);
+ if (p1->laddr.start.ip6.as_u64[1] != p2->laddr.start.ip6.as_u64[1])
+ return (0);
+ if (p1->laddr.stop.ip6.as_u64[0] != p2->laddr.stop.ip6.as_u64[0])
+ return (0);
+ if (p1->laddr.stop.ip6.as_u64[1] != p2->laddr.stop.ip6.as_u64[1])
+ return (0);
+ if (p1->raddr.start.ip6.as_u64[0] != p2->raddr.start.ip6.as_u64[0])
+ return (0);
+ if (p1->raddr.start.ip6.as_u64[1] != p2->raddr.start.ip6.as_u64[1])
+ return (0);
+ if (p1->raddr.stop.ip6.as_u64[0] != p2->raddr.stop.ip6.as_u64[0])
+ return (0);
+ if (p1->raddr.stop.ip6.as_u64[1] != p2->raddr.stop.ip6.as_u64[1])
+ return (0);
+ }
+ else
+ {
+ if (p1->laddr.start.ip4.as_u32 != p2->laddr.start.ip4.as_u32)
+ return (0);
+ if (p1->laddr.stop.ip4.as_u32 != p2->laddr.stop.ip4.as_u32)
+ return (0);
+ if (p1->raddr.start.ip4.as_u32 != p2->raddr.start.ip4.as_u32)
+ return (0);
+ if (p1->raddr.stop.ip4.as_u32 != p2->raddr.stop.ip4.as_u32)
+ return (0);
+ }
+ return (1);
+}
+
#endif /* __IPSEC_SPD_POLICY_H__ */
/*
diff --git a/src/vnet/ipsec/ipsec_test.c b/src/vnet/ipsec/ipsec_test.c
index f399032eb9a..86d09f18a5c 100644
--- a/src/vnet/ipsec/ipsec_test.c
+++ b/src/vnet/ipsec/ipsec_test.c
@@ -26,6 +26,10 @@
#include <vnet/ipsec/ipsec.api.h>
#undef vl_endianfun
+#define vl_calcsizefun
+#include <vnet/ipsec/ipsec.api.h>
+#undef vl_calcsizefun
+
typedef struct
{
/* API message ID base */
@@ -69,7 +73,103 @@ api_ipsec_spd_entry_add_del (vat_main_t *vam)
unformat_input_t *i = vam->input;
vl_api_ipsec_spd_entry_add_del_t *mp;
u8 is_add = 1, is_outbound = 0;
- u32 spd_id = 0, sa_id = 0, protocol = 0, policy = 0;
+ u32 spd_id = 0, sa_id = 0, protocol = IPSEC_POLICY_PROTOCOL_ANY, policy = 0;
+ i32 priority = 0;
+ u32 rport_start = 0, rport_stop = (u32) ~0;
+ u32 lport_start = 0, lport_stop = (u32) ~0;
+ vl_api_address_t laddr_start = {}, laddr_stop = {}, raddr_start = {},
+ raddr_stop = {};
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_add = 0;
+ if (unformat (i, "outbound"))
+ is_outbound = 1;
+ if (unformat (i, "inbound"))
+ is_outbound = 0;
+ else if (unformat (i, "spd_id %d", &spd_id))
+ ;
+ else if (unformat (i, "sa_id %d", &sa_id))
+ ;
+ else if (unformat (i, "priority %d", &priority))
+ ;
+ else if (unformat (i, "protocol %d", &protocol))
+ ;
+ else if (unformat (i, "lport_start %d", &lport_start))
+ ;
+ else if (unformat (i, "lport_stop %d", &lport_stop))
+ ;
+ else if (unformat (i, "rport_start %d", &rport_start))
+ ;
+ else if (unformat (i, "rport_stop %d", &rport_stop))
+ ;
+ else if (unformat (i, "laddr_start %U", unformat_vl_api_address,
+ &laddr_start))
+ ;
+ else if (unformat (i, "laddr_stop %U", unformat_vl_api_address,
+ &laddr_stop))
+ ;
+ else if (unformat (i, "raddr_start %U", unformat_vl_api_address,
+ &raddr_start))
+ ;
+ else if (unformat (i, "raddr_stop %U", unformat_vl_api_address,
+ &raddr_stop))
+ ;
+ else if (unformat (i, "action %U", unformat_ipsec_policy_action,
+ &policy))
+ {
+ if (policy == IPSEC_POLICY_ACTION_RESOLVE)
+ {
+ clib_warning ("unsupported action: 'resolve'");
+ return -99;
+ }
+ }
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (IPSEC_SPD_ENTRY_ADD_DEL, mp);
+
+ mp->is_add = is_add;
+
+ mp->entry.spd_id = ntohl (spd_id);
+ mp->entry.priority = ntohl (priority);
+ mp->entry.is_outbound = is_outbound;
+
+ clib_memcpy (&mp->entry.remote_address_start, &raddr_start,
+ sizeof (vl_api_address_t));
+ clib_memcpy (&mp->entry.remote_address_stop, &raddr_stop,
+ sizeof (vl_api_address_t));
+ clib_memcpy (&mp->entry.local_address_start, &laddr_start,
+ sizeof (vl_api_address_t));
+ clib_memcpy (&mp->entry.local_address_stop, &laddr_stop,
+ sizeof (vl_api_address_t));
+
+ mp->entry.protocol = protocol ? (u8) protocol : IPSEC_POLICY_PROTOCOL_ANY;
+ mp->entry.local_port_start = ntohs ((u16) lport_start);
+ mp->entry.local_port_stop = ntohs ((u16) lport_stop);
+ mp->entry.remote_port_start = ntohs ((u16) rport_start);
+ mp->entry.remote_port_stop = ntohs ((u16) rport_stop);
+ mp->entry.policy = (u8) policy;
+ mp->entry.sa_id = ntohl (sa_id);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
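A hypothetical vpp_api_test invocation that the parser above would accept; all values are illustrative:

  ipsec_spd_entry_add_del spd_id 1 sa_id 10 priority 100 outbound action bypass laddr_start 10.0.0.1 laddr_stop 10.0.0.255 raddr_start 0.0.0.0 raddr_stop 255.255.255.255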
+
+static int
+api_ipsec_spd_entry_add_del_v2 (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ipsec_spd_entry_add_del_t *mp;
+ u8 is_add = 1, is_outbound = 0;
+ u32 spd_id = 0, sa_id = 0, protocol = IPSEC_POLICY_PROTOCOL_ANY, policy = 0;
i32 priority = 0;
u32 rport_start = 0, rport_stop = (u32) ~0;
u32 lport_start = 0, lport_stop = (u32) ~0;
@@ -182,12 +282,30 @@ vl_api_ipsec_sad_entry_add_reply_t_handler (
{
}
+static void
+vl_api_ipsec_sad_entry_add_v2_reply_t_handler (
+ vl_api_ipsec_sad_entry_add_reply_t *mp)
+{
+}
+
static int
api_ipsec_sad_entry_del (vat_main_t *vat)
{
return -1;
}
+static int
+api_ipsec_sad_bind (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ipsec_sad_unbind (vat_main_t *vat)
+{
+ return -1;
+}
+
static void
vl_api_ipsec_sad_entry_add_del_v2_reply_t_handler (
vl_api_ipsec_sad_entry_add_del_v2_reply_t *mp)
@@ -207,6 +325,12 @@ api_ipsec_sad_entry_add_del_v3 (vat_main_t *vat)
}
static int
+api_ipsec_sad_entry_update (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
api_ipsec_tunnel_protect_update (vat_main_t *vat)
{
return -1;
@@ -224,6 +348,18 @@ api_ipsec_sa_v3_dump (vat_main_t *vat)
}
static int
+api_ipsec_sa_v4_dump (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ipsec_sa_v5_dump (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
api_ipsec_tunnel_protect_dump (vat_main_t *vat)
{
return -1;
@@ -247,12 +383,24 @@ api_ipsec_sad_entry_add (vat_main_t *vat)
return -1;
}
+static int
+api_ipsec_sad_entry_add_v2 (vat_main_t *vat)
+{
+ return -1;
+}
+
static void
vl_api_ipsec_spd_entry_add_del_reply_t_handler (
vl_api_ipsec_spd_entry_add_del_reply_t *mp)
{
}
+static void
+vl_api_ipsec_spd_entry_add_del_v2_reply_t_handler (
+ vl_api_ipsec_spd_entry_add_del_v2_reply_t *mp)
+{
+}
+
static int
api_ipsec_spds_dump (vat_main_t *vam)
{
@@ -270,6 +418,16 @@ vl_api_ipsec_sa_v3_details_t_handler (vl_api_ipsec_sa_v3_details_t *mp)
{
}
+static void
+vl_api_ipsec_sa_v4_details_t_handler (vl_api_ipsec_sa_v4_details_t *mp)
+{
+}
+
+static void
+vl_api_ipsec_sa_v5_details_t_handler (vl_api_ipsec_sa_v5_details_t *mp)
+{
+}
+
static int
api_ipsec_spd_interface_dump (vat_main_t *vat)
{
diff --git a/src/vnet/ipsec/ipsec_tun.c b/src/vnet/ipsec/ipsec_tun.c
index 1a9a25783ae..ecda291e985 100644
--- a/src/vnet/ipsec/ipsec_tun.c
+++ b/src/vnet/ipsec/ipsec_tun.c
@@ -22,6 +22,7 @@
#include <vnet/adj/adj_delegate.h>
#include <vnet/adj/adj_midchain.h>
#include <vnet/teib/teib.h>
+#include <vnet/mpls/mpls.h>
/* instantiate the bihash functions */
#include <vppinfra/bihash_8_16.h>
@@ -100,14 +101,12 @@ ipsec_tun_register_nodes (ip_address_family_t af)
if (0 == ipsec_tun_node_regs[af]++)
{
if (AF_IP4 == af)
- {
- ipsec_register_udp_port (UDP_DST_PORT_ipsec);
- ip4_register_protocol (IP_PROTOCOL_IPSEC_ESP,
- ipsec4_tun_input_node.index);
- }
+ ip4_register_protocol (IP_PROTOCOL_IPSEC_ESP,
+ ipsec4_tun_input_node.index);
else
ip6_register_protocol (IP_PROTOCOL_IPSEC_ESP,
ipsec6_tun_input_node.index);
+ ipsec_register_udp_port (UDP_DST_PORT_ipsec, (AF_IP4 == af));
}
}
@@ -118,12 +117,10 @@ ipsec_tun_unregister_nodes (ip_address_family_t af)
if (0 == --ipsec_tun_node_regs[af])
{
if (AF_IP4 == af)
- {
- ipsec_unregister_udp_port (UDP_DST_PORT_ipsec);
- ip4_unregister_protocol (IP_PROTOCOL_IPSEC_ESP);
- }
+ ip4_unregister_protocol (IP_PROTOCOL_IPSEC_ESP);
else
ip6_unregister_protocol (IP_PROTOCOL_IPSEC_ESP);
+ ipsec_unregister_udp_port (UDP_DST_PORT_ipsec, (AF_IP4 == af));
}
}
@@ -137,12 +134,14 @@ ipsec_tun_protect_from_const_base (const adj_delegate_t * ad)
static u32
ipsec_tun_protect_get_adj_next (vnet_link_t linkt,
- const ipsec_tun_protect_t * itp)
+ const ipsec_tun_protect_t *itp)
{
ipsec_main_t *im;
- ipsec_sa_t *sa;
u32 next;
+ im = &ipsec_main;
+ next = 0;
+
if (!(itp->itp_flags & IPSEC_PROTECT_ITF))
{
if (ip46_address_is_ip4 (&itp->itp_tun.src))
@@ -151,42 +150,42 @@ ipsec_tun_protect_get_adj_next (vnet_link_t linkt,
linkt = VNET_LINK_IP6;
}
- sa = ipsec_sa_get (itp->itp_out_sa);
- im = &ipsec_main;
- next = 0;
-
- if ((sa->crypto_alg == IPSEC_CRYPTO_ALG_NONE &&
- sa->integ_alg == IPSEC_INTEG_ALG_NONE) &&
- !(itp->itp_flags & IPSEC_PROTECT_ITF))
- next = (VNET_LINK_IP4 == linkt ? im->esp4_no_crypto_tun_node_index :
- im->esp6_no_crypto_tun_node_index);
- else if (itp->itp_flags & IPSEC_PROTECT_L2)
- next = (VNET_LINK_IP4 == linkt ? im->esp4_encrypt_l2_tun_node_index :
- im->esp6_encrypt_l2_tun_node_index);
- else
+ switch (linkt)
{
- switch (linkt)
- {
- case VNET_LINK_IP4:
- next = im->esp4_encrypt_tun_node_index;
- break;
- case VNET_LINK_IP6:
- next = im->esp6_encrypt_tun_node_index;
- break;
- case VNET_LINK_MPLS:
- next = im->esp_mpls_encrypt_tun_node_index;
- break;
- case VNET_LINK_ARP:
- case VNET_LINK_NSH:
- case VNET_LINK_ETHERNET:
- ASSERT (0);
- break;
- }
+ case VNET_LINK_IP4:
+ next = im->esp4_encrypt_tun_node_index;
+ break;
+ case VNET_LINK_IP6:
+ next = im->esp6_encrypt_tun_node_index;
+ break;
+ case VNET_LINK_MPLS:
+ next = im->esp_mpls_encrypt_tun_node_index;
+ break;
+ case VNET_LINK_ARP:
+ case VNET_LINK_NSH:
+ case VNET_LINK_ETHERNET:
+ ASSERT (0);
+ break;
}
+
return (next);
}
static void
+ipsec_tun_setup_tx_nodes (u32 sw_if_index, const ipsec_tun_protect_t *itp)
+{
+ vnet_feature_modify_end_node (
+ ip4_main.lookup_main.output_feature_arc_index, sw_if_index,
+ ipsec_tun_protect_get_adj_next (VNET_LINK_IP4, itp));
+ vnet_feature_modify_end_node (
+ ip6_main.lookup_main.output_feature_arc_index, sw_if_index,
+ ipsec_tun_protect_get_adj_next (VNET_LINK_IP6, itp));
+ vnet_feature_modify_end_node (
+ mpls_main.output_feature_arc_index, sw_if_index,
+ ipsec_tun_protect_get_adj_next (VNET_LINK_MPLS, itp));
+}
+
+static void
ipsec_tun_protect_add_adj (adj_index_t ai, const ipsec_tun_protect_t * itp)
{
vec_validate_init_empty (ipsec_tun_protect_sa_by_adj_index, ai,
@@ -200,8 +199,8 @@ ipsec_tun_protect_add_adj (adj_index_t ai, const ipsec_tun_protect_t * itp)
else
{
ipsec_tun_protect_sa_by_adj_index[ai] = itp->itp_out_sa;
- adj_nbr_midchain_update_next_node
- (ai, ipsec_tun_protect_get_adj_next (adj_get_link_type (ai), itp));
+ adj_nbr_midchain_update_next_node (
+ ai, ipsec_tun_protect_get_adj_next (adj_get_link_type (ai), itp));
}
}
@@ -237,7 +236,6 @@ ipsec_tun_protect_rx_db_add (ipsec_main_t * im,
if (ip46_address_is_zero (&itp->itp_crypto.dst))
return;
- /* *INDENT-OFF* */
FOR_EACH_IPSEC_PROTECT_INPUT_SAI(itp, sai,
({
sa = ipsec_sa_get (sai);
@@ -292,7 +290,6 @@ ipsec_tun_protect_rx_db_add (ipsec_main_t * im,
ipsec_tun_register_nodes (AF_IP6);
}
}))
- /* *INDENT-ON* */
}
static adj_walk_rc_t
@@ -329,7 +326,7 @@ ipsec_tun_protect_tx_db_add (ipsec_tun_protect_t * itp)
{
if (INDEX_INVALID == idi->id_itp)
{
- // ipsec_tun_protect_feature_set (itp, 1);
+ ipsec_tun_setup_tx_nodes (itp->itp_sw_if_index, itp);
}
idi->id_itp = itp - ipsec_tun_protect_pool;
@@ -347,7 +344,7 @@ ipsec_tun_protect_tx_db_add (ipsec_tun_protect_t * itp)
* enable the encrypt feature for egress if this is the first addition
* on this interface
*/
- // ipsec_tun_protect_feature_set (itp, 1);
+ ipsec_tun_setup_tx_nodes (itp->itp_sw_if_index, itp);
}
hash_set_mem (idi->id_hash, itp->itp_key, itp - ipsec_tun_protect_pool);
@@ -372,7 +369,6 @@ ipsec_tun_protect_rx_db_remove (ipsec_main_t * im,
{
const ipsec_sa_t *sa;
- /* *INDENT-OFF* */
FOR_EACH_IPSEC_PROTECT_INPUT_SA(itp, sa,
({
if (ip46_address_is_ip4 (&itp->itp_crypto.dst))
@@ -406,7 +402,6 @@ ipsec_tun_protect_rx_db_remove (ipsec_main_t * im,
}
}
}));
- /* *INDENT-ON* */
}
static adj_walk_rc_t
@@ -435,7 +430,7 @@ ipsec_tun_protect_tx_db_remove (ipsec_tun_protect_t * itp)
if (vnet_sw_interface_is_p2p (vnet_get_main (), itp->itp_sw_if_index))
{
- // ipsec_tun_protect_feature_set (itp, 0);
+ ipsec_itf_reset_tx_nodes (itp->itp_sw_if_index);
idi->id_itp = INDEX_INVALID;
FOR_EACH_FIB_IP_PROTOCOL (nh_proto)
@@ -451,7 +446,7 @@ ipsec_tun_protect_tx_db_remove (ipsec_tun_protect_t * itp)
if (0 == hash_elts (idi->id_hash))
{
- // ipsec_tun_protect_feature_set (itp, 0);
+ ipsec_itf_reset_tx_nodes (itp->itp_sw_if_index);
hash_free (idi->id_hash);
idi->id_hash = NULL;
}
@@ -465,7 +460,6 @@ ipsec_tun_protect_set_crypto_addr (ipsec_tun_protect_t * itp)
{
ipsec_sa_t *sa;
- /* *INDENT-OFF* */
FOR_EACH_IPSEC_PROTECT_INPUT_SA(itp, sa,
({
if (ipsec_sa_is_set_IS_TUNNEL (sa))
@@ -485,7 +479,6 @@ ipsec_tun_protect_set_crypto_addr (ipsec_tun_protect_t * itp)
itp->itp_flags &= ~IPSEC_PROTECT_ENCAPED;
}
}));
- /* *INDENT-ON* */
}
static void
@@ -502,13 +495,14 @@ ipsec_tun_protect_config (ipsec_main_t * im,
ipsec_sa_lock (itp->itp_out_sa);
- /* *INDENT-OFF* */
+ if (itp->itp_flags & IPSEC_PROTECT_ITF)
+ ipsec_sa_set_NO_ALGO_NO_DROP (ipsec_sa_get (itp->itp_out_sa));
+
FOR_EACH_IPSEC_PROTECT_INPUT_SAI(itp, sai,
({
ipsec_sa_lock(sai);
}));
ipsec_tun_protect_set_crypto_addr(itp);
- /* *INDENT-ON* */
/*
* add to the DB against each SA
@@ -525,7 +519,6 @@ ipsec_tun_protect_unconfig (ipsec_main_t * im, ipsec_tun_protect_t * itp)
ipsec_sa_t *sa;
index_t sai;
- /* *INDENT-OFF* */
FOR_EACH_IPSEC_PROTECT_INPUT_SA(itp, sa,
({
ipsec_sa_unset_IS_PROTECT (sa);
@@ -534,13 +527,13 @@ ipsec_tun_protect_unconfig (ipsec_main_t * im, ipsec_tun_protect_t * itp)
ipsec_tun_protect_rx_db_remove (im, itp);
ipsec_tun_protect_tx_db_remove (itp);
+ ipsec_sa_unset_NO_ALGO_NO_DROP (ipsec_sa_get (itp->itp_out_sa));
ipsec_sa_unlock(itp->itp_out_sa);
FOR_EACH_IPSEC_PROTECT_INPUT_SAI(itp, sai,
({
ipsec_sa_unlock(sai);
}));
- /* *INDENT-ON* */
ITP_DBG (itp, "unconfigured");
}
@@ -748,12 +741,10 @@ ipsec_tun_protect_walk (ipsec_tun_protect_walk_cb_t fn, void *ctx)
{
index_t itpi;
- /* *INDENT-OFF* */
pool_foreach_index (itpi, ipsec_tun_protect_pool)
{
fn (itpi, ctx);
}
- /* *INDENT-ON* */
}
void
@@ -769,12 +760,10 @@ ipsec_tun_protect_walk_itf (u32 sw_if_index,
idi = &itp_db.id_itf[sw_if_index];
- /* *INDENT-OFF* */
hash_foreach(key, itpi, idi->id_hash,
({
fn (itpi, ctx);
}));
- /* *INDENT-ON* */
if (INDEX_INVALID != idi->id_itp)
fn (idi->id_itp, ctx);
}
@@ -802,19 +791,27 @@ ipsec_tun_feature_update (u32 sw_if_index, u8 arc_index, u8 is_enable,
ipsec_main.esp4_decrypt_tun_node_index :
ipsec_main.esp6_decrypt_tun_node_index;
- vnet_feature_modify_end_node (
- feature_main.device_input_feature_arc_index, sw_if_index, decrypt_tun);
- itp->itp_flags |= IPSEC_PROTECT_FEAT;
+ if (!(itp->itp_flags & IPSEC_PROTECT_FEAT))
+ {
+ itp->itp_flags |= IPSEC_PROTECT_FEAT;
+ vnet_feature_modify_end_node (
+ feature_main.device_input_feature_arc_index, sw_if_index,
+ decrypt_tun);
+ }
}
else
{
- u32 eth_in =
- vlib_get_node_by_name (vlib_get_main (), (u8 *) "ethernet-input")
- ->index;
+ if (itp->itp_flags & IPSEC_PROTECT_FEAT)
+ {
+ itp->itp_flags &= ~IPSEC_PROTECT_FEAT;
+
+ u32 eth_in =
+ vlib_get_node_by_name (vlib_get_main (), (u8 *) "ethernet-input")
+ ->index;
- vnet_feature_modify_end_node (
- feature_main.device_input_feature_arc_index, sw_if_index, eth_in);
- itp->itp_flags &= ~IPSEC_PROTECT_FEAT;
+ vnet_feature_modify_end_node (
+ feature_main.device_input_feature_arc_index, sw_if_index, eth_in);
+ }
}
/* Propagate flag change into lookup entries */
@@ -848,6 +845,9 @@ ipsec_tun_protect_adj_delegate_adj_created (adj_index_t ai)
if (!adj_is_midchain (ai))
return;
+ vec_validate_init_empty (ipsec_tun_protect_sa_by_adj_index, ai,
+ INDEX_INVALID);
+
adj = adj_get (ai);
ip_address_from_46 (&adj->sub_type.midchain.next_hop,
@@ -956,16 +956,6 @@ ipsec_tunnel_protect_init (vlib_main_t *vm)
IPSEC_TUN_DEFAULT_HASH_NUM_BUCKETS,
IPSEC_TUN_DEFAULT_HASH_MEMORY_SIZE);
- /* set up feature nodes to drop outbound packets with no crypto alg set */
- im->esp4_no_crypto_tun_node_index =
- vlib_get_node_by_name (vm, (u8 *) "esp4-no-crypto")->index;
- im->esp6_no_crypto_tun_node_index =
- vlib_get_node_by_name (vm, (u8 *) "esp6-no-crypto")->index;
- im->esp6_encrypt_l2_tun_node_index =
- vlib_get_node_by_name (vm, (u8 *) "esp6-encrypt-tun")->index;
- im->esp4_encrypt_l2_tun_node_index =
- vlib_get_node_by_name (vm, (u8 *) "esp4-encrypt-tun")->index;
-
ipsec_tun_adj_delegate_type =
adj_delegate_register_new_type (&ipsec_tun_adj_delegate_vft);
diff --git a/src/vnet/ipsec/ipsec_tun.h b/src/vnet/ipsec/ipsec_tun.h
index f452fa4354c..9d8a124443d 100644
--- a/src/vnet/ipsec/ipsec_tun.h
+++ b/src/vnet/ipsec/ipsec_tun.h
@@ -182,7 +182,6 @@ always_inline index_t
ipsec_tun_protect_get_sa_out (adj_index_t ai)
{
ASSERT (vec_len (ipsec_tun_protect_sa_by_adj_index) > ai);
- ASSERT (INDEX_INVALID != ipsec_tun_protect_sa_by_adj_index[ai]);
return (ipsec_tun_protect_sa_by_adj_index[ai]);
}
diff --git a/src/vnet/ipsec/ipsec_tun_in.c b/src/vnet/ipsec/ipsec_tun_in.c
index 4f8af006d2b..c82de3ebaff 100644
--- a/src/vnet/ipsec/ipsec_tun_in.c
+++ b/src/vnet/ipsec/ipsec_tun_in.c
@@ -24,31 +24,10 @@
#include <vnet/ipsec/ipsec_io.h>
#include <vnet/ipsec/ipsec_punt.h>
#include <vnet/ipsec/ipsec_tun.h>
+#include <vnet/ipsec/ipsec.api_enum.h>
#include <vnet/ip/ip4_input.h>
-/* Statistics (not really errors) */
-#define foreach_ipsec_tun_protect_input_error \
- _(RX, "good packets received") \
- _(DISABLED, "ipsec packets received on disabled interface") \
- _(NO_TUNNEL, "no matching tunnel") \
- _(TUNNEL_MISMATCH, "SPI-tunnel mismatch") \
- _(NAT_KEEPALIVE, "NAT Keepalive") \
- _(TOO_SHORT, "Too Short") \
- _(SPI_0, "SPI 0")
-
-static char *ipsec_tun_protect_input_error_strings[] = {
-#define _(sym,string) string,
- foreach_ipsec_tun_protect_input_error
-#undef _
-};
-
-typedef enum
-{
-#define _(sym,str) IPSEC_TUN_PROTECT_INPUT_ERROR_##sym,
- foreach_ipsec_tun_protect_input_error
-#undef _
- IPSEC_TUN_PROTECT_INPUT_N_ERROR,
-} ipsec_tun_protect_input_error_t;
+typedef vl_counter_ipsec_tun_enum_t ipsec_tun_protect_input_error_t;
typedef enum ipsec_tun_next_t_
{
@@ -93,25 +72,35 @@ ipsec_ip4_if_no_tunnel (vlib_node_runtime_t * node,
{
if (PREDICT_FALSE (0 == esp->spi))
{
- b->error = node->errors[IPSEC_TUN_PROTECT_INPUT_ERROR_SPI_0];
+ b->error = node->errors[IPSEC_TUN_ERROR_SPI_0];
b->punt_reason = ipsec_punt_reason[(ip4->protocol == IP_PROTOCOL_UDP ?
IPSEC_PUNT_IP4_SPI_UDP_0 :
IPSEC_PUNT_IP4_NO_SUCH_TUNNEL)];
}
else
{
- b->error = node->errors[IPSEC_TUN_PROTECT_INPUT_ERROR_NO_TUNNEL];
+ b->error = node->errors[IPSEC_TUN_ERROR_NO_TUNNEL];
b->punt_reason = ipsec_punt_reason[IPSEC_PUNT_IP4_NO_SUCH_TUNNEL];
}
return VNET_DEVICE_INPUT_NEXT_PUNT;
}
always_inline u16
-ipsec_ip6_if_no_tunnel (vlib_node_runtime_t * node,
- vlib_buffer_t * b, const esp_header_t * esp)
+ipsec_ip6_if_no_tunnel (vlib_node_runtime_t *node, vlib_buffer_t *b,
+ const esp_header_t *esp, const ip6_header_t *ip6)
{
- b->error = node->errors[IPSEC_TUN_PROTECT_INPUT_ERROR_NO_TUNNEL];
- b->punt_reason = ipsec_punt_reason[IPSEC_PUNT_IP6_NO_SUCH_TUNNEL];
+ if (PREDICT_FALSE (0 == esp->spi))
+ {
+ b->error = node->errors[IPSEC_TUN_ERROR_SPI_0];
+ b->punt_reason = ipsec_punt_reason[(ip6->protocol == IP_PROTOCOL_UDP ?
+ IPSEC_PUNT_IP6_SPI_UDP_0 :
+ IPSEC_PUNT_IP6_NO_SUCH_TUNNEL)];
+ }
+ else
+ {
+ b->error = node->errors[IPSEC_TUN_ERROR_NO_TUNNEL];
+ b->punt_reason = ipsec_punt_reason[IPSEC_PUNT_IP6_NO_SUCH_TUNNEL];
+ }
return VNET_DEVICE_INPUT_NEXT_PUNT;
}
@@ -167,8 +156,8 @@ ipsec_tun_protect_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
while (n_left_from > 0)
{
u32 sw_if_index0, len0, hdr_sz0;
- clib_bihash_kv_24_16_t bkey60;
- clib_bihash_kv_8_16_t bkey40;
+ clib_bihash_kv_24_16_t bkey60 = { 0 };
+ clib_bihash_kv_8_16_t bkey40 = { 0 };
ipsec4_tunnel_kv_t *key40;
ipsec6_tunnel_kv_t *key60;
ip4_header_t *ip40;
@@ -185,19 +174,62 @@ ipsec_tun_protect_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (is_ip6)
{
ip60 = (ip6_header_t *) ip40;
- esp0 = (esp_header_t *) (ip60 + 1);
- hdr_sz0 = sizeof (ip6_header_t);
+ if (ip60->protocol == IP_PROTOCOL_UDP)
+ {
+ /* NAT UDP port 4500 case, don't advance any more */
+ esp0 = (esp_header_t *) ((u8 *) ip60 + sizeof (ip6_header_t) +
+ sizeof (udp_header_t));
+ hdr_sz0 = 0;
+ buf_rewind0 = sizeof (ip6_header_t) + sizeof (udp_header_t);
+
+ const udp_header_t *udp0 =
+ (udp_header_t *) ((u8 *) ip60 + sizeof (ip6_header_t));
+
+ /* length 9 = sizeof(udp_header) + 1 byte of special SPI */
+ if (clib_net_to_host_u16 (udp0->length) == 9 &&
+ esp0->spi_bytes[0] == 0xff)
+ {
+ b[0]->error = node->errors[IPSEC_TUN_ERROR_NAT_KEEPALIVE];
+
+ next[0] = VNET_DEVICE_INPUT_NEXT_IP6_DROP;
+ len0 = 0;
+
+ vlib_buffer_advance (b[0], -buf_rewind0);
+ goto trace00;
+ }
+ }
+ else
+ {
+ esp0 = (esp_header_t *) (ip60 + 1);
+ buf_rewind0 = hdr_sz0 = sizeof (ip6_header_t);
+ }
}
else
{
- /* NAT UDP port 4500 case, don't advance any more */
if (ip40->protocol == IP_PROTOCOL_UDP)
{
+ /* NAT UDP port 4500 case, don't advance any more */
esp0 =
(esp_header_t *) ((u8 *) ip40 + ip4_header_bytes (ip40) +
sizeof (udp_header_t));
hdr_sz0 = 0;
buf_rewind0 = ip4_header_bytes (ip40) + sizeof (udp_header_t);
+
+ const udp_header_t *udp0 =
+ (udp_header_t *) ((u8 *) ip40 + ip4_header_bytes (ip40));
+
+ /* length 9 = sizeof(udp_header) + 1 byte of special SPI */
+ if (clib_net_to_host_u16 (udp0->length) == 9 &&
+ esp0->spi_bytes[0] == 0xff)
+ {
+ b[0]->error = node->errors[IPSEC_TUN_ERROR_NAT_KEEPALIVE];
+
+ next[0] = VNET_DEVICE_INPUT_NEXT_IP4_DROP;
+ len0 = 0;
+
+ vlib_buffer_advance (b[0], -buf_rewind0);
+ goto trace00;
+ }
}
else
{
@@ -213,15 +245,11 @@ ipsec_tun_protect_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (len0 < sizeof (esp_header_t))
{
- if (esp0->spi_bytes[0] == 0xff)
- b[0]->error =
- node->errors[IPSEC_TUN_PROTECT_INPUT_ERROR_NAT_KEEPALIVE];
- else
- b[0]->error =
- node->errors[IPSEC_TUN_PROTECT_INPUT_ERROR_TOO_SHORT];
+ b[0]->error = node->errors[IPSEC_TUN_ERROR_TOO_SHORT];
next[0] = is_ip6 ? VNET_DEVICE_INPUT_NEXT_IP6_DROP :
VNET_DEVICE_INPUT_NEXT_IP4_DROP;
+ vlib_buffer_advance (b[0], -buf_rewind0);
goto trace00;
}
@@ -249,7 +277,8 @@ ipsec_tun_protect_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
else
{
- next[0] = ipsec_ip6_if_no_tunnel (node, b[0], esp0);
+ next[0] = ipsec_ip6_if_no_tunnel (node, b[0], esp0, ip60);
+ vlib_buffer_advance (b[0], -buf_rewind0);
n_no_tunnel++;
goto trace00;
}
@@ -296,7 +325,7 @@ ipsec_tun_protect_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_increment_combined_counter
(drop_counter, thread_index, sw_if_index0, 1, len0);
n_disabled++;
- b[0]->error = node->errors[IPSEC_TUN_PROTECT_INPUT_ERROR_DISABLED];
+ b[0]->error = node->errors[IPSEC_TUN_ERROR_DISABLED];
next[0] = is_ip6 ? VNET_DEVICE_INPUT_NEXT_IP6_DROP :
VNET_DEVICE_INPUT_NEXT_IP4_DROP;
goto trace00;
@@ -364,12 +393,10 @@ ipsec_tun_protect_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
thread_index,
last_sw_if_index, n_packets, n_bytes);
- vlib_node_increment_counter (vm, node->node_index,
- IPSEC_TUN_PROTECT_INPUT_ERROR_RX,
- from_frame->n_vectors - (n_disabled +
- n_no_tunnel));
- vlib_node_increment_counter (vm, node->node_index,
- IPSEC_TUN_PROTECT_INPUT_ERROR_NO_TUNNEL,
+ vlib_node_increment_counter (vm, node->node_index, IPSEC_TUN_ERROR_RX,
+ from_frame->n_vectors -
+ (n_disabled + n_no_tunnel));
+ vlib_node_increment_counter (vm, node->node_index, IPSEC_TUN_ERROR_NO_TUNNEL,
n_no_tunnel);
vlib_buffer_enqueue_to_next (vm, node, from, nexts, from_frame->n_vectors);
@@ -384,17 +411,15 @@ VLIB_NODE_FN (ipsec4_tun_input_node) (vlib_main_t * vm,
return ipsec_tun_protect_input_inline (vm, node, from_frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ipsec4_tun_input_node) = {
.name = "ipsec4-tun-input",
.vector_size = sizeof (u32),
.format_trace = format_ipsec_tun_protect_input_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN (ipsec_tun_protect_input_error_strings),
- .error_strings = ipsec_tun_protect_input_error_strings,
+ .n_errors = IPSEC_TUN_N_ERROR,
+ .error_counters = ipsec_tun_error_counters,
.sibling_of = "device-input",
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ipsec6_tun_input_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -403,17 +428,15 @@ VLIB_NODE_FN (ipsec6_tun_input_node) (vlib_main_t * vm,
return ipsec_tun_protect_input_inline (vm, node, from_frame, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ipsec6_tun_input_node) = {
.name = "ipsec6-tun-input",
.vector_size = sizeof (u32),
.format_trace = format_ipsec_tun_protect_input_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN (ipsec_tun_protect_input_error_strings),
- .error_strings = ipsec_tun_protect_input_error_strings,
+ .n_errors = IPSEC_TUN_N_ERROR,
+ .error_counters = ipsec_tun_error_counters,
.sibling_of = "device-input",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipsec/ipsec_types.api b/src/vnet/ipsec/ipsec_types.api
index ed04f470fd2..37c1141ab46 100644
--- a/src/vnet/ipsec/ipsec_types.api
+++ b/src/vnet/ipsec/ipsec_types.api
@@ -36,6 +36,10 @@ enum ipsec_crypto_alg
IPSEC_API_CRYPTO_ALG_AES_GCM_256,
IPSEC_API_CRYPTO_ALG_DES_CBC,
IPSEC_API_CRYPTO_ALG_3DES_CBC,
+ IPSEC_API_CRYPTO_ALG_CHACHA20_POLY1305 [backwards_compatible],
+ IPSEC_API_CRYPTO_ALG_AES_NULL_GMAC_128 [backwards_compatible],
+ IPSEC_API_CRYPTO_ALG_AES_NULL_GMAC_192 [backwards_compatible],
+ IPSEC_API_CRYPTO_ALG_AES_NULL_GMAC_256 [backwards_compatible],
};
/*
@@ -95,10 +99,103 @@ typedef key
u8 data[128];
};
+enum ipsec_spd_action
+{
+ /* bypass - no IPsec processing */
+ IPSEC_API_SPD_ACTION_BYPASS = 0,
+ /* discard - discard packet with ICMP processing */
+ IPSEC_API_SPD_ACTION_DISCARD,
+ /* resolve - send request to control plane for SA resolving */
+ IPSEC_API_SPD_ACTION_RESOLVE,
+ /* protect - apply IPsec policy using following parameters */
+ IPSEC_API_SPD_ACTION_PROTECT,
+};
+
+/** \brief IPsec: Security Policy Database entry
+
+ See RFC 4301, 4.4.1.1 on how to match packet to selectors
+
+ @param spd_id - SPD instance id (control plane allocated)
+ @param priority - priority of SPD entry (non-unique value). Used to order SPD matching - higher priorities match before lower
+ @param is_outbound - entry applies to outbound traffic if non-zero, otherwise applies to inbound traffic
+ @param remote_address_start - start of remote address range to match
+ @param remote_address_stop - end of remote address range to match
+ @param local_address_start - start of local address range to match
+ @param local_address_stop - end of local address range to match
+ @param protocol - protocol type to match [0 means any] otherwise IANA value
+ @param remote_port_start - start of remote port range to match ...
+ @param remote_port_stop - end of remote port range to match [0 to 65535 means ANY, 65535 to 0 means OPAQUE]
+ @param local_port_start - start of local port range to match ...
+ @param local_port_stop - end of local port range to match [0 to 65535 means ANY, 65535 to 0 means OPAQUE]
+ @param policy - action to perform on match
+ @param sa_id - SAD instance id (control plane allocated)
+*/
+typedef ipsec_spd_entry
+{
+ u32 spd_id;
+ i32 priority;
+ bool is_outbound;
+
+ u32 sa_id;
+ vl_api_ipsec_spd_action_t policy;
+ /* protocol to match: 0 means any, otherwise an IANA protocol number */
+ u8 protocol;
+
+ // Selector
+ vl_api_address_t remote_address_start;
+ vl_api_address_t remote_address_stop;
+ vl_api_address_t local_address_start;
+ vl_api_address_t local_address_stop;
+
+ u16 remote_port_start;
+ u16 remote_port_stop;
+ u16 local_port_start;
+ u16 local_port_stop;
+};
+
+/** \brief IPsec: Security Policy Database entry v2
+
+ See RFC 4301, 4.4.1.1 on how to match packet to selectors
+
+ @param spd_id - SPD instance id (control plane allocated)
+ @param priority - priority of SPD entry (non-unique value). Used to order SPD matching - higher priorities match before lower
+ @param is_outbound - entry applies to outbound traffic if non-zero, otherwise applies to inbound traffic
+ @param remote_address_start - start of remote address range to match
+ @param remote_address_stop - end of remote address range to match
+ @param local_address_start - start of local address range to match
+ @param local_address_stop - end of local address range to match
+ @param protocol - protocol type to match [255 means any] otherwise IANA value
+ @param remote_port_start - start of remote port range to match ...
+ @param remote_port_stop - end of remote port range to match [0 to 65535 means ANY, 65535 to 0 means OPAQUE]
+ @param local_port_start - start of local port range to match ...
+ @param local_port_stop - end of local port range to match [0 to 65535 means ANY, 65535 to 0 means OPAQUE]
+ @param policy - action to perform on match
+ @param sa_id - SAD instance id (control plane allocated)
+*/
+typedef ipsec_spd_entry_v2
+{
+ u32 spd_id;
+ i32 priority;
+ bool is_outbound;
+
+ u32 sa_id;
+ vl_api_ipsec_spd_action_t policy;
+ u8 protocol;
+
+ // Selector
+ vl_api_address_t remote_address_start;
+ vl_api_address_t remote_address_stop;
+ vl_api_address_t local_address_start;
+ vl_api_address_t local_address_stop;
+
+ u16 remote_port_start;
+ u16 remote_port_stop;
+ u16 local_port_start;
+ u16 local_port_stop;
+};
+
/** \brief IPsec: Security Association Database entry
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param is_add - add SAD entry if non-zero, else delete
@param sad_id - sad id
@param spi - security parameter index
@param protocol - 0 = AH, 1 = ESP
@@ -106,6 +203,7 @@ typedef key
@param crypto_key - crypto keying material
@param integrity_algorithm - one of the supported algorithms
@param integrity_key - integrity keying material
+ @param flags - SA flags (see ipsec_sad_flags above)
@param tunnel_src_address - IPsec tunnel source address IPv6 if is_tunnel_ipv6 is non-zero, else IPv4. Only valid if is_tunnel is non-zero
@param tunnel_dst_address - IPsec tunnel destination address IPv6 if is_tunnel_ipv6 is non-zero, else IPv4. Only valid if is_tunnel is non-zero
@param tx_table_id - the FIB id used for encapsulated packets
@@ -117,6 +215,7 @@ typedef key
@param tunnel_flags - Flags controlling the copying of encap/decap value
 @param dscp - Fixed DSCP value for tunnel encap
*/
typedef ipsec_sad_entry
{
u32 sad_id;
@@ -189,6 +288,46 @@ typedef ipsec_sad_entry_v3
u16 udp_dst_port [default=4500];
};
+/** \brief IPsec: Security Association Database entry
+ @param sad_id - sad id
+ @param spi - security parameter index
+ @param protocol - 0 = AH, 1 = ESP
+ @param crypto_algorithm - a supported crypto algorithm
+ @param crypto_key - crypto keying material
+ @param integrity_algorithm - one of the supported algorithms
+ @param integrity_key - integrity keying material
+ @param flags - SA flags (see ipsec_sad_flags above)
+ @param tunnel - tunnel description (see vnet/tunnel/tunnel_types.api)
+ @param salt - for use with counter mode ciphers
+ @param udp_src_port - If using UDP Encapsulation, use this source port for
+ TX. It is ignored for RX.
+ @param udp_dst_port - If using UDP Encapsulation, use this destination port
+ for TX. Expect traffic on this port for RX.
+ @param anti_replay_window_size - AR window size to use. The supplied value is rounded up to the next power of 2.
+ */
+typedef ipsec_sad_entry_v4
+{
+ u32 sad_id;
+ u32 spi;
+
+ vl_api_ipsec_proto_t protocol;
+
+ vl_api_ipsec_crypto_alg_t crypto_algorithm;
+ vl_api_key_t crypto_key;
+
+ vl_api_ipsec_integ_alg_t integrity_algorithm;
+ vl_api_key_t integrity_key;
+
+ vl_api_ipsec_sad_flags_t flags;
+
+ vl_api_tunnel_t tunnel;
+
+ u32 salt;
+ u16 udp_src_port [default=4500];
+ u16 udp_dst_port [default=4500];
+
+ u32 anti_replay_window_size [default=64];
+};
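Per the comment above, anti_replay_window_size is rounded up to a power of 2 on receipt. A sketch of that rounding with the usual bit trick — illustrative only; the actual handler may compute it differently (e.g. via a max_log2-style helper):

/* round x up to the next power of 2; assumes x >= 1
 * e.g. 100 -> 128, 64 -> 64 */
static inline u32
round_pow2_u32 (u32 x)
{
  x -= 1;
  x |= x >> 1;
  x |= x >> 2;
  x |= x >> 4;
  x |= x >> 8;
  x |= x >> 16;
  return x + 1;
}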
/*
* Local Variables:
diff --git a/src/vnet/l2/feat_bitmap.c b/src/vnet/l2/feat_bitmap.c
index 349ec67462b..507fe365f07 100644
--- a/src/vnet/l2/feat_bitmap.c
+++ b/src/vnet/l2/feat_bitmap.c
@@ -155,7 +155,6 @@ feat_bitmap_drop_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (feat_bitmap_drop_init);
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (feat_bitmap_drop_node,static) = {
.function = feat_bitmap_drop_node_fn,
.name = "feature-bitmap-drop",
@@ -173,7 +172,6 @@ VLIB_REGISTER_NODE (feat_bitmap_drop_node,static) = {
[FEAT_BITMAP_DROP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api
index b0ac23f705a..ccba9aa3df1 100644
--- a/src/vnet/l2/l2.api
+++ b/src/vnet/l2/l2.api
@@ -1,6 +1,7 @@
/* Hey Emacs use -*- mode: C -*- */
/*
* Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2022 Nordix Foundation.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -14,7 +15,7 @@
* limitations under the License.
*/
-option version = "3.1.0";
+option version = "3.2.0";
import "vnet/ip/ip_types.api";
import "vnet/ethernet/ethernet_types.api";
@@ -304,7 +305,7 @@ autoreply define bridge_domain_set_learn_limit
u32 learn_limit;
};
-/** \brief L2 bridge domain add or delete request
+/** \brief L2 bridge domain add or delete request - will be deprecated
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param bd_id - the bridge domain to create
@@ -319,6 +320,7 @@ autoreply define bridge_domain_set_learn_limit
*/
autoreply define bridge_domain_add_del
{
+ option deprecated;
u32 client_index;
u32 context;
u32 bd_id;
@@ -333,6 +335,49 @@ autoreply define bridge_domain_add_del
bool is_add [default=true];
};
+/** \brief L2 bridge domain add delete request version 2
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bd_id - the id of the bridge domain to create/delete;
+ if bd_id == ~0, a bridge domain is created with an unused id
+ @param flood - enable/disable bcast/mcast flooding in the bd
+ @param uu_flood - enable/disable unknown unicast flood in the bd
+ @param forward - enable/disable forwarding on all interfaces in the bd
+ @param learn - enable/disable learning on all interfaces in the bd
+ @param arp_term - enable/disable arp termination in the bd
+ @param arp_ufwd - enable/disable arp unicast forwarding in the bd
+ @param mac_age - mac aging time in min, 0 for disabled
+ @param bd_tag - optional textual tag for the bridge domain
+ @param is_add - add or delete flag
+*/
+define bridge_domain_add_del_v2
+{
+ u32 client_index;
+ u32 context;
+ u32 bd_id;
+ bool flood;
+ bool uu_flood;
+ bool forward;
+ bool learn;
+ bool arp_term;
+ bool arp_ufwd;
+ u8 mac_age;
+ string bd_tag[64];
+ bool is_add [default=true];
+};
+
+/** \brief L2 bridge domain add delete version 2 response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param bd_id - the id of the new bridge domain
+*/
+define bridge_domain_add_del_v2_reply
+{
+ u32 context;
+ i32 retval;
+ u32 bd_id;
+};
+
/** \brief L2 bridge domain request operational state details
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c
index 5a0432de43d..035542d298d 100644
--- a/src/vnet/l2/l2_api.c
+++ b/src/vnet/l2/l2_api.c
@@ -3,6 +3,7 @@
* l2_api.c - layer 2 forwarding api
*
* Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2022 Nordix Foundation.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -67,7 +68,6 @@ vl_api_l2_xconnect_dump_t_handler (vl_api_l2_xconnect_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
vec_foreach_index (sw_if_index, l2im->configs)
{
config = vec_elt_at_index (l2im->configs, sw_if_index);
@@ -75,7 +75,6 @@ vl_api_l2_xconnect_dump_t_handler (vl_api_l2_xconnect_dump_t * mp)
send_l2_xconnect_details (reg, mp->context, sw_if_index,
config->output_sw_if_index);
}
- /* *INDENT-ON* */
}
static void
@@ -413,12 +412,10 @@ vl_api_l2_flags_t_handler (vl_api_l2_flags_t * mp)
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_L2_FLAGS_REPLY,
({
rmp->resulting_feature_bitmap = ntohl(rbm);
}));
- /* *INDENT-ON* */
}
static void
@@ -511,6 +508,37 @@ vl_api_bridge_domain_add_del_t_handler (vl_api_bridge_domain_add_del_t * mp)
}
static void
+vl_api_bridge_domain_add_del_v2_t_handler (
+ vl_api_bridge_domain_add_del_v2_t *mp)
+{
+ vl_api_bridge_domain_add_del_v2_reply_t *rmp;
+ u32 bd_id = ntohl (mp->bd_id);
+ int rv = 0;
+
+ if ((~0 == bd_id) && (mp->is_add))
+ bd_id = bd_get_unused_id ();
+
+ if ((~0 == bd_id) && (mp->is_add))
+ rv = VNET_API_ERROR_EAGAIN;
+ else
+ {
+ l2_bridge_domain_add_del_args_t a = { .is_add = mp->is_add,
+ .flood = mp->flood,
+ .uu_flood = mp->uu_flood,
+ .forward = mp->forward,
+ .learn = mp->learn,
+ .arp_term = mp->arp_term,
+ .arp_ufwd = mp->arp_ufwd,
+ .mac_age = mp->mac_age,
+ .bd_id = bd_id,
+ .bd_tag = mp->bd_tag };
+ rv = bd_add_del (&a);
+ }
+ REPLY_MACRO2 (VL_API_BRIDGE_DOMAIN_ADD_DEL_V2_REPLY,
+ ({ rmp->bd_id = htonl (bd_id); }));
+}
+
+static void
send_bridge_domain_details (l2input_main_t * l2im,
vl_api_registration_t * reg,
l2_bridge_domain_t * bd_config,
@@ -651,12 +679,10 @@ vl_api_bridge_flags_t_handler (vl_api_bridge_flags_t * mp)
bitmap = bd_set_flags (vm, bd_index, flags, mp->is_set);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_BRIDGE_FLAGS_REPLY,
({
rmp->resulting_feature_bitmap = ntohl(bitmap);
}));
- /* *INDENT-ON* */
}
static void
@@ -918,7 +944,6 @@ vl_api_bd_ip_mac_dump_t_handler (vl_api_bd_ip_mac_dump_t * mp)
u64 mac64;
bd_id = bd_config->bd_id;
- /* *INDENT-OFF* */
hash_foreach (ip4_addr.as_u32, mac64, bd_config->mac_by_ip4,
({
ip46_address_t ip = {
@@ -940,7 +965,6 @@ vl_api_bd_ip_mac_dump_t_handler (vl_api_bd_ip_mac_dump_t * mp)
send_bd_ip_mac_entry (am, reg, bd_id, &ip, IP46_TYPE_IP6,
&mac, mp->context);
}));
- /* *INDENT-ON* */
}
}
}
@@ -1094,12 +1118,10 @@ vl_api_bvi_create_t_handler (vl_api_bvi_create_t * mp)
rv = l2_bvi_create (ntohl (mp->user_instance), &mac, &sw_if_index);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_BVI_CREATE_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -1193,13 +1215,11 @@ l2_arp_term_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_arp_term_process_node) = {
.function = l2_arp_term_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "l2-arp-term-publisher",
};
-/* *INDENT-ON* */
static void
vl_api_want_l2_arp_term_events_t_handler (vl_api_want_l2_arp_term_events_t *
@@ -1280,14 +1300,15 @@ l2_api_hookup (vlib_main_t * vm)
{
api_main_t *am = vlibapi_get_main ();
- /* Mark VL_API_BRIDGE_DOMAIN_DUMP as mp safe */
- am->is_mp_safe[VL_API_BRIDGE_DOMAIN_DUMP] = 1;
-
/*
* Set up the (msg_name, crc, message-id) table
*/
REPLY_MSG_ID_BASE = setup_message_id_table ();
+ /* Mark VL_API_BRIDGE_DOMAIN_DUMP as mp safe */
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_BRIDGE_DOMAIN_DUMP, 1);
+
return 0;
}
diff --git a/src/vnet/l2/l2_arp_term.c b/src/vnet/l2/l2_arp_term.c
index 17c8b1d84d0..eed9b7af7c3 100644
--- a/src/vnet/l2/l2_arp_term.c
+++ b/src/vnet/l2/l2_arp_term.c
@@ -25,6 +25,7 @@
#include <vnet/ip/ip6_packet.h>
#include <vnet/ip/icmp6.h>
#include <vnet/ip/ip6.h>
+#include <vnet/ip/ip.api_enum.h>
#include <vnet/ip/format.h>
#include <vnet/ethernet/arp_packet.h>
@@ -289,6 +290,9 @@ arp_term_l2bd (vlib_main_t * vm,
ethertype0 = clib_net_to_host_u16 (*(u16 *) (l3h0 - 2));
arp0 = (ethernet_arp_header_t *) l3h0;
+ if (p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)
+ goto next_l2_feature;
+
if (ethertype0 != ETHERNET_TYPE_ARP)
goto check_ip6_nd;
@@ -445,7 +449,6 @@ arp_term_l2bd (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
.function = arp_term_l2bd,
.name = "arp-term-l2bd",
@@ -460,7 +463,6 @@ VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
.format_buffer = format_ethernet_arp_header,
.format_trace = format_arp_term_input_trace,
};
-/* *INDENT-ON* */
clib_error_t *
arp_term_init (vlib_main_t * vm)
diff --git a/src/vnet/l2/l2_bd.c b/src/vnet/l2/l2_bd.c
index 7e6ea60b440..c7392c03b58 100644
--- a/src/vnet/l2/l2_bd.c
+++ b/src/vnet/l2/l2_bd.c
@@ -102,12 +102,10 @@ bd_free_ip_mac_tables (l2_bridge_domain_t * bd)
ip6_address_t *ip6_addr_key;
hash_free (bd->mac_by_ip4);
- /* *INDENT-OFF* */
hash_foreach_mem (ip6_addr_key, mac_addr, bd->mac_by_ip6,
({
clib_mem_free (ip6_addr_key); /* free memory used for ip6 addr key */
}));
- /* *INDENT-ON* */
hash_free (bd->mac_by_ip6);
}
@@ -454,13 +452,11 @@ done:
* Example of how to disable learning (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain learn 200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_learn_cli, static) = {
.path = "set bridge-domain learn",
.short_help = "set bridge-domain learn <bridge-domain-id> [disable]",
.function = bd_learn,
};
-/* *INDENT-ON* */
static clib_error_t *
bd_default_learn_limit (vlib_main_t *vm, unformat_input_t *input,
@@ -547,13 +543,11 @@ done:
* Example of how to disable forwarding (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain forward 200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_fwd_cli, static) = {
.path = "set bridge-domain forward",
.short_help = "set bridge-domain forward <bridge-domain-id> [disable]",
.function = bd_fwd,
};
-/* *INDENT-ON* */
/**
Set bridge-domain flood enable/disable.
@@ -612,13 +606,11 @@ done:
* Example of how to disable flooding (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain flood 200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_flood_cli, static) = {
.path = "set bridge-domain flood",
.short_help = "set bridge-domain flood <bridge-domain-id> [disable]",
.function = bd_flood,
};
-/* *INDENT-ON* */
/**
Set bridge-domain unknown-unicast flood enable/disable.
@@ -677,13 +669,11 @@ done:
* Example of how to disable unknown-unicast flooding (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain uu-flood 200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_uu_flood_cli, static) = {
.path = "set bridge-domain uu-flood",
.short_help = "set bridge-domain uu-flood <bridge-domain-id> [disable]",
.function = bd_uu_flood,
};
-/* *INDENT-ON* */
/**
Set bridge-domain arp-unicast forward enable/disable.
@@ -742,13 +732,11 @@ done:
* Example of how to disable arp-unicast forwarding (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain arp-ufwd 200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_arp_ufwd_cli, static) = {
.path = "set bridge-domain arp-ufwd",
.short_help = "set bridge-domain arp-ufwd <bridge-domain-id> [disable]",
.function = bd_arp_ufwd,
};
-/* *INDENT-ON* */
/**
Set bridge-domain arp term enable/disable.
@@ -854,13 +842,11 @@ done:
* Example of how to disable mac aging (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain flood 200 0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_mac_age_cli, static) = {
.path = "set bridge-domain mac-age",
.short_help = "set bridge-domain mac-age <bridge-domain-id> <mins>",
.function = bd_mac_age,
};
-/* *INDENT-ON* */
static clib_error_t *
bd_learn_limit (vlib_main_t *vm, unformat_input_t *input,
@@ -921,13 +907,11 @@ VLIB_CLI_COMMAND (bd_learn_limit_cli, static) = {
* Example of how to disable ARP termination (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain arp term 200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_arp_term_cli, static) = {
.path = "set bridge-domain arp term",
.short_help = "set bridge-domain arp term <bridge-domain-id> [disable]",
.function = bd_arp_term,
};
-/* *INDENT-ON* */
/**
@@ -1119,13 +1103,11 @@ done:
* Example of how to delete an ARP entry (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain arp entry 200 192.168.72.45 52:54:00:3b:83:1a del}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_arp_entry_cli, static) = {
.path = "set bridge-domain arp entry",
.short_help = "set bridge-domain arp entry <bridge-domain-id> [<ip-addr> <mac-addr> [del] | del-all]",
.function = bd_arp_entry,
};
-/* *INDENT-ON* */
static u8 *
format_uu_cfg (u8 * s, va_list * args)
@@ -1289,7 +1271,6 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
vlib_cli_output (vm,
"\n IP4/IP6 to MAC table for ARP Termination");
- /* *INDENT-OFF* */
hash_foreach (ip4_addr, mac_addr, bd_config->mac_by_ip4,
({
vlib_cli_output (vm, "%=40U => %=20U",
@@ -1303,7 +1284,6 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
format_ip6_address, ip6_addr,
format_ethernet_address, &mac_addr);
}));
- /* *INDENT-ON* */
}
if ((detail || bd_tag) && (bd_config->bd_tag))
@@ -1349,13 +1329,11 @@ done:
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_show_cli, static) = {
.path = "show bridge-domain",
.short_help = "show bridge-domain [bridge-domain-id [detail|int|arp|bd-tag]]",
.function = bd_show,
};
-/* *INDENT-ON* */
int
bd_add_del (l2_bridge_domain_add_del_args_t * a)
@@ -1493,8 +1471,15 @@ bd_add_del_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (bd_id == ~0)
{
- error = clib_error_return (0, "bridge-domain-id not specified");
- goto done;
+ if (is_add)
+ {
+ bd_id = bd_get_unused_id ();
+ }
+ else
+ {
+ error = clib_error_return (0, "bridge-domain-id not specified");
+ goto done;
+ }
}
if (bd_id == 0)
@@ -1587,7 +1572,6 @@ done:
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_create_cli, static) = {
.path = "create bridge-domain",
.short_help = "create bridge-domain <bridge-domain-id>"
@@ -1595,9 +1579,38 @@ VLIB_CLI_COMMAND (bd_create_cli, static) = {
" [arp-ufwd <0|1>] [mac-age <nn>] [bd-tag <tag>] [del]",
.function = bd_add_del_command_fn,
};
-/* *INDENT-ON* */
+/*
+ * Returns an unused bridge domain id, or ~0 if none can be found.
+ */
+u32
+bd_get_unused_id (void)
+{
+ bd_main_t *bdm = &bd_main;
+ int i, j;
+ static u32 seed = 0;
+ /* limit to 1M tries */
+ for (j = 0; j < 1 << 10; j++)
+ {
+ seed = random_u32 (&seed);
+ for (i = 0; i < 1 << 10; i++)
+ {
+ /*
+ * iterate seed+0, seed+1, seed-1, seed+2, seed-2, ... to generate id
+ */
+ seed += (2 * (i % 2) - 1) * i;
+ /* bd_id must be (1 <= bd_id <= L2_BD_ID_MAX) */
+ seed &= L2_BD_ID_MAX;
+ if (seed == 0)
+ continue;
+ if (bd_find_index (bdm, seed) == ~0)
+ return seed;
+ }
+ }
+
+ return ~0;
+}
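A standalone sketch of the probe order bd_get_unused_id () walks around a given seed — seed+0, seed+1, seed-1, seed+2, seed-2, ... (illustrative only, not VPP code):

#include <stdio.h>

int
main (void)
{
  unsigned int seed = 1000;
  int i;

  for (i = 0; i < 6; i++)
    {
      /* same update as the inner loop above */
      seed += (2 * (i % 2) - 1) * i;
      printf ("%u ", seed); /* prints: 1000 1001 999 1002 998 1003 */
    }
  return 0;
}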
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_bd.h b/src/vnet/l2/l2_bd.h
index 0d77292519d..082d210b972 100644
--- a/src/vnet/l2/l2_bd.h
+++ b/src/vnet/l2/l2_bd.h
@@ -2,6 +2,7 @@
* l2_bd.h : layer 2 bridge domain
*
* Copyright (c) 2013 Cisco and/or its affiliates.
+ * Copyright (c) 2022 Nordix Foundation.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -166,7 +167,7 @@ u32 bd_set_flags (vlib_main_t * vm, u32 bd_index, bd_flags_t flags,
void bd_set_mac_age (vlib_main_t * vm, u32 bd_index, u8 age);
void bd_set_learn_limit (vlib_main_t *vm, u32 bd_index, u32 learn_limit);
int bd_add_del (l2_bridge_domain_add_del_args_t * args);
-
+u32 bd_get_unused_id (void);
/**
* \brief Get a bridge domain.
*
diff --git a/src/vnet/l2/l2_bvi.c b/src/vnet/l2/l2_bvi.c
index e5623682657..e39c4aae39d 100644
--- a/src/vnet/l2/l2_bvi.c
+++ b/src/vnet/l2/l2_bvi.c
@@ -58,14 +58,12 @@ bvi_mac_change (vnet_hw_interface_t * hi,
return (NULL);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (bvi_device_class) = {
.name = "BVI",
.format_device_name = format_bvi_name,
.admin_up_down_function = bvi_admin_up_down,
.mac_addr_change_function = bvi_mac_change,
};
-/* *INDENT-ON* */
/*
* Maintain a bitmap of allocated bvi instance numbers.
@@ -138,13 +136,11 @@ l2_bvi_create (u32 user_instance,
{
vnet_main_t *vnm = vnet_get_main ();
vlib_main_t *vm = vlib_get_main ();
+ vnet_eth_interface_registration_t eir = {};
u32 instance, hw_if_index, slot;
vnet_hw_interface_t *hw_if;
- clib_error_t *error;
mac_address_t mac;
- int rv = 0;
-
ASSERT (sw_if_indexp);
*sw_if_indexp = (u32) ~ 0;
@@ -178,17 +174,10 @@ l2_bvi_create (u32 user_instance,
mac_address_copy (&mac, mac_in);
}
- error = ethernet_register_interface (vnm,
- bvi_device_class.index,
- instance, mac.bytes, &hw_if_index,
- /* flag change */ 0);
-
- if (error)
- {
- rv = VNET_API_ERROR_INVALID_REGISTRATION;
- clib_error_report (error);
- return rv;
- }
+ eir.dev_class_index = bvi_device_class.index;
+ eir.dev_instance = instance;
+ eir.address = mac.bytes;
+ hw_if_index = vnet_eth_register_interface (vnm, &eir);
hw_if = vnet_get_hw_interface (vnm, hw_if_index);
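Seen together with the removed block, the new idiom is: populate a vnet_eth_interface_registration_t and call vnet_eth_register_interface(), which hands back the hw_if_index directly with no error path to check. A before/after sketch using only the names visible in this patch:

  /* before: the call returned a clib_error_t that had to be checked */
  error = ethernet_register_interface (vnm, bvi_device_class.index,
				       instance, mac.bytes, &hw_if_index,
				       /* flag change */ 0);

  /* after: struct-based registration, hw_if_index returned directly */
  vnet_eth_interface_registration_t eir = {};
  eir.dev_class_index = bvi_device_class.index;
  eir.dev_instance = instance;
  eir.address = mac.bytes;
  hw_if_index = vnet_eth_register_interface (vnm, &eir);

The same conversion recurs further down in this patch for the xcrw sham interface in l2_xcrw.c.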
@@ -282,13 +271,11 @@ l2_bvi_create_cli (vlib_main_t * vm,
* Example of how to create a bvi interface:
* @cliexcmd{bvi create}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_bvi_create_command, static) = {
.path = "bvi create",
.short_help = "bvi create [mac <mac-addr>] [instance <instance>]",
.function = l2_bvi_create_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
l2_bvi_delete_cli (vlib_main_t * vm,
@@ -333,13 +320,11 @@ l2_bvi_delete_cli (vlib_main_t * vm,
* Example of how to create a bvi interface:
* @cliexcmd{bvi delete bvi0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_bvi_delete_command, static) = {
.path = "bvi delete",
.short_help = "bvi delete <interface>",
.function = l2_bvi_delete_cli,
};
-/* *INDENT-ON* */
/*
diff --git a/src/vnet/l2/l2_classify.h b/src/vnet/l2/l2_classify.h
index 68a2bb98e64..3c86fb5ca86 100644
--- a/src/vnet/l2/l2_classify.h
+++ b/src/vnet/l2/l2_classify.h
@@ -39,7 +39,6 @@ typedef enum
L2_INPUT_CLASSIFY_NEXT_ETHERNET_INPUT,
L2_INPUT_CLASSIFY_NEXT_IP4_INPUT,
L2_INPUT_CLASSIFY_NEXT_IP6_INPUT,
- L2_INPUT_CLASSIFY_NEXT_LI,
L2_INPUT_CLASSIFY_N_NEXT,
} l2_input_classify_next_t;
diff --git a/src/vnet/l2/l2_efp_filter.c b/src/vnet/l2/l2_efp_filter.c
index ad325b83df2..47256ffa5d3 100644
--- a/src/vnet/l2/l2_efp_filter.c
+++ b/src/vnet/l2/l2_efp_filter.c
@@ -461,7 +461,6 @@ VLIB_NODE_FN (l2_efp_filter_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_efp_filter_node) = {
.name = "l2-efp-filter",
.vector_size = sizeof (u32),
@@ -478,7 +477,6 @@ VLIB_REGISTER_NODE (l2_efp_filter_node) = {
[L2_EFP_FILTER_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
clib_error_t *
@@ -559,13 +557,11 @@ done:
* Example of how to disable a Layer 2 efp-filter on a sub-interface:
* @cliexcmd{set interface l2 efp-filter GigabitEthernet0/8/0.200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_efp_filter_cli, static) = {
.path = "set interface l2 efp-filter",
.short_help = "set interface l2 efp-filter <interface> [disable]",
.function = int_l2_efp_filter,
};
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c
index d1ee82273b1..3dcd1e7ae26 100644
--- a/src/vnet/l2/l2_fib.c
+++ b/src/vnet/l2/l2_fib.c
@@ -95,8 +95,7 @@ format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args)
if (!swif)
return format (s, "Stale");
- return format (s, "%U", format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface_or_null (vnm, sw_if_index));
+ return format (s, "%U", format_vnet_sw_if_index_name, vnm, sw_if_index);
}
typedef struct l2fib_dump_walk_ctx_t_
@@ -353,13 +352,11 @@ show_l2fib (vlib_main_t * vm,
* 3 l2fib entries
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_l2fib_cli, static) = {
.path = "show l2fib",
.short_help = "show l2fib [all] | [bd_id <nn> | bd_index <nn>] [learn | add] | [raw]",
.function = show_l2fib,
};
-/* *INDENT-ON* */
void
l2fib_table_init (void)
@@ -416,13 +413,11 @@ clear_l2fib (vlib_main_t * vm,
* no l2fib entries
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_l2fib_cli, static) = {
.path = "clear l2fib",
.short_help = "clear l2fib",
.function = clear_l2fib,
};
-/* *INDENT-ON* */
static l2fib_seq_num_t
l2fib_cur_seq_num (u32 bd_index, u32 sw_if_index)
@@ -593,20 +588,18 @@ done:
* 3 l2fib entries
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2fib_add_cli, static) = {
.path = "l2fib add",
.short_help = "l2fib add <mac> <bridge-domain-id> filter | <intf> [static | bvi]",
.function = l2fib_add,
};
-/* *INDENT-ON* */
static clib_error_t *
l2fib_test_command_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
- u8 mac[6], save_mac[6];
+ u8 mac[8], save_mac[6];
u32 bd_index = 0;
u32 sw_if_index = 8;
u32 is_add = 0;
@@ -724,13 +717,11 @@ l2fib_test_command_fn (vlib_main_t * vm,
* @cliexcmd{test l2fib del mac 52:54:00:53:00:00 count 4}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2fib_test_command, static) = {
.path = "test l2fib",
.short_help = "test l2fib [add|del|check] mac <base-addr> count <nn>",
.function = l2fib_test_command_fn,
};
-/* *INDENT-ON* */
/**
@@ -833,13 +824,11 @@ done:
* Example of how to delete a MAC Address entry from the L2 FIB table of a bridge-domain (where 200 is the bridge-domain-id):
* @cliexcmd{l2fib del 52:54:00:53:18:33 200}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2fib_del_cli, static) = {
.path = "l2fib del",
.short_help = "l2fib del <mac> <bridge-domain-id> []",
.function = l2fib_del,
};
-/* *INDENT-ON* */
static clib_error_t *
l2fib_set_scan_delay (vlib_main_t *vm, unformat_input_t *input,
@@ -977,13 +966,11 @@ l2fib_flush_mac_all (vlib_main_t * vm,
* Example of how to flush all MAC Address entries from the L2 FIB table:
* @cliexcmd{l2fib flush-mac all}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2fib_flush_mac_all_cli, static) = {
.path = "l2fib flush-mac all",
.short_help = "l2fib flush-mac all",
.function = l2fib_flush_mac_all,
};
-/* *INDENT-ON* */
/*?
* This command kicks off the ager to delete all existing MAC Address entries,
@@ -993,13 +980,11 @@ VLIB_CLI_COMMAND (l2fib_flush_mac_all_cli, static) = {
* Example of how to flush MAC Address entries learned on an interface from the L2 FIB table:
* @cliexcmd{l2fib flush-mac interface GigabitEthernet2/1/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2fib_flush_mac_int_cli, static) = {
.path = "l2fib flush-mac interface",
.short_help = "l2fib flush-mac interface <if-name>",
.function = l2fib_flush_mac_int,
};
-/* *INDENT-ON* */
/**
Flush bridge-domain MACs except static ones.
@@ -1042,13 +1027,11 @@ done:
* Example of how to flush MAC Address entries learned in a bridge domain from the L2 FIB table:
* @cliexcmd{l2fib flush-mac bridge-domain 1000}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2fib_flush_mac_bd_cli, static) = {
.path = "l2fib flush-mac bridge-domain",
.short_help = "l2fib flush-mac bridge-domain <bd-id>",
.function = l2fib_flush_mac_bd,
};
-/* *INDENT-ON* */
clib_error_t *
l2fib_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
@@ -1149,7 +1132,7 @@ l2fib_scan (vlib_main_t * vm, f64 start_time, u8 event_only)
{
for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
{
- if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL)
+ if (BV (clib_bihash_is_free) (&v->kvp[k]))
continue;
l2fib_entry_key_t key = {.raw = v->kvp[k].key };
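The open-coded all-ones test is replaced by the bihash helper; BV (clib_bihash_is_free) hides the same check behind the template's kv type. A minimal sketch of the equivalent predicate, assuming (per the deleted line) that a free kv slot holds all-ones in both key and value:

  #include <stdint.h>

  static inline int
  kv_slot_is_free (uint64_t key, uint64_t value)
  {
    /* mirrors the deleted open-coded test */
    return key == ~0ULL && value == ~0ULL;
  }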
@@ -1366,13 +1349,11 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2fib_mac_age_scanner_process_node) = {
.function = l2fib_mac_age_scanner_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "l2fib-mac-age-scanner-process",
};
-/* *INDENT-ON* */
clib_error_t *
l2fib_init (vlib_main_t * vm)
diff --git a/src/vnet/l2/l2_fib.h b/src/vnet/l2/l2_fib.h
index 202e2d9a33b..e24d427b4e2 100644
--- a/src/vnet/l2/l2_fib.h
+++ b/src/vnet/l2/l2_fib.h
@@ -240,29 +240,9 @@ l2fib_compute_hash_bucket (l2fib_entry_key_t * key)
always_inline u64
l2fib_make_key (const u8 * mac_address, u16 bd_index)
{
- u64 temp;
-
- /*
- * The mac address in memory is A:B:C:D:E:F
- * The bd id in register is H:L
- */
-#if CLIB_ARCH_IS_LITTLE_ENDIAN
- /*
- * Create the in-register key as F:E:D:C:B:A:H:L
- * In memory the key is L:H:A:B:C:D:E:F
- */
- temp = CLIB_MEM_OVERFLOW_LOAD ((u64 *) mac_address) << 16;
- temp = (temp & ~0xffff) | (u64) (bd_index);
-#else
- /*
- * Create the in-register key as H:L:A:B:C:D:E:F
- * In memory the key is H:L:A:B:C:D:E:F
- */
- temp = CLIB_MEM_OVERFLOW_LOAD ((u64 *) mac_address) >> 16;
- temp = temp | (((u64) bd_index) << 48);
-#endif
-
- return temp;
+ l2fib_entry_key_t key = { .fields.bd_index = bd_index };
+ clib_memcpy_fast (&key.fields.mac, mac_address, sizeof (key.fields.mac));
+ return key.raw;
}
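The rewrite drops the endian-specific shifting in favour of letting a union overlay place the bytes. A self-contained model of the trick, with the field layout inferred from the deleted endian comments (the bd_index bytes precede the MAC in memory; the real l2fib_entry_key_t is defined earlier in l2_fib.h):

  #include <stdint.h>
  #include <string.h>

  typedef union
  {
    struct
    {
      uint16_t bd_index;
      uint8_t mac[6];
    } fields;
    uint64_t raw;
  } key_t;

  static uint64_t
  make_key (const uint8_t *mac_address, uint16_t bd_index)
  {
    key_t key = { .fields.bd_index = bd_index };
    memcpy (&key.fields.mac, mac_address, sizeof (key.fields.mac));
    return key.raw; /* byte order falls out of the overlay, no shifts */
  }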
diff --git a/src/vnet/l2/l2_flood.c b/src/vnet/l2/l2_flood.c
index c0d7bf8dfab..f8cb3cb5687 100644
--- a/src/vnet/l2/l2_flood.c
+++ b/src/vnet/l2/l2_flood.c
@@ -362,7 +362,6 @@ VLIB_NODE_FN (l2flood_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2flood_node) = {
.name = "l2-flood",
.vector_size = sizeof (u32),
@@ -380,7 +379,6 @@ VLIB_REGISTER_NODE (l2flood_node) = {
[L2FLOOD_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
clib_error_t *
@@ -468,13 +466,11 @@ done:
* Example of how to disable flooding:
* @cliexcmd{set interface l2 flood GigabitEthernet0/8/0 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_flood_cli, static) = {
.path = "set interface l2 flood",
.short_help = "set interface l2 flood <interface> [disable]",
.function = int_flood,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_fwd.c b/src/vnet/l2/l2_fwd.c
index 3414f6c490e..503dfc27957 100644
--- a/src/vnet/l2/l2_fwd.c
+++ b/src/vnet/l2/l2_fwd.c
@@ -215,8 +215,7 @@ l2fwd_process (vlib_main_t * vm,
* unless some other feature is inserted before uu_flood
*/
if (vnet_buffer (b0)->l2.feature_bitmap &
- (L2INPUT_FEAT_UU_FLOOD |
- L2INPUT_FEAT_UU_FWD | L2INPUT_FEAT_GBP_FWD))
+ (L2INPUT_FEAT_UU_FLOOD | L2INPUT_FEAT_UU_FWD))
{
*next0 = vnet_l2_feature_next (b0, msm->feat_next_node_index,
L2INPUT_FEAT_FWD);
@@ -289,7 +288,6 @@ l2fwd_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
#ifdef COUNTERS
em->counters[node_counter_base_index + L2FWD_ERROR_L2FWD] += 4;
#endif
- /* *INDENT-OFF* */
l2fib_lookup_4 (msm->mac_table, &cached_key, &cached_result,
h0->dst_address, h1->dst_address,
h2->dst_address, h3->dst_address,
@@ -305,7 +303,6 @@ l2fwd_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
&result1,
&result2,
&result3);
- /* *INDENT-ON* */
l2fwd_process (vm, node, msm, em, b[0], sw_if_index0, &result0, next);
l2fwd_process (vm, node, msm, em, b[1], sw_if_index1, &result1,
next + 1);
@@ -415,7 +412,6 @@ VLIB_NODE_FN (l2fwd_node) (vlib_main_t * vm,
return l2fwd_node_inline (vm, node, frame, 0 /* do_trace */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2fwd_node) = {
.name = "l2-fwd",
.vector_size = sizeof (u32),
@@ -433,7 +429,6 @@ VLIB_REGISTER_NODE (l2fwd_node) = {
[L2FWD_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
clib_error_t *
@@ -528,13 +523,11 @@ done:
* Example of how to disable forwarding:
* @cliexcmd{set interface l2 forward GigabitEthernet0/8/0 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_fwd_cli, static) = {
.path = "set interface l2 forward",
.short_help = "set interface l2 forward <interface> [disable]",
.function = int_fwd,
};
-/* *INDENT-ON* */
#endif
diff --git a/src/vnet/l2/l2_in_out_acl.c b/src/vnet/l2/l2_in_out_acl.c
index f8293c1feee..2e2cb1e7f36 100644
--- a/src/vnet/l2/l2_in_out_acl.c
+++ b/src/vnet/l2/l2_in_out_acl.c
@@ -278,7 +278,7 @@ l2_in_out_acl_node_fn (vlib_main_t * vm,
u32 table_index0;
vnet_classify_table_t *t0;
vnet_classify_entry_t *e0;
- u64 hash0;
+ u32 hash0;
u8 *h0;
u8 error0;
@@ -288,7 +288,7 @@ l2_in_out_acl_node_fn (vlib_main_t * vm,
vlib_buffer_t *p1 = vlib_get_buffer (vm, from[3]);
vnet_classify_table_t *tp1;
u32 table_index1;
- u64 phash1;
+ u32 phash1;
table_index1 = vnet_buffer (p1)->l2_classify.table_index;
@@ -464,7 +464,6 @@ VLIB_NODE_FN (l2_outacl_node) (vlib_main_t * vm,
IN_OUT_ACL_OUTPUT_TABLE_GROUP);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_inacl_node) = {
.name = "l2-input-acl",
.vector_size = sizeof (u32),
@@ -498,7 +497,6 @@ VLIB_REGISTER_NODE (l2_outacl_node) = {
[ACL_NEXT_INDEX_DENY] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vnet/l2/l2_in_out_feat_arc.c b/src/vnet/l2/l2_in_out_feat_arc.c
index b3b4a8cbb73..26fbd3eb776 100644
--- a/src/vnet/l2/l2_in_out_feat_arc.c
+++ b/src/vnet/l2/l2_in_out_feat_arc.c
@@ -257,7 +257,8 @@ l2_in_out_feat_arc_node_fn (vlib_main_t * vm,
sw_if_index = sw_if_indices;
n_left = frame->n_vectors;
- CLIB_PREFETCH (next_node_indices, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (next_node_indices,
+ sizeof (fam->feat_next_node_index[is_output]), LOAD);
while (n_left > 3 * L2_FEAT_ARC_VEC_SIZE)
{
@@ -395,7 +396,6 @@ vnet_l2_in_out_feat_arc_enable_disable (u32 sw_if_index, int is_output,
}
#endif /* CLIB_MARCH_VARIANT */
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (l2_in_ip4_arc, static) =
{
.arc_name = "l2-input-ip4",
@@ -437,10 +437,8 @@ VNET_FEATURE_ARC_INIT (l2_in_nonip_arc, static) =
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_in_feat_arc_node) = {
.name = "l2-input-feat-arc",
.vector_size = sizeof (u32),
@@ -520,7 +518,6 @@ VNET_FEATURE_INIT (l2_out_nonip_arc_end, static) =
.node_name = "l2-output-feat-arc-end",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c
index de22cef600e..23bd5cc9958 100644
--- a/src/vnet/l2/l2_input.c
+++ b/src/vnet/l2/l2_input.c
@@ -646,13 +646,11 @@ done:
* Example of how to remove an interface from a Layer2 bridge-domain:
* @cliexcmd{set interface l3 GigabitEthernet0/a/0.200}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_bridge_cli, static) = {
.path = "set interface l2 bridge",
.short_help = "set interface l2 bridge <interface> <bridge-domain-id> [bvi|uu-fwd] [shg]",
.function = int_l2_bridge,
};
-/* *INDENT-ON* */
/**
* Set subinterface in xconnect mode with another interface.
@@ -712,13 +710,11 @@ done:
* @cliexcmd{set interface l3 GigabitEthernet0/8/0.300}
* @cliexcmd{set interface l3 GigabitEthernet0/9/0.300}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_xc_cli, static) = {
.path = "set interface l2 xconnect",
.short_help = "set interface l2 xconnect <interface> <peer interface>",
.function = int_l2_xc,
};
-/* *INDENT-ON* */
/**
* Set subinterface in L3 mode.
@@ -762,13 +758,11 @@ done:
* Example of how to set the mode of an interface to Layer 3:
* @cliexcmd{set interface l3 GigabitEthernet0/8/0.200}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l3_cli, static) = {
.path = "set interface l3",
.short_help = "set interface l3 <interface>",
.function = int_l3,
};
-/* *INDENT-ON* */
/**
* Show interface mode.
@@ -809,10 +803,8 @@ show_int_mode (vlib_main_t * vm,
{
/* Gather interfaces. */
sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
- _vec_len (sis) = 0;
- /* *INDENT-OFF* */
+ vec_set_len (sis, 0);
pool_foreach (si, im->sw_interfaces) { vec_add1 (sis, si[0]); }
- /* *INDENT-ON* */
}
vec_foreach (si, sis)
@@ -878,13 +870,11 @@ done:
* l2 bridge GigabitEthernet0/8/0.200 bd_id 200 shg 0
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_l2_mode, static) = {
.path = "show mode",
.short_help = "show mode [<if-name1> <if-name2> ...]",
.function = show_int_mode,
};
-/* *INDENT-ON* */
#define foreach_l2_init_function \
_(feat_bitmap_drop_init) \
diff --git a/src/vnet/l2/l2_input.h b/src/vnet/l2/l2_input.h
index ba4c4b6ed31..3de1537b45e 100644
--- a/src/vnet/l2/l2_input.h
+++ b/src/vnet/l2/l2_input.h
@@ -27,6 +27,7 @@
#include <vnet/ethernet/packet.h>
#include <vnet/ip/ip4_inlines.h>
#include <vnet/ip/ip6_inlines.h>
+#include <vnet/mpls/mpls_lookup.h>
/* l2 connection type */
typedef enum l2_input_flags_t_
@@ -136,17 +137,10 @@ l2input_bd_config (u32 bd_index)
_(ARP_UFWD, "l2-uu-fwd") \
_(ARP_TERM, "arp-term-l2bd") \
_(UU_FLOOD, "l2-flood") \
- _(GBP_FWD, "gbp-fwd") \
_(UU_FWD, "l2-uu-fwd") \
_(FWD, "l2-fwd") \
_(RW, "l2-rw") \
_(LEARN, "l2-learn") \
- _(L2_EMULATION, "l2-emulation") \
- _(GBP_LEARN, "gbp-learn-l2") \
- _(GBP_LPM_ANON_CLASSIFY, "l2-gbp-lpm-anon-classify") \
- _(GBP_NULL_CLASSIFY, "gbp-null-classify") \
- _(GBP_SRC_CLASSIFY, "gbp-src-classify") \
- _(GBP_LPM_CLASSIFY, "l2-gbp-lpm-classify") \
_(VTR, "l2-input-vtr") \
_(L2_IP_QOS_RECORD, "l2-ip-qos-record") \
_(VPATH, "vpath-input-l2") \
@@ -334,7 +328,7 @@ vnet_update_l2_len (vlib_buffer_t *b)
/*
* Compute flow hash of an ethernet packet, use 5-tuple hash if L3 packet
- * is ip4 or ip6. Otherwise hash on smac/dmac/etype.
+ * is ip4, ip6, or mpls. Otherwise hash on smac/dmac/etype.
* The vlib buffer current pointer is expected to be at ethernet header
* and vnet l2.l2_len is expected to be setup already.
*/
@@ -349,6 +343,9 @@ vnet_l2_compute_flow_hash (vlib_buffer_t * b)
return ip4_compute_flow_hash ((ip4_header_t *) l3h, IP_FLOW_HASH_DEFAULT);
else if (ethertype == ETHERNET_TYPE_IP6)
return ip6_compute_flow_hash ((ip6_header_t *) l3h, IP_FLOW_HASH_DEFAULT);
+ else if (ethertype == ETHERNET_TYPE_MPLS)
+ return mpls_compute_flow_hash ((mpls_unicast_header_t *) l3h,
+ IP_FLOW_HASH_DEFAULT);
else
{
u32 a, b, c;
diff --git a/src/vnet/l2/l2_input_classify.c b/src/vnet/l2/l2_input_classify.c
index 53d46399daf..cc031bd46a5 100644
--- a/src/vnet/l2/l2_input_classify.c
+++ b/src/vnet/l2/l2_input_classify.c
@@ -179,8 +179,7 @@ VLIB_NODE_FN (l2_input_classify_node) (vlib_main_t * vm,
int type_index0, type_index1;
vnet_classify_table_t *t0, *t1;
u32 table_index0, table_index1;
- u64 hash0, hash1;
-
+ u32 hash0, hash1;
/* prefetch next iteration */
{
@@ -265,7 +264,7 @@ VLIB_NODE_FN (l2_input_classify_node) (vlib_main_t * vm,
u32 type_index0;
vnet_classify_table_t *t0;
u32 table_index0;
- u64 hash0;
+ u32 hash0;
bi0 = from[0];
b0 = vlib_get_buffer (vm, bi0);
@@ -316,14 +315,14 @@ VLIB_NODE_FN (l2_input_classify_node) (vlib_main_t * vm,
u32 next0 = ~0; /* next l2 input feature, please... */
ethernet_header_t *h0;
u32 table_index0;
- u64 hash0;
+ u32 hash0;
vnet_classify_table_t *t0;
vnet_classify_entry_t *e0;
if (PREDICT_TRUE (n_left_from > 2))
{
vlib_buffer_t *p2 = vlib_get_buffer (vm, from[2]);
- u64 phash2;
+ u32 phash2;
u32 table_index2;
vnet_classify_table_t *tp2;
@@ -443,7 +442,6 @@ VLIB_NODE_FN (l2_input_classify_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_input_classify_node) = {
.name = "l2-input-classify",
.vector_size = sizeof (u32),
@@ -463,10 +461,8 @@ VLIB_REGISTER_NODE (l2_input_classify_node) = {
[L2_INPUT_CLASSIFY_NEXT_ETHERNET_INPUT] = "ethernet-input-not-l2",
[L2_INPUT_CLASSIFY_NEXT_IP4_INPUT] = "ip4-input",
[L2_INPUT_CLASSIFY_NEXT_IP6_INPUT] = "ip6-input",
- [L2_INPUT_CLASSIFY_NEXT_LI] = "li-hit",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
/** l2 input classifier feature initialization. */
@@ -643,7 +639,6 @@ int_l2_input_classify_command_fn (vlib_main_t * vm,
* @todo This is incomplete. This needs a detailed description and a
* practical example.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_input_classify_cli, static) = {
.path = "set interface l2 input classify",
.short_help =
@@ -651,7 +646,6 @@ VLIB_CLI_COMMAND (int_l2_input_classify_cli, static) = {
" [ip6-table <n>] [other-table <n>]",
.function = int_l2_input_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_input_node.c b/src/vnet/l2/l2_input_node.c
index 3638a8aa00d..76b94809eb3 100644
--- a/src/vnet/l2/l2_input_node.c
+++ b/src/vnet/l2/l2_input_node.c
@@ -141,9 +141,8 @@ classify_and_dispatch (l2input_main_t * msm, vlib_buffer_t * b0, u16 * next0)
u8 protocol = ((ip6_header_t *) l3h0)->protocol;
/* Disable bridge forwarding (flooding will execute instead if not xconnect) */
- feat_mask &= ~(L2INPUT_FEAT_FWD |
- L2INPUT_FEAT_UU_FLOOD |
- L2INPUT_FEAT_UU_FWD | L2INPUT_FEAT_GBP_FWD);
+ feat_mask &=
+ ~(L2INPUT_FEAT_FWD | L2INPUT_FEAT_UU_FLOOD | L2INPUT_FEAT_UU_FWD);
if (ethertype != ETHERNET_TYPE_ARP)
feat_mask &= ~(L2INPUT_FEAT_ARP_UFWD);
@@ -252,11 +251,11 @@ l2input_node_inline (vlib_main_t * vm,
/* Prefetch next iteration. */
{
- /* Prefetch the buffer header and packet for the N+2 loop iteration */
- vlib_prefetch_buffer_header (b[4], LOAD);
- vlib_prefetch_buffer_header (b[5], LOAD);
- vlib_prefetch_buffer_header (b[6], LOAD);
- vlib_prefetch_buffer_header (b[7], LOAD);
+ /* Prefetch the buffer header for the N+2 loop iteration */
+ clib_prefetch_store (b[4]);
+ clib_prefetch_store (b[5]);
+ clib_prefetch_store (b[6]);
+ clib_prefetch_store (b[7]);
clib_prefetch_store (b[4]->data);
clib_prefetch_store (b[5]->data);
@@ -366,7 +365,6 @@ VLIB_NODE_FN (l2input_node) (vlib_main_t * vm,
return l2input_node_inline (vm, node, frame, 0 /* do_trace */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2input_node) = {
.name = "l2-input",
.vector_size = sizeof (u32),
@@ -386,7 +384,6 @@ VLIB_REGISTER_NODE (l2input_node) = {
[L2INPUT_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_input_vtr.c b/src/vnet/l2/l2_input_vtr.c
index 3c1235bfa32..ccf3efa2390 100644
--- a/src/vnet/l2/l2_input_vtr.c
+++ b/src/vnet/l2/l2_input_vtr.c
@@ -319,7 +319,6 @@ VLIB_NODE_FN (l2_invtr_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_invtr_node) = {
.name = "l2-input-vtr",
.vector_size = sizeof (u32),
@@ -336,7 +335,6 @@ VLIB_REGISTER_NODE (l2_invtr_node) = {
[L2_INVTR_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
clib_error_t *
diff --git a/src/vnet/l2/l2_learn.c b/src/vnet/l2/l2_learn.c
index 6d90cee62a7..24b5389e55a 100644
--- a/src/vnet/l2/l2_learn.c
+++ b/src/vnet/l2/l2_learn.c
@@ -439,7 +439,6 @@ VLIB_NODE_FN (l2learn_node) (vlib_main_t * vm,
return l2learn_node_inline (vm, node, frame, 0 /* do_trace */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2learn_node) = {
.name = "l2-learn",
.vector_size = sizeof (u32),
@@ -457,7 +456,6 @@ VLIB_REGISTER_NODE (l2learn_node) = {
[L2LEARN_NEXT_L2FWD] = "l2-fwd",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
clib_error_t *
@@ -540,13 +538,11 @@ done:
* Example of how to disable learning:
* @cliexcmd{set interface l2 learn GigabitEthernet0/8/0 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_learn_cli, static) = {
.path = "set interface l2 learn",
.short_help = "set interface l2 learn <interface> [disable]",
.function = int_learn,
};
-/* *INDENT-ON* */
static clib_error_t *
diff --git a/src/vnet/l2/l2_output.c b/src/vnet/l2/l2_output.c
index ba40de316d1..7c70cf9f4c7 100644
--- a/src/vnet/l2/l2_output.c
+++ b/src/vnet/l2/l2_output.c
@@ -22,6 +22,7 @@
#include <vppinfra/error.h>
#include <vppinfra/hash.h>
+#include <vppinfra/vector/count_equal.h>
#include <vnet/l2/feat_bitmap.h>
#include <vnet/l2/l2_output.h>
@@ -442,7 +443,6 @@ VLIB_NODE_FN (l2output_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2output_node) = {
.name = "l2-output",
.vector_size = sizeof (u32),
@@ -460,7 +460,6 @@ VLIB_REGISTER_NODE (l2output_node) = {
[L2OUTPUT_NEXT_BAD_INTF] = "l2-output-bad-intf",
},
};
-/* *INDENT-ON* */
#define foreach_l2output_bad_intf_error \
@@ -548,7 +547,6 @@ VLIB_NODE_FN (l2output_bad_intf_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2output_bad_intf_node) = {
.name = "l2-output-bad-intf",
.vector_size = sizeof (u32),
@@ -564,7 +562,6 @@ VLIB_REGISTER_NODE (l2output_bad_intf_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
static clib_error_t *
l2output_init (vlib_main_t * vm)
diff --git a/src/vnet/l2/l2_output.h b/src/vnet/l2/l2_output.h
index 1cc1e738841..201f5e195a4 100644
--- a/src/vnet/l2/l2_output.h
+++ b/src/vnet/l2/l2_output.h
@@ -81,9 +81,6 @@ extern vlib_node_registration_t l2output_node;
#define foreach_l2output_feat \
_(OUTPUT, "interface-output") \
_(SPAN, "span-l2-output") \
- _(GBP_POLICY_LPM, "gbp-policy-lpm") \
- _(GBP_POLICY_PORT, "gbp-policy-port") \
- _(GBP_POLICY_MAC, "gbp-policy-mac") \
_(CFM, "feature-bitmap-drop") \
_(QOS, "feature-bitmap-drop") \
_(ACL, "l2-output-acl") \
diff --git a/src/vnet/l2/l2_output_classify.c b/src/vnet/l2/l2_output_classify.c
index 96d0b14753a..33a7c927386 100644
--- a/src/vnet/l2/l2_output_classify.c
+++ b/src/vnet/l2/l2_output_classify.c
@@ -172,8 +172,7 @@ VLIB_NODE_FN (l2_output_classify_node) (vlib_main_t * vm,
int type_index0, type_index1;
vnet_classify_table_t *t0, *t1;
u32 table_index0, table_index1;
- u64 hash0, hash1;
-
+ u32 hash0, hash1;
/* prefetch next iteration */
{
@@ -257,7 +256,7 @@ VLIB_NODE_FN (l2_output_classify_node) (vlib_main_t * vm,
u32 type_index0;
vnet_classify_table_t *t0;
u32 table_index0;
- u64 hash0;
+ u32 hash0;
bi0 = from[0];
b0 = vlib_get_buffer (vm, bi0);
@@ -308,14 +307,14 @@ VLIB_NODE_FN (l2_output_classify_node) (vlib_main_t * vm,
u32 next0 = ~0;
ethernet_header_t *h0;
u32 table_index0;
- u64 hash0;
+ u32 hash0;
vnet_classify_table_t *t0;
vnet_classify_entry_t *e0;
if (PREDICT_TRUE (n_left_from > 2))
{
vlib_buffer_t *p2 = vlib_get_buffer (vm, from[2]);
- u64 phash2;
+ u32 phash2;
u32 table_index2;
vnet_classify_table_t *tp2;
@@ -436,7 +435,6 @@ VLIB_NODE_FN (l2_output_classify_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_output_classify_node) = {
.name = "l2-output-classify",
.vector_size = sizeof (u32),
@@ -455,7 +453,6 @@ VLIB_REGISTER_NODE (l2_output_classify_node) = {
[L2_OUTPUT_CLASSIFY_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
/** l2 output classifier feature initialization. */
@@ -635,7 +632,6 @@ int_l2_output_classify_command_fn (vlib_main_t * vm,
* @todo This is incomplete. This needs a detailed description and a
* practical example.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_output_classify_cli, static) = {
.path = "set interface l2 output classify",
.short_help =
@@ -643,7 +639,6 @@ VLIB_CLI_COMMAND (int_l2_output_classify_cli, static) = {
" [ip6-table <n>] [other-table <n>]",
.function = int_l2_output_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_patch.c b/src/vnet/l2/l2_patch.c
index 6de4e50a298..f85938ed799 100644
--- a/src/vnet/l2/l2_patch.c
+++ b/src/vnet/l2/l2_patch.c
@@ -206,7 +206,6 @@ VLIB_NODE_FN (l2_patch_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_patch_node) = {
.name = "l2-patch",
.vector_size = sizeof (u32),
@@ -223,7 +222,6 @@ VLIB_REGISTER_NODE (l2_patch_node) = {
[L2_PATCH_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
extern int
vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add);
@@ -270,6 +268,8 @@ vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add)
vnet_feature_enable_disable ("device-input", "l2-patch",
rxhi->sw_if_index, 1, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth", "l2-patch",
+ rxhi->sw_if_index, 1, 0, 0);
}
else
{
@@ -278,6 +278,8 @@ vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add)
vnet_feature_enable_disable ("device-input", "l2-patch",
rxhi->sw_if_index, 0, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth", "l2-patch",
+ rxhi->sw_if_index, 0, 0, 0);
if (vec_len (l2pm->tx_next_by_rx_sw_if_index) > rx_sw_if_index)
{
l2pm->tx_next_by_rx_sw_if_index[rx_sw_if_index] = ~0;
@@ -369,13 +371,11 @@ done:
* @todo This is incomplete. This needs a detailed description and a
* practical example.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_patch_command, static) = {
.path = "test l2patch",
.short_help = "test l2patch rx <intfc> tx <intfc> [del]",
.function = test_patch_command_fn,
};
-/* *INDENT-ON* */
/** Display the contents of the l2patch table. */
static clib_error_t *
@@ -421,13 +421,11 @@ show_l2patch (vlib_main_t * vm,
* @todo This is incomplete. This needs a detailed description and a
* practical example.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_l2patch_cli, static) = {
.path = "show l2patch",
.short_help = "Show l2 interface cross-connect entries",
.function = show_l2patch,
};
-/* *INDENT-ON* */
static clib_error_t *
l2_patch_init (vlib_main_t * vm)
diff --git a/src/vnet/l2/l2_rw.c b/src/vnet/l2/l2_rw.c
index 2c008794c1b..c0e8ec489fc 100644
--- a/src/vnet/l2/l2_rw.c
+++ b/src/vnet/l2/l2_rw.c
@@ -109,6 +109,7 @@ l2_rw_rewrite (l2_rw_entry_t * rwe, u8 * h)
/* FALLTHROUGH */
case 1:
d[0] = (d[0] & ~rwe->mask[0]) | rwe->value[0];
+ rwe->hit_count++;
break;
default:
abort ();
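The counter added here sits on top of a masked-merge rewrite. Modelled on plain 64-bit words (the real entry uses u32x4 vectors and a fall-through switch over up to four of them), a sketch of one rewrite step:

  #include <stdint.h>

  typedef struct
  {
    uint64_t mask;	/* bits owned by this rewrite entry */
    uint64_t value;	/* bits to write into the packet */
    uint64_t hit_count; /* per-entry counter added by this patch */
  } rw_entry_t;

  static inline void
  rewrite_word (rw_entry_t *rwe, uint64_t *d)
  {
    /* clear the masked bits, then OR in the configured value */
    d[0] = (d[0] & ~rwe->mask) | rwe->value;
    rwe->hit_count++;
  }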
@@ -332,6 +333,7 @@ l2_rw_mod_entry (u32 * index,
return 0;
}
+ e->hit_count = 0;
e->skip_n_vectors = skip / sizeof (u32x4);
skip -= e->skip_n_vectors * sizeof (u32x4);
e->rewrite_n_vectors = (skip + len - 1) / sizeof (u32x4) + 1;
@@ -398,17 +400,19 @@ l2_rw_entry_cli_fn (vlib_main_t * vm,
* the provisioned mask and value, modifies the packet header.
*
* @cliexpar
- * @todo This is incomplete. This needs a detailed description and a
- * practical example.
+ * Example of how to add an l2 rewrite entry to change the destination mac of
+ * the packet to 00:8a:00:0d:0e:02 (where parameter mask is the Ethernet header's mask,
+ * parameter value is the Ethernet header's value):
+ * @cliexcmd{l2 rewrite entry mask ffffffffffff00000000000000000000 value 008a000d0e0200000000000000000000}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_entry_cli, static) = {
.path = "l2 rewrite entry",
.short_help =
"l2 rewrite entry [index <index>] [mask <hex-mask>] [value <hex-value>] [skip <n_bytes>] [del]",
.function = l2_rw_entry_cli_fn,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
int
@@ -468,21 +472,36 @@ l2_rw_interface_cli_fn (vlib_main_t * vm,
}
/*?
- * Layer 2-Rewrite node uses classify tables to match packets. Then, using
- * the provisioned mask and value, modifies the packet header.
+ * Apply the rule to the interface. The following example shows how to use a
+ * classify entry and a Layer 2-Rewrite entry to modify the packet's ethernet
+ * header on the interface.
*
* @cliexpar
- * @todo This is incomplete. This needs a detailed description and a
- * practical example.
+ * Example of how to use classify to filter packets that do not need to be
+ * modified (where 192.168.68.34 is the destination ip of the data packet and
+ * 8080 is the destination port of the packet):
+ * @cliexcmd{classify table mask l3 ip4 dst l4 dst_port}
+ * @cliexcmd{classify session acl-hit-next permit table-index 0 match l3 ip4 dst 192.168.68.34 l4 dst_port 8080}
+ *
+ * @cliexpar
+ * Example of how to apply classify and l2 rewrite rules to the interface
+ * (where YusurK2Eth6/0/1/3 is the interface, \"table 0\" selects classify
+ * table 0, and \"miss-index 0\" selects the l2 rewrite entry applied to
+ * packets that miss the classify table):
+ * @cliexcmd{set interface l2 rewrite YusurK2Eth6/0/1/3 table 0 miss-index 0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_interface_cli, static) = {
.path = "set interface l2 rewrite",
.short_help =
"set interface l2 rewrite <interface> [table <table index>] [miss-index <entry-index>]",
.function = l2_rw_interface_cli_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
l2_rw_show_interfaces_cli_fn (vlib_main_t * vm,
@@ -494,30 +513,27 @@ l2_rw_show_interfaces_cli_fn (vlib_main_t * vm,
vlib_cli_output (vm, "No interface is currently using l2 rewrite\n");
uword i;
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, rw->configs_bitmap) {
vlib_cli_output (vm, "sw_if_index:%d %U\n", i, format_l2_rw_config, &rw->configs[i]);
}
- /* *INDENT-ON* */
return 0;
}
/*?
- * Layer 2-Rewrite node uses classify tables to match packets. Then, using
- * the provisioned mask and value, modifies the packet header.
+ * This command displays the l2 rewrite entries of the interfaces.
*
* @cliexpar
- * @todo This is incomplete. This needs a detailed description and a
- * practical example.
+ * Example of how to display the l2 rewrite rules on the interface:
+ * @cliexstart{show l2 rewrite interfaces}
+ * sw_if_index:4 table-index:0 miss-index:0
+ * @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_show_interfaces_cli, static) = {
.path = "show l2 rewrite interfaces",
.short_help =
"show l2 rewrite interfaces",
.function = l2_rw_show_interfaces_cli_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
l2_rw_show_entries_cli_fn (vlib_main_t * vm,
@@ -528,30 +544,29 @@ l2_rw_show_entries_cli_fn (vlib_main_t * vm,
if (pool_elts (rw->entries) == 0)
vlib_cli_output (vm, "No entries\n");
- /* *INDENT-OFF* */
pool_foreach (e, rw->entries) {
vlib_cli_output (vm, "%U\n", format_l2_rw_entry, e);
}
- /* *INDENT-ON* */
return 0;
}
/*?
- * Layer 2-Rewrite node uses classify tables to match packets. Then, using
- * the provisioned mask and value, modifies the packet header.
+ * This command displays all l2 rewrite entries.
*
* @cliexpar
- * @todo This is incomplete. This needs a detailed description and a
- * practical example.
+ * Example of how to display all l2 rewrite entries:
+ * @cliexstart{show l2 rewrite entries}
+ * 0 - mask:ffffffffffff00000000000000000000 value:aabbccddeeff00000000000000000000
+ * hits:0 skip_bytes:0
+ * @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_show_entries_cli, static) = {
.path = "show l2 rewrite entries",
.short_help =
"show l2 rewrite entries",
.function = l2_rw_show_entries_cli_fn,
};
-/* *INDENT-ON* */
static int
l2_rw_enable_disable (u32 bridge_domain, u8 disable)
@@ -587,21 +602,22 @@ l2_rw_set_cli_fn (vlib_main_t * vm,
}
/*?
- * Layer 2-Rewrite node uses classify tables to match packets. Then, using
- * the provisioned mask and value, modifies the packet header.
+ * Layer 2 rewrite can be enabled and disabled on each interface and on each
+ * bridge-domain. Use this command to manage l2 rewrite on a bridge-domain.
*
* @cliexpar
- * @todo This is incomplete. This needs a detailed description and a
- * practical example.
+ * Example of how to enable rewrite (where 100 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain rewrite 100}
+ * Example of how to disable rewrite (where 100 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain rewrite 100 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_set_cli, static) = {
.path = "set bridge-domain rewrite",
.short_help =
"set bridge-domain rewrite <bridge-domain> [disable]",
.function = l2_rw_set_cli_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
l2_rw_init (vlib_main_t * vm)
@@ -643,7 +659,6 @@ static char *l2_rw_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_rw_node) = {
.name = "l2-rw",
.vector_size = sizeof (u32),
@@ -655,7 +670,6 @@ VLIB_REGISTER_NODE (l2_rw_node) = {
.n_next_nodes = L2_RW_N_NEXT,
.next_nodes = { [L2_RW_NEXT_DROP] = "error-drop"},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_rw.h b/src/vnet/l2/l2_rw.h
index f9b10333f43..6d12a21fe55 100644
--- a/src/vnet/l2/l2_rw.h
+++ b/src/vnet/l2/l2_rw.h
@@ -27,7 +27,6 @@
#include <vnet/l2/l2_input.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct _l2_rw_entry {
u16 skip_n_vectors;
u16 rewrite_n_vectors;
@@ -35,15 +34,12 @@ typedef CLIB_PACKED(struct _l2_rw_entry {
u32x4 *mask;
u32x4 *value;
}) l2_rw_entry_t;
-/* *INDENT-ON* */
/* l2_rw configuration for one interface */
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct _l2_rw_config {
u32 table_index; /* Which classify table to use */
u32 miss_index; /* Rewrite entry to use if table does not match */
}) l2_rw_config_t;
-/* *INDENT-ON* */
typedef struct
{
diff --git a/src/vnet/l2/l2_test.c b/src/vnet/l2/l2_test.c
index ba8802da1b6..b78e388a9f1 100644
--- a/src/vnet/l2/l2_test.c
+++ b/src/vnet/l2/l2_test.c
@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright(c) 2021 Cisco Systems, Inc.
+ * Copyright(c) 2022 Nordix Foundation.
*/
#include <vat/vat.h>
@@ -28,6 +29,10 @@
#include <vnet/l2/l2.api.h>
#undef vl_endianfun
+#define vl_calcsizefun
+#include <vnet/l2/l2.api.h>
+#undef vl_calcsizefun
+
typedef struct
{
/* API message ID base */
@@ -188,7 +193,7 @@ api_l2fib_add_del (vat_main_t *vam)
unformat_input_t *i = vam->input;
vl_api_l2fib_add_del_t *mp;
f64 timeout;
- u8 mac[6] = { 0 };
+ u8 mac[8] = { 0 };
u8 mac_set = 0;
u32 bd_id;
u8 bd_id_set = 0;
@@ -630,6 +635,18 @@ done:
return ret;
}
+static int
+api_bridge_domain_add_del_v2 (vat_main_t *vam)
+{
+ return -1;
+}
+
+static void
+vl_api_bridge_domain_add_del_v2_reply_t_handler (
+ vl_api_bridge_domain_add_del_v2_reply_t *mp)
+{
+}
+
#define foreach_pbb_vtr_op \
_ ("disable", L2_VTR_DISABLED) \
_ ("pop", L2_VTR_POP_2) \
diff --git a/src/vnet/l2/l2_uu_fwd.c b/src/vnet/l2/l2_uu_fwd.c
index fb3571d159c..4a510b658d7 100644
--- a/src/vnet/l2/l2_uu_fwd.c
+++ b/src/vnet/l2/l2_uu_fwd.c
@@ -211,7 +211,6 @@ VLIB_NODE_FN (l2_uu_fwd_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_uu_fwd_node) = {
.name = "l2-uu-fwd",
.vector_size = sizeof (u32),
@@ -228,7 +227,6 @@ VLIB_REGISTER_NODE (l2_uu_fwd_node) = {
[L2_UU_FWD_NEXT_L2_OUTPUT] = "l2-output",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_vtr.c b/src/vnet/l2/l2_vtr.c
index bfd1dcb9280..4053c0fc1cb 100644
--- a/src/vnet/l2/l2_vtr.c
+++ b/src/vnet/l2/l2_vtr.c
@@ -670,13 +670,11 @@ done:
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_vtr_cli, static) = {
.path = "set interface l2 tag-rewrite",
.short_help = "set interface l2 tag-rewrite <interface> [disable | pop {1|2} | push {dot1q|dot1ad} <tag> <tag>]",
.function = int_l2_vtr,
};
-/* *INDENT-ON* */
/**
* Get pbb tag rewrite on the given interface.
@@ -816,13 +814,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_pbb_vtr_cli, static) = {
.path = "set interface l2 pbb-tag-rewrite",
.short_help = "set interface l2 pbb-tag-rewrite <interface> [disable | pop | push | translate_pbb_stag <outer_tag> dmac <address> smac <address> s_id <nn> [b_vlanid <nn>]]",
.function = int_l2_pbb_vtr,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_xcrw.c b/src/vnet/l2/l2_xcrw.c
index c2c325a796f..9edd8b6ba57 100644
--- a/src/vnet/l2/l2_xcrw.c
+++ b/src/vnet/l2/l2_xcrw.c
@@ -238,7 +238,6 @@ VLIB_NODE_FN (l2_xcrw_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_xcrw_node) = {
.name = "l2-xcrw",
.vector_size = sizeof (u32),
@@ -255,7 +254,6 @@ VLIB_REGISTER_NODE (l2_xcrw_node) = {
[L2_XCRW_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
clib_error_t *
@@ -279,18 +277,17 @@ format_xcrw_name (u8 * s, va_list * args)
return format (s, "xcrw%d", dev_instance);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (xcrw_device_class,static) = {
.name = "Xcrw",
.format_device_name = format_xcrw_name,
};
-/* *INDENT-ON* */
/* Create a sham tunnel interface and return its sw_if_index */
static u32
create_xcrw_interface (vlib_main_t * vm)
{
vnet_main_t *vnm = vnet_get_main ();
+ vnet_eth_interface_registration_t eir = {};
static u32 instance;
u8 address[6];
u32 hw_if_index;
@@ -301,10 +298,9 @@ create_xcrw_interface (vlib_main_t * vm)
clib_memset (address, 0, sizeof (address));
address[2] = 0x12;
- /* can returns error iff phy != 0 */
- (void) ethernet_register_interface
- (vnm, xcrw_device_class.index, instance++, address, &hw_if_index,
- /* flag change */ 0);
+ eir.dev_class_index = xcrw_device_class.index;
+ eir.dev_instance = instance++;
+ eir.address = address;
+ hw_if_index = vnet_eth_register_interface (vnm, &eir);
hi = vnet_get_hw_interface (vnm, hw_if_index);
sw_if_index = hi->sw_if_index;
@@ -496,7 +492,6 @@ done:
* @todo This is incomplete. This needs a detailed description and a
* practical example.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_l2_xcrw_command, static) = {
.path = "set interface l2 xcrw",
.short_help =
@@ -504,7 +499,6 @@ VLIB_CLI_COMMAND (set_l2_xcrw_command, static) = {
" [del] [tx-fib-id <id>] [ipv6] rw <hex-bytes>",
.function = set_l2_xcrw_command_fn,
};
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
@@ -568,12 +562,10 @@ show_l2xcrw_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%U", format_l2xcrw, 0, 0);
- /* *INDENT-OFF* */
pool_foreach (t, xcm->tunnels)
{
vlib_cli_output (vm, "%U", format_l2xcrw, vnm, t);
}
- /* *INDENT-ON* */
return 0;
}
@@ -585,13 +577,11 @@ show_l2xcrw_command_fn (vlib_main_t * vm,
* @todo This is incomplete. This needs a detailed description and a
* practical example.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_l2xcrw_command, static) = {
.path = "show l2xcrw",
.short_help = "show l2xcrw",
.function = show_l2xcrw_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/lawful-intercept/lawful_intercept.c b/src/vnet/lawful-intercept/lawful_intercept.c
deleted file mode 100644
index fff44fc3a67..00000000000
--- a/src/vnet/lawful-intercept/lawful_intercept.c
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vnet/lawful-intercept/lawful_intercept.h>
-
-li_main_t li_main;
-
-static clib_error_t *
-set_li_command_fn (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- li_main_t *lm = &li_main;
- ip4_address_t collector;
- u8 collector_set = 0;
- ip4_address_t src;
- u8 src_set = 0;
- u32 tmp;
- u16 udp_port = 0;
- u8 is_add = 1;
- int i;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "collector %U", unformat_ip4_address, &collector))
- collector_set = 1;
- if (unformat (input, "src %U", unformat_ip4_address, &src))
- src_set = 1;
- else if (unformat (input, "udp-port %d", &tmp))
- udp_port = tmp;
- else if (unformat (input, "del"))
- is_add = 0;
- else
- break;
- }
-
- if (collector_set == 0)
- return clib_error_return (0, "collector must be set...");
- if (src_set == 0)
- return clib_error_return (0, "src must be set...");
- if (udp_port == 0)
- return clib_error_return (0, "udp-port must be set...");
-
- if (is_add == 1)
- {
- for (i = 0; i < vec_len (lm->collectors); i++)
- {
- if (lm->collectors[i].as_u32 == collector.as_u32)
- {
- if (lm->ports[i] == udp_port)
- return clib_error_return (
- 0, "collector %U:%d already configured", format_ip4_address,
- &collector, udp_port);
- else
- return clib_error_return (
- 0, "collector %U already configured with port %d",
- format_ip4_address, &collector, (int) (lm->ports[i]));
- }
- }
- vec_add1 (lm->collectors, collector);
- vec_add1 (lm->ports, udp_port);
- vec_add1 (lm->src_addrs, src);
- return 0;
- }
- else
- {
- for (i = 0; i < vec_len (lm->collectors); i++)
- {
- if ((lm->collectors[i].as_u32 == collector.as_u32)
- && lm->ports[i] == udp_port)
- {
- vec_delete (lm->collectors, 1, i);
- vec_delete (lm->ports, 1, i);
- vec_delete (lm->src_addrs, 1, i);
- return 0;
- }
- }
- return clib_error_return (0, "collector %U:%d not configured",
- &collector, udp_port);
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_li_command, static) = {
- .path = "set li",
- .short_help =
- "set li src <ip4-address> collector <ip4-address> udp-port <nnnn>",
- .function = set_li_command_fn,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-li_init (vlib_main_t * vm)
-{
- li_main_t *lm = &li_main;
-
- lm->vlib_main = vm;
- lm->vnet_main = vnet_get_main ();
- lm->hit_node_index = li_hit_node.index;
- return 0;
-}
-
-VLIB_INIT_FUNCTION (li_init);
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/lawful-intercept/lawful_intercept.h b/src/vnet/lawful-intercept/lawful_intercept.h
deleted file mode 100644
index e39fa0d0752..00000000000
--- a/src/vnet/lawful-intercept/lawful_intercept.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __lawful_intercept_h__
-#define __lawful_intercept_h__
-
-#include <vnet/vnet.h>
-#include <vnet/ip/ip.h>
-
-typedef struct
-{
- /* LI collector info */
- ip4_address_t *src_addrs;
- ip4_address_t *collectors;
- u16 *ports;
-
- /* Hit node index */
- u32 hit_node_index;
-
- /* convenience */
- vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
-} li_main_t;
-
-extern li_main_t li_main;
-
-/* *INDENT-OFF* */
-typedef CLIB_PACKED(struct {
- ip4_header_t ip4;
- udp_header_t udp;
-}) ip4_udp_header_t;
-/* *INDENT-ON* */
-
-extern vlib_node_registration_t li_hit_node;
-
-#endif /* __lawful_intercept_h__ */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/lawful-intercept/node.c b/src/vnet/lawful-intercept/node.c
deleted file mode 100644
index c5328e672d0..00000000000
--- a/src/vnet/lawful-intercept/node.c
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vlib/vlib.h>
-#include <vnet/vnet.h>
-#include <vppinfra/error.h>
-
-#include <vnet/lawful-intercept/lawful_intercept.h>
-
-#include <vppinfra/error.h>
-#include <vppinfra/elog.h>
-
-extern vlib_node_registration_t li_hit_node;
-
-typedef struct
-{
- u32 next_index;
-} li_hit_trace_t;
-
-/* packet trace format function */
-static u8 *
-format_li_hit_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- li_hit_trace_t *t = va_arg (*args, li_hit_trace_t *);
-
- s = format (s, "LI_HIT: next index %d", t->next_index);
-
- return s;
-}
-
-#define foreach_li_hit_error \
-_(HITS, "LI packets processed") \
-_(NO_COLLECTOR, "No collector configured") \
-_(BUFFER_ALLOCATION_FAILURE, "Buffer allocation failure")
-
-typedef enum
-{
-#define _(sym,str) LI_HIT_ERROR_##sym,
- foreach_li_hit_error
-#undef _
- LI_HIT_N_ERROR,
-} li_hit_error_t;
-
-static char *li_hit_error_strings[] = {
-#define _(sym,string) string,
- foreach_li_hit_error
-#undef _
-};
-
-typedef enum
-{
- LI_HIT_NEXT_ETHERNET,
- LI_HIT_N_NEXT,
-} li_hit_next_t;
-
-VLIB_NODE_FN (li_hit_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
-{
- u32 n_left_from, *from, *to_next;
- li_hit_next_t next_index;
- vlib_frame_t *int_frame = 0;
- u32 *to_int_next = 0;
- li_main_t *lm = &li_main;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
-
- if (PREDICT_FALSE (vec_len (lm->collectors) == 0))
- {
- vlib_node_increment_counter (vm, li_hit_node.index,
- LI_HIT_ERROR_NO_COLLECTOR, n_left_from);
- }
- else
- {
- /* The intercept frame... */
- int_frame = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
- to_int_next = vlib_frame_vector_args (int_frame);
- }
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-#if 0
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- u32 next0 = LI_HIT_NEXT_INTERFACE_OUTPUT;
- u32 next1 = LI_HIT_NEXT_INTERFACE_OUTPUT;
- u32 sw_if_index0, sw_if_index1;
- u8 tmp0[6], tmp1[6];
- ethernet_header_t *en0, *en1;
- u32 bi0, bi1;
- vlib_buffer_t *b0, *b1;
-
- /* Prefetch next iteration. */
- {
- vlib_buffer_t *p2, *p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
-
- clib_prefetch_store (p2->data);
- clib_prefetch_store (p3->data);
- }
-
- /* speculatively enqueue b0 and b1 to the current next frame */
- to_next[0] = bi0 = from[0];
- to_next[1] = bi1 = from[1];
- from += 2;
- to_next += 2;
- n_left_from -= 2;
- n_left_to_next -= 2;
-
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
-
- /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
- ASSERT (b0->current_data == 0);
- ASSERT (b1->current_data == 0);
-
- en0 = vlib_buffer_get_current (b0);
- en1 = vlib_buffer_get_current (b1);
-
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
-
- /* Send pkt back out the RX interface */
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
- vnet_buffer (b1)->sw_if_index[VLIB_TX] = sw_if_index1;
-
- /* $$$$$ End of processing 2 x packets $$$$$ */
-
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
- {
- if (b0->flags & VLIB_BUFFER_IS_TRACED)
- {
- li_hit_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- t->sw_if_index = sw_if_index0;
- t->next_index = next0;
- }
- if (b1->flags & VLIB_BUFFER_IS_TRACED)
- {
- li_hit_trace_t *t =
- vlib_add_trace (vm, node, b1, sizeof (*t));
- t->sw_if_index = sw_if_index1;
- t->next_index = next1;
- }
- }
-
- /* verify speculative enqueues, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-#endif /* $$$ dual-loop off */
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0;
- vlib_buffer_t *b0;
- vlib_buffer_t *c0;
- ip4_udp_header_t *iu0;
- ip4_header_t *ip0;
- udp_header_t *udp0;
- u32 next0 = LI_HIT_NEXT_ETHERNET;
-
- /* speculatively enqueue b0 to the current next frame */
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- if (PREDICT_TRUE (to_int_next != 0))
- {
- /* Make an intercept copy. This can fail. */
- c0 = vlib_buffer_copy (vm, b0);
-
- if (PREDICT_FALSE (c0 == 0))
- {
- vlib_node_increment_counter
- (vm, node->node_index,
- LI_HIT_ERROR_BUFFER_ALLOCATION_FAILURE, 1);
- goto skip;
- }
-
- vlib_buffer_advance (c0, -sizeof (*iu0));
-
- iu0 = vlib_buffer_get_current (c0);
- ip0 = &iu0->ip4;
-
- ip0->ip_version_and_header_length = 0x45;
- ip0->ttl = 254;
- ip0->protocol = IP_PROTOCOL_UDP;
-
- ip0->src_address.as_u32 = lm->src_addrs[0].as_u32;
- ip0->dst_address.as_u32 = lm->collectors[0].as_u32;
- ip0->length = vlib_buffer_length_in_chain (vm, c0);
- ip0->checksum = ip4_header_checksum (ip0);
-
- udp0 = &iu0->udp;
- udp0->src_port = udp0->dst_port =
- clib_host_to_net_u16 (lm->ports[0]);
- udp0->checksum = 0;
- udp0->length =
- clib_net_to_host_u16 (vlib_buffer_length_in_chain (vm, b0));
-
- to_int_next[0] = vlib_get_buffer_index (vm, c0);
- to_int_next++;
- }
-
- skip:
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- li_hit_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
- t->next_index = next0;
- }
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- if (int_frame)
- {
- int_frame->n_vectors = frame->n_vectors;
- vlib_put_frame_to_node (vm, ip4_lookup_node.index, int_frame);
- }
-
- vlib_node_increment_counter (vm, li_hit_node.index,
- LI_HIT_ERROR_HITS, frame->n_vectors);
- return frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (li_hit_node) = {
- .name = "li-hit",
- .vector_size = sizeof (u32),
- .format_trace = format_li_hit_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(li_hit_error_strings),
- .error_strings = li_hit_error_strings,
-
- .n_next_nodes = LI_HIT_N_NEXT,
-
- /* edit / add dispositions here */
- .next_nodes = {
- [LI_HIT_NEXT_ETHERNET] = "ethernet-input-not-l2",
- },
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/llc/llc.c b/src/vnet/llc/llc.c
index 4a7fdf9d9ba..4cbf17d48df 100644
--- a/src/vnet/llc/llc.c
+++ b/src/vnet/llc/llc.c
@@ -181,14 +181,12 @@ llc_build_rewrite (vnet_main_t * vnm,
return (rewrite);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (llc_hw_interface_class) = {
.name = "LLC",
.format_header = format_llc_header_with_length,
.unformat_header = unformat_llc_header,
.build_rewrite = llc_build_rewrite,
};
-/* *INDENT-ON* */
static void
add_protocol (llc_main_t * pm, llc_protocol_t protocol, char *protocol_name)
diff --git a/src/vnet/llc/node.c b/src/vnet/llc/node.c
index 086925bd305..d1ee6948269 100644
--- a/src/vnet/llc/node.c
+++ b/src/vnet/llc/node.c
@@ -246,7 +246,6 @@ static char *llc_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (llc_input_node) = {
.function = llc_input,
.name = "llc-input",
@@ -267,7 +266,6 @@ VLIB_REGISTER_NODE (llc_input_node) = {
.format_trace = format_llc_input_trace,
.unformat_buffer = unformat_llc_header,
};
-/* *INDENT-ON* */
static void
llc_setup_node (vlib_main_t *vm, u32 node_index)
diff --git a/src/vnet/mfib/ip4_mfib.c b/src/vnet/mfib/ip4_mfib.c
index e71b7db7e5f..2ad873f82bf 100644
--- a/src/vnet/mfib/ip4_mfib.c
+++ b/src/vnet/mfib/ip4_mfib.c
@@ -42,6 +42,18 @@ static const mfib_prefix_t ip4_specials[] =
.fp_proto = FIB_PROTOCOL_IP4,
},
};
+static const fib_route_path_t ip4_special_path =
+ {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_addr = {
+ .ip4.data_u32 = 0x0,
+ },
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_flags = FIB_ROUTE_PATH_LOCAL,
+ .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
+ };
static u32
ip4_create_mfib_with_table_id (u32 table_id,
@@ -76,15 +88,6 @@ ip4_create_mfib_with_table_id (u32 table_id,
MFIB_RPF_ID_NONE,
MFIB_ENTRY_FLAG_DROP);
- const fib_route_path_t path = {
- .frp_proto = DPO_PROTO_IP4,
- .frp_addr = zero_addr,
- .frp_sw_if_index = ~0,
- .frp_fib_index = ~0,
- .frp_weight = 1,
- .frp_flags = FIB_ROUTE_PATH_LOCAL,
- .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
- };
int ii;
for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++)
@@ -93,7 +96,7 @@ ip4_create_mfib_with_table_id (u32 table_id,
&ip4_specials[ii],
MFIB_SOURCE_SPECIAL,
MFIB_ENTRY_FLAG_NONE,
- &path);
+ &ip4_special_path);
}
return (mfib_table->mft_index);
@@ -113,11 +116,12 @@ ip4_mfib_table_destroy (ip4_mfib_t *mfib)
MFIB_SOURCE_DEFAULT_ROUTE);
for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++)
- {
- mfib_table_entry_delete(mfib_table->mft_index,
- &ip4_specials[ii],
- MFIB_SOURCE_SPECIAL);
- }
+ {
+ mfib_table_entry_path_remove(mfib_table->mft_index,
+ &ip4_specials[ii],
+ MFIB_SOURCE_SPECIAL,
+ &ip4_special_path);
+ }
/*
* validate no more routes.
@@ -125,6 +129,8 @@ ip4_mfib_table_destroy (ip4_mfib_t *mfib)
ASSERT(0 == mfib_table->mft_total_route_counts);
ASSERT(~0 != mfib_table->mft_table_id);
+ for (u32 i = 0; i < ARRAY_LEN (mfib->fib_entry_by_dst_address); i++)
+ hash_free (mfib->fib_entry_by_dst_address[i]);
hash_unset (ip4_main.mfib_index_by_table_id, mfib_table->mft_table_id);
pool_put(ip4_main.mfibs, mfib_table);
}
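
Note on the ip4_mfib.c hunks above: the special-route path moves from a function-local initializer to a file-scope static const, so table create and table destroy now reference the identical path when adding and removing the special entries (destroy switches from deleting the whole entry to removing exactly the path that was added). A reduced, standalone sketch of the shared-template pattern follows; the types and helpers are illustrative stand-ins, not the VPP API.

#include <stdio.h>
#include <string.h>

/* Illustrative stand-in for fib_route_path_t. */
typedef struct { unsigned proto; unsigned sw_if_index; unsigned weight; } route_path_t;

/* One file-scope template shared by create and destroy, so the remove
 * call passes a path byte-identical to the one that was added. */
static const route_path_t special_path = {
  .proto = 4, .sw_if_index = ~0u, .weight = 1,
};

static route_path_t table[8];
static int n_entries;

static void table_path_add (const route_path_t *p) { table[n_entries++] = *p; }

static void table_path_remove (const route_path_t *p)
{
  for (int i = 0; i < n_entries; i++)
    if (!memcmp (&table[i], p, sizeof (*p)))
      { table[i] = table[--n_entries]; return; }
}

int main (void)
{
  table_path_add (&special_path);        /* table create */
  table_path_remove (&special_path);     /* table destroy: same template */
  printf ("entries left: %d\n", n_entries); /* 0 */
  return 0;
}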
diff --git a/src/vnet/mfib/mfib_entry.c b/src/vnet/mfib/mfib_entry.c
index 1b685d68482..244dd4fb206 100644
--- a/src/vnet/mfib/mfib_entry.c
+++ b/src/vnet/mfib/mfib_entry.c
@@ -412,6 +412,8 @@ mfib_entry_src_flush (mfib_entry_src_t *msrc)
}));
hash_free(msrc->mfes_itfs);
msrc->mfes_itfs = NULL;
+ hash_free(msrc->mfes_exts);
+ msrc->mfes_exts = NULL;
fib_path_list_unlock(msrc->mfes_pl);
}
@@ -497,7 +499,7 @@ mfib_entry_alloc (u32 fib_index,
}
static inline mfib_path_ext_t *
-mfib_entry_path_ext_find (mfib_path_ext_t *exts,
+mfib_entry_path_ext_find (uword *exts,
fib_node_index_t path_index)
{
uword *p;
@@ -547,6 +549,7 @@ typedef struct mfib_entry_collect_forwarding_ctx_t_
load_balance_path_t * next_hops;
fib_forward_chain_type_t fct;
mfib_entry_src_t *msrc;
+ dpo_proto_t payload_proto;
} mfib_entry_collect_forwarding_ctx_t;
static fib_path_list_walk_rc_t
@@ -592,7 +595,8 @@ mfib_entry_src_collect_forwarding (fib_node_index_t pl_index,
nh->path_index = path_index;
nh->path_weight = fib_path_get_weight(path_index);
- fib_path_contribute_forwarding(path_index, ctx->fct, &nh->path_dpo);
+ fib_path_contribute_forwarding(path_index, ctx->fct,
+ ctx->payload_proto, &nh->path_dpo);
break;
case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
@@ -632,6 +636,7 @@ mfib_entry_stack (mfib_entry_t *mfib_entry,
.next_hops = NULL,
.fct = mfib_entry_get_default_chain_type(mfib_entry),
.msrc = msrc,
+ .payload_proto = fib_proto_to_dpo(mfib_entry->mfe_prefix.fp_proto),
};
/*
diff --git a/src/vnet/mfib/mfib_entry_src.h b/src/vnet/mfib/mfib_entry_src.h
index b85c010779c..ab3cb3ebda7 100644
--- a/src/vnet/mfib/mfib_entry_src.h
+++ b/src/vnet/mfib/mfib_entry_src.h
@@ -109,7 +109,7 @@ typedef struct mfib_entry_src_t_
/**
* Hash table of path extensions
*/
- mfib_path_ext_t *mfes_exts;
+ uword *mfes_exts;
/**
* Covering entry (if needed)
diff --git a/src/vnet/mfib/mfib_entry_src_rr.c b/src/vnet/mfib/mfib_entry_src_rr.c
index a6a1e0d8aa5..5f697a5fad1 100644
--- a/src/vnet/mfib/mfib_entry_src_rr.c
+++ b/src/vnet/mfib/mfib_entry_src_rr.c
@@ -20,8 +20,8 @@
#include <vnet/fib/fib_path_list.h>
static void
-mfib_entry_src_rr_deactiviate (mfib_entry_t *mfib_entry,
- mfib_entry_src_t *msrc)
+mfib_entry_src_rr_deactivate (mfib_entry_t *mfib_entry,
+ mfib_entry_src_t *msrc)
{
mfib_entry_t *cover;
@@ -42,8 +42,8 @@ mfib_entry_src_rr_deactiviate (mfib_entry_t *mfib_entry,
}
static void
-mfib_entry_src_rr_activiate (mfib_entry_t *mfib_entry,
- mfib_entry_src_t *msrc)
+mfib_entry_src_rr_activate (mfib_entry_t *mfib_entry,
+ mfib_entry_src_t *msrc)
{
mfib_entry_src_t *csrc;
mfib_entry_t *cover;
@@ -72,8 +72,8 @@ static mfib_src_res_t
mfib_entry_src_rr_cover_change (mfib_entry_t *mfib_entry,
mfib_entry_src_t *msrc)
{
- mfib_entry_src_rr_deactiviate(mfib_entry, msrc);
- mfib_entry_src_rr_activiate(mfib_entry, msrc);
+ mfib_entry_src_rr_deactivate(mfib_entry, msrc);
+ mfib_entry_src_rr_activate(mfib_entry, msrc);
return (MFIB_SRC_REEVALUATE);
}
@@ -87,6 +87,7 @@ mfib_entry_src_rr_cover_update (mfib_entry_t *mfib_entry,
   * so there's no need to check for a new one, but we do need to
* copy down any new flags and input interfaces
*/
+ mfib_entry_src_t *csrc;
mfib_entry_t *cover;
cover = mfib_entry_get(msrc->mfes_cover);
@@ -95,6 +96,13 @@ mfib_entry_src_rr_cover_update (mfib_entry_t *mfib_entry,
msrc->mfes_itfs = cover->mfe_itfs;
msrc->mfes_rpf_id = cover->mfe_rpf_id;
+ /* The update to the cover could have removed the extensions.
+   * When a cover is removed from the table, the covered entries first
+   * see it updated (to have no forwarding) and then changed.
+ */
+ csrc = mfib_entry_get_best_src(cover);
+ msrc->mfes_exts = (csrc ? csrc->mfes_exts : NULL);
+
return (MFIB_SRC_REEVALUATE);
}
@@ -102,8 +110,8 @@ void
mfib_entry_src_rr_module_init (void)
{
mfib_entry_src_vft mvft = {
- .mev_activate = mfib_entry_src_rr_activiate,
- .mev_deactivate = mfib_entry_src_rr_deactiviate,
+ .mev_activate = mfib_entry_src_rr_activate,
+ .mev_deactivate = mfib_entry_src_rr_deactivate,
.mev_cover_change = mfib_entry_src_rr_cover_change,
.mev_cover_update = mfib_entry_src_rr_cover_update,
};
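
The mfib_entry_src_rr.c hunks above fix the activiate/deactiviate spelling and, on a cover update, re-read the cover's best source so that stale path extensions are not carried forward. The module-init pattern itself — each source type registering a table of activate/deactivate hooks that the core dispatches through — is sketched standalone below; names are illustrative, not the VPP API.

#include <stdio.h>

typedef struct
{
  void (*activate) (int entry);
  void (*deactivate) (int entry);
} src_vft_t;

enum { SRC_RR = 0, N_SRC };

static src_vft_t vfts[N_SRC];

static void src_register (int type, const src_vft_t *vft) { vfts[type] = *vft; }

static void rr_activate (int entry)   { printf ("activate %d\n", entry); }
static void rr_deactivate (int entry) { printf ("deactivate %d\n", entry); }

/* a cover change is modelled as deactivate-then-activate, as above */
static void cover_change (int type, int entry)
{
  vfts[type].deactivate (entry);
  vfts[type].activate (entry);
}

int main (void)
{
  src_register (SRC_RR, &(src_vft_t){ .activate = rr_activate,
                                      .deactivate = rr_deactivate });
  cover_change (SRC_RR, 7);
  return 0;
}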
diff --git a/src/vnet/mfib/mfib_forward.c b/src/vnet/mfib/mfib_forward.c
index affedb0ef00..3befce041bb 100644
--- a/src/vnet/mfib/mfib_forward.c
+++ b/src/vnet/mfib/mfib_forward.c
@@ -74,7 +74,7 @@ mfib_forward_lookup_trace (vlib_main_t * vm,
t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
t0->entry_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
t0->fib_index = vec_elt (im->mfib_index_by_sw_if_index,
- vnet_buffer(b1)->sw_if_index[VLIB_RX]);
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
}
if (b1->flags & VLIB_BUFFER_IS_TRACED)
{
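
The one-character mfib_forward.c fix above is a classic dual-loop copy/paste bug: the trace record for buffer 0 was built from buffer 1's RX interface. A reduced sketch of the corrected per-buffer pairing, with stand-in types:

#include <stdio.h>

typedef struct { int rx_sw_if_index; } buf_t;
typedef struct { int fib_index; } trace_t;

static const int fib_index_by_sw_if_index[] = { 10, 11, 12 };

static void
trace_two (const buf_t *b0, const buf_t *b1, trace_t *t0, trace_t *t1)
{
  /* each trace record reads from its own buffer: b0 -> t0, b1 -> t1 */
  t0->fib_index = fib_index_by_sw_if_index[b0->rx_sw_if_index];
  t1->fib_index = fib_index_by_sw_if_index[b1->rx_sw_if_index];
}

int main (void)
{
  buf_t b0 = { 0 }, b1 = { 2 };
  trace_t t0, t1;
  trace_two (&b0, &b1, &t0, &t1);
  printf ("%d %d\n", t0.fib_index, t1.fib_index); /* 10 12 */
  return 0;
}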
diff --git a/src/vnet/mfib/mfib_itf.c b/src/vnet/mfib/mfib_itf.c
index b323d3e4a96..e65a6d733cf 100644
--- a/src/vnet/mfib/mfib_itf.c
+++ b/src/vnet/mfib/mfib_itf.c
@@ -206,10 +206,8 @@ format_mfib_itf (u8 * s, va_list * args)
if (~0 != mfib_itf->mfi_sw_if_index)
{
return (format(s, " %U: %U",
- format_vnet_sw_interface_name,
- vnm,
- vnet_get_sw_interface(vnm,
- mfib_itf->mfi_sw_if_index),
+ format_vnet_sw_if_index_name,
+ vnm, mfib_itf->mfi_sw_if_index,
format_mfib_itf_flags, mfib_itf->mfi_flags));
}
else
diff --git a/src/vnet/mfib/mfib_types.c b/src/vnet/mfib/mfib_types.c
index 19583ea18f4..755f656a7b2 100644
--- a/src/vnet/mfib/mfib_types.c
+++ b/src/vnet/mfib/mfib_types.c
@@ -253,7 +253,6 @@ mfib_show_route_flags (vlib_main_t * vm,
/*?
* This command displays the set of supported flags applicable to an MFIB route
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (mfib_route_flags_command, static) =
{
.path = "show mfib route flags",
@@ -261,7 +260,6 @@ VLIB_CLI_COMMAND (mfib_route_flags_command, static) =
.function = mfib_show_route_flags,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
clib_error_t *
mfib_show_itf_flags (vlib_main_t * vm,
@@ -282,7 +280,6 @@ mfib_show_itf_flags (vlib_main_t * vm,
/*?
* This command displays the set of supported flags applicable to an MFIB interface
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (mfib_itf_flags_command, static) =
{
.path = "show mfib itf flags",
@@ -290,4 +287,3 @@ VLIB_CLI_COMMAND (mfib_itf_flags_command, static) =
.function = mfib_show_itf_flags,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
diff --git a/src/vnet/misc.c b/src/vnet/misc.c
index 18d4651cff3..ea816615a50 100644
--- a/src/vnet/misc.c
+++ b/src/vnet/misc.c
@@ -56,18 +56,14 @@ vnet_local_interface_tx (vlib_main_t * vm,
return f->n_vectors;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (vnet_local_interface_device_class) = {
.name = "local",
.tx_function = vnet_local_interface_tx,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (vnet_local_interface_hw_class,static) = {
.name = "local",
};
-/* *INDENT-ON* */
clib_error_t *
vnet_main_init (vlib_main_t * vm)
@@ -86,10 +82,12 @@ vnet_main_init (vlib_main_t * vm)
vnm->local_interface_hw_if_index = hw_if_index;
vnm->local_interface_sw_if_index = hw->sw_if_index;
+ vnm->pcap.current_filter_function =
+ vlib_is_packet_traced_default_function ();
+
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (vnet_main_init)=
{
.init_order = VLIB_INITS("vnet_interface_init",
@@ -102,7 +100,6 @@ VLIB_INIT_FUNCTION (vnet_main_init)=
"mpls_init",
"vnet_main_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/mpls/error.def b/src/vnet/mpls/error.def
deleted file mode 100644
index 9941b18baf4..00000000000
--- a/src/vnet/mpls/error.def
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * mpls_error.def: mpls errors
- *
- * Copyright (c) 2012 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-mpls_error (NONE, "no error")
-mpls_error (UNKNOWN_PROTOCOL, "unknown protocol")
-mpls_error (UNSUPPORTED_VERSION, "unsupported version")
-mpls_error (PKTS_DECAP, "MPLS input packets decapsulated")
-mpls_error (PKTS_ENCAP, "MPLS output packets encapsulated")
-mpls_error (PKTS_NEED_FRAG, "MPLS output packets needs fragmentation")
-mpls_error (NO_LABEL, "MPLS no label for fib/dst")
-mpls_error (TTL_EXPIRED, "MPLS ttl expired")
-mpls_error (S_NOT_SET, "MPLS s-bit not set")
-mpls_error (BAD_LABEL, "invalid FIB id in label")
-mpls_error (NOT_IP4, "non-ip4 packets dropped")
-mpls_error (DISALLOWED_FIB, "disallowed FIB id")
-mpls_error (NOT_ENABLED, "MPLS not enabled")
-mpls_error (DROP, "MPLS DROP DPO")
-mpls_error (PUNT, "MPLS PUNT DPO")
diff --git a/src/vnet/mpls/interface.c b/src/vnet/mpls/interface.c
index 5e80b9d0532..fd654dca891 100644
--- a/src/vnet/mpls/interface.c
+++ b/src/vnet/mpls/interface.c
@@ -22,6 +22,14 @@
#include <vnet/adj/adj_midchain.h>
#include <vnet/dpo/classify_dpo.h>
+typedef struct
+{
+ mpls_interface_state_change_function_t *function;
+ uword function_opaque;
+} mpls_interface_state_change_callback_t;
+
+/** Functions to call when interface becomes MPLS enabled/disabled. */
+static mpls_interface_state_change_callback_t *state_change_callbacks;
u8
mpls_sw_interface_is_enabled (u32 sw_if_index)
@@ -34,6 +42,17 @@ mpls_sw_interface_is_enabled (u32 sw_if_index)
return (mm->mpls_enabled_by_sw_if_index[sw_if_index]);
}
+void
+mpls_interface_state_change_add_callback (
+ mpls_interface_state_change_function_t *function, uword opaque)
+{
+ mpls_interface_state_change_callback_t cb = {
+ .function = function,
+ .function_opaque = opaque,
+ };
+ vec_add1 (state_change_callbacks, cb);
+}
+
int
mpls_sw_interface_enable_disable (mpls_main_t *mm, u32 sw_if_index,
u8 is_enable)
@@ -81,6 +100,12 @@ mpls_sw_interface_enable_disable (mpls_main_t *mm, u32 sw_if_index,
else if (hi->l3_if_count)
hi->l3_if_count--;
+ {
+ mpls_interface_state_change_callback_t *cb;
+ vec_foreach (cb, state_change_callbacks)
+ cb->function (mm, cb->function_opaque, sw_if_index, is_enable);
+ }
+
return (0);
}
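
The mpls/interface.c hunk above adds a register/notify pair: consumers append a (function, opaque) tuple, and the enable/disable path walks the list. A standalone model of the same pattern, using a fixed array in place of a VPP vector (names are illustrative):

#include <stdio.h>

typedef void (state_change_fn) (void *opaque, unsigned sw_if_index, int enable);

typedef struct { state_change_fn *fn; void *opaque; } callback_t;

static callback_t callbacks[8];
static int n_callbacks;

static void add_callback (state_change_fn *fn, void *opaque)
{
  callbacks[n_callbacks++] = (callback_t){ .fn = fn, .opaque = opaque };
}

static void notify_all (unsigned sw_if_index, int enable)
{
  for (int i = 0; i < n_callbacks; i++)
    callbacks[i].fn (callbacks[i].opaque, sw_if_index, enable);
}

static void on_change (void *opaque, unsigned sw_if_index, int enable)
{
  printf ("%s: if %u %s\n", (char *) opaque, sw_if_index,
          enable ? "enabled" : "disabled");
}

int main (void)
{
  add_callback (on_change, "listener");
  notify_all (1, 1); /* listener: if 1 enabled */
  return 0;
}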
diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api
index 9d4ec0bf7bf..5d775dafdfc 100644
--- a/src/vnet/mpls/mpls.api
+++ b/src/vnet/mpls/mpls.api
@@ -92,6 +92,26 @@ define mpls_tunnel_details
vl_api_mpls_tunnel_t mt_tunnel;
};
+/** \brief Dump mpls enabled interface(s)
+ @param client_index - opaque cookie to identify the sender
+ @param sw_if_index - sw_if_index of a specific interface, or -1 (default)
+ to return all MPLS enabled interfaces
+*/
+define mpls_interface_dump
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index [default=0xffffffff];
+};
+
+/** \brief mpls enabled interface details
+*/
+define mpls_interface_details
+{
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+};
+
/** \brief MPLS Route Add / del route
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -212,6 +232,108 @@ autoreply define sw_interface_set_mpls_enable
bool enable [default=true];
};
+counters mpls {
+ none {
+ severity info;
+ type counter64;
+ units "packets";
+ description "no error";
+ };
+ unknown_protocol {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unknown protocol";
+ };
+ unsupported_version {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unsupported version";
+ };
+ pkts_decap {
+ severity info;
+ type counter64;
+ units "packets";
+ description "MPLS input packets decapsulated";
+ };
+ pkts_encap {
+ severity info;
+ type counter64;
+ units "packets";
+ description "MPLS output packets encapsulated";
+ };
+ pkts_need_frag {
+ severity info;
+ type counter64;
+ units "packets";
+ description "MPLS output packets needs fragmentation";
+ };
+ no_label {
+ severity error;
+ type counter64;
+ units "packets";
+ description "MPLS no label for fib/dst";
+ };
+ ttl_expired {
+ severity error;
+ type counter64;
+ units "packets";
+ description "MPLS ttl expired";
+ };
+ s_not_set {
+ severity error;
+ type counter64;
+ units "packets";
+ description "MPLS s-bit not set";
+ };
+ bad_label {
+ severity error;
+ type counter64;
+ units "packets";
+ description "invalid FIB id in label";
+ };
+ not_ip4 {
+ severity error;
+ type counter64;
+ units "packets";
+ description "non-ip4 packets dropped";
+ };
+ disallowed_fib {
+ severity error;
+ type counter64;
+ units "packets";
+ description "disallowed FIB id";
+ };
+ not_enabled {
+ severity error;
+ type counter64;
+ units "packets";
+ description "MPLS not enabled";
+ };
+ drop {
+ severity error;
+ type counter64;
+ units "packets";
+ description "MPLS DROP DPO";
+ };
+ punt {
+ severity error;
+ type counter64;
+ units "packets";
+ description "MPLS PUNT DPO";
+ };
+};
+
+paths {
+ "/err/mpls-input" "mpls";
+ "/err/mpls-output" "mpls";
+ "/err/mpls-lookup" "mpls";
+ "/err/mpls-midchain" "mpls";
+ "/err/mpls-adj-incomplete" "mpls";
+ "/err/mpls-frag" "mpls";
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
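
The counters{} and paths{} blocks above replace the deleted error.def X-macro: the .api compiler generates an error enum plus a descriptor table (name, severity, units, description) that graph nodes reference through .error_counters instead of bare strings. A standalone model of that generated pairing, with hypothetical names:

#include <stdio.h>

typedef enum { MPLS_ERROR_NONE, MPLS_ERROR_TTL_EXPIRED, MPLS_N_ERROR } mpls_error_t;

typedef struct
{
  const char *name;
  const char *desc;
  int is_error; /* severity: 0 = info, 1 = error */
} counter_desc_t;

static const counter_desc_t mpls_error_descs[MPLS_N_ERROR] = {
  [MPLS_ERROR_NONE] = { "none", "no error", 0 },
  [MPLS_ERROR_TTL_EXPIRED] = { "ttl_expired", "MPLS ttl expired", 1 },
};

int main (void)
{
  for (int i = 0; i < MPLS_N_ERROR; i++)
    printf ("%s (%s): %s\n", mpls_error_descs[i].name,
            mpls_error_descs[i].is_error ? "error" : "info",
            mpls_error_descs[i].desc);
  return 0;
}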
diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c
index 4076a8980a9..7d922b003cc 100644
--- a/src/vnet/mpls/mpls.c
+++ b/src/vnet/mpls/mpls.c
@@ -370,7 +370,13 @@ done:
VLIB_CLI_COMMAND (mpls_local_label_command, static) = {
.path = "mpls local-label",
.function = vnet_mpls_local_label,
- .short_help = "mpls local-label [add|del] <label-value> [eos|non-eos] via [next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-attached] [rx-ip4 <interface>] [out-labels <value value value>]",
+ .short_help =
+ "mpls local-label [add|del] <label-value> [eos|non-eos] via "
+ "[next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight "
+ "<value>] [preference <value>] [udp-encap-id <value>] "
+ "[ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] "
+ "[mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-attached] "
+ "[rx-ip4|rx-ip6 <interface>] [out-labels <value value value>]",
};
clib_error_t *
@@ -425,17 +431,16 @@ vnet_mpls_table_cmd (vlib_main_t * vm,
}
done:
- unformat_free (line_input);
- return error;
+ vec_free (name);
+ unformat_free (line_input);
+ return error;
}
-/* *INDENT-ON* */
/*?
 * This command is used to add or delete MPLS tables. All
 * tables must be explicitly added before they can be used,
 * including the default table.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (mpls_table_command, static) = {
.path = "mpls table",
.short_help = "mpls table [add|del] <table-id>",
diff --git a/src/vnet/mpls/mpls.h b/src/vnet/mpls/mpls.h
index b4f90a13f3c..6baaaad95ba 100644
--- a/src/vnet/mpls/mpls.h
+++ b/src/vnet/mpls/mpls.h
@@ -23,22 +23,18 @@
#include <vnet/fib/fib_node.h>
#include <vnet/adj/adj.h>
-typedef enum
-{
-#define mpls_error(n,s) MPLS_ERROR_##n,
-#include <vnet/mpls/error.def>
-#undef mpls_error
- MPLS_N_ERROR,
-} mpls_error_t;
+struct mpls_main_t;
/**
* @brief Definition of a callback for receiving MPLS interface state change
* notifications
*/
-typedef void (*mpls_interface_state_change_callback_t) (u32 sw_if_index,
- u32 is_enable);
+typedef void (mpls_interface_state_change_function_t) (struct mpls_main_t *mm,
+ uword opaque,
+ u32 sw_if_index,
+ u32 is_enable);
-typedef struct
+typedef struct mpls_main_t
{
/* MPLS FIB index for each software interface */
u32 *fib_index_by_sw_if_index;
@@ -85,11 +81,14 @@ unformat_function_t unformat_mpls_unicast_label;
unformat_function_t unformat_mpls_header;
unformat_function_t unformat_pg_mpls_header;
+u8 mpls_sw_interface_is_enabled (u32 sw_if_index);
+
+void mpls_interface_state_change_add_callback (
+ mpls_interface_state_change_function_t *function, uword opaque);
+
int mpls_sw_interface_enable_disable (mpls_main_t *mm, u32 sw_if_index,
u8 is_enable);
-u8 mpls_sw_interface_is_enabled (u32 sw_if_index);
-
int mpls_dest_cmp (void *a1, void *a2);
int mpls_fib_index_cmp (void *a1, void *a2);
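
The callback typedef in mpls.h above changes from a pointer type to a plain function type, with call sites taking mpls_interface_state_change_function_t *. Both spellings are equivalent at the point of use; a minimal comparison:

#include <stdio.h>

typedef void (notify_fn) (unsigned sw_if_index, int enable);      /* function type */
typedef void (*notify_fn_ptr) (unsigned sw_if_index, int enable); /* pointer type */

static void handler (unsigned sw_if_index, int enable)
{
  printf ("if %u -> %d\n", sw_if_index, enable);
}

/* with the function type, prototypes read as "pointer to function" explicitly */
static void register_cb (notify_fn *fn) { fn (1, 1); }

int main (void)
{
  register_cb (handler);     /* function type: pass the function itself */
  notify_fn_ptr p = handler; /* pointer type: store and call through p */
  p (2, 0);
  return 0;
}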
diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c
index 4efb61786ad..58998a6576c 100644
--- a/src/vnet/mpls/mpls_api.c
+++ b/src/vnet/mpls/mpls_api.c
@@ -199,12 +199,10 @@ vl_api_mpls_route_add_del_t_handler (vl_api_mpls_route_add_del_t * mp)
rv = mpls_route_add_del_t_handler (vnm, mp, &stats_index);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_MPLS_ROUTE_ADD_DEL_REPLY,
({
rmp->stats_index = htonl (stats_index);
}));
- /* *INDENT-ON* */
}
void
@@ -270,13 +268,11 @@ vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp)
vec_free (rpaths);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_MPLS_TUNNEL_ADD_DEL_REPLY,
({
rmp->sw_if_index = ntohl(tunnel_sw_if_index);
rmp->tunnel_index = ntohl(tunnel_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -401,12 +397,58 @@ vl_api_mpls_table_dump_t_handler (vl_api_mpls_table_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (fib_table, mm->fibs)
{
send_mpls_table_details(am, reg, mp->context, fib_table);
}
- /* *INDENT-ON* */
+}
+
+static void
+send_mpls_interface_details (vpe_api_main_t *am, vl_api_registration_t *reg,
+ u32 context, const u32 sw_if_index)
+{
+ vl_api_mpls_interface_details_t *mp;
+
+ mp = vl_msg_api_alloc_zero (sizeof (*mp));
+ mp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_MPLS_INTERFACE_DETAILS);
+ mp->context = context;
+
+ mp->sw_if_index = htonl (sw_if_index);
+ vl_api_send_msg (reg, (u8 *) mp);
+}
+
+static void
+vl_api_mpls_interface_dump_t_handler (vl_api_mpls_interface_dump_t *mp)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ vl_api_registration_t *reg;
+ vnet_interface_main_t *im = &vnet_main.interface_main;
+ vnet_sw_interface_t *si;
+ u32 sw_if_index = ~0;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (sw_if_index == ~0)
+ {
+ pool_foreach (si, im->sw_interfaces)
+ {
+ if (mpls_sw_interface_is_enabled (si->sw_if_index))
+ {
+ send_mpls_interface_details (am, reg, mp->context,
+ si->sw_if_index);
+ }
+ }
+ }
+ else
+ {
+ if (mpls_sw_interface_is_enabled (sw_if_index))
+ {
+ send_mpls_interface_details (am, reg, mp->context, sw_if_index);
+ }
+ }
}
static void
@@ -508,7 +550,8 @@ mpls_api_hookup (vlib_main_t * vm)
/*
* Trace space for 8 MPLS encap labels
*/
- am->api_trace_cfg[VL_API_MPLS_TUNNEL_ADD_DEL].size += 8 * sizeof (u32);
+ vl_api_increase_msg_trace_size (am, VL_API_MPLS_TUNNEL_ADD_DEL,
+ 8 * sizeof (u32));
/*
* Set up the (msg_name, crc, message-id) table
diff --git a/src/vnet/mpls/mpls_features.c b/src/vnet/mpls/mpls_features.c
index 070f90a1cc6..3b535032908 100644
--- a/src/vnet/mpls/mpls_features.c
+++ b/src/vnet/mpls/mpls_features.c
@@ -16,6 +16,7 @@
*/
#include <vnet/mpls/mpls.h>
+#include <vnet/mpls/mpls.api_enum.h>
static u8 *
format_mpls_drop_trace (u8 * s, va_list * args)
diff --git a/src/vnet/mpls/mpls_input.c b/src/vnet/mpls/mpls_input.c
index 37fa1aead12..0505d9a1829 100644
--- a/src/vnet/mpls/mpls_input.c
+++ b/src/vnet/mpls/mpls_input.c
@@ -19,6 +19,7 @@
#include <vnet/pg/pg.h>
#include <vnet/mpls/mpls.h>
#include <vnet/feature/feature.h>
+#include <vnet/mpls/mpls.api_enum.h>
typedef struct {
u32 next_index;
@@ -236,12 +237,6 @@ VLIB_NODE_FN (mpls_input_node) (vlib_main_t * vm,
return mpls_input_inline (vm, node, from_frame);
}
-static char * mpls_error_strings[] = {
-#define mpls_error(n,s) s,
-#include "error.def"
-#undef mpls_error
-};
-
VLIB_REGISTER_NODE (mpls_input_node) = {
.name = "mpls-input",
/* Takes a vector of packets. */
@@ -250,7 +245,7 @@ VLIB_REGISTER_NODE (mpls_input_node) = {
.runtime_data_bytes = sizeof(mpls_input_runtime_t),
.n_errors = MPLS_N_ERROR,
- .error_strings = mpls_error_strings,
+ .error_counters = mpls_error_counters,
.n_next_nodes = MPLS_INPUT_N_NEXT,
.next_nodes = {
@@ -283,10 +278,8 @@ static clib_error_t * mpls_input_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (mpls_input_init) =
{
.runs_after = VLIB_INITS("mpls_init"),
};
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c
index 07c5cc47198..a5ac56534a5 100644
--- a/src/vnet/mpls/mpls_lookup.c
+++ b/src/vnet/mpls/mpls_lookup.c
@@ -20,6 +20,7 @@
#include <vnet/fib/mpls_fib.h>
#include <vnet/dpo/load_balance_map.h>
#include <vnet/dpo/replicate_dpo.h>
+#include <vnet/mpls/mpls.api_enum.h>
/**
* The arc/edge from the MPLS lookup node to the MPLS replicate node
@@ -43,13 +44,13 @@ format_mpls_lookup_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
mpls_lookup_trace_t * t = va_arg (*args, mpls_lookup_trace_t *);
- s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d hash %x "
- "label %d eos %d",
- t->next_index, t->lfib_index, t->lb_index, t->hash,
- vnet_mpls_uc_get_label(
- clib_net_to_host_u32(t->label_net_byte_order)),
- vnet_mpls_uc_get_s(
- clib_net_to_host_u32(t->label_net_byte_order)));
+ s = format (
+ s,
+ "MPLS: next [%d], lookup fib index %d, LB index %d hash 0x%08x "
+ "label %d eos %d",
+ t->next_index, t->lfib_index, t->lb_index, t->hash,
+ vnet_mpls_uc_get_label (clib_net_to_host_u32 (t->label_net_byte_order)),
+ vnet_mpls_uc_get_s (clib_net_to_host_u32 (t->label_net_byte_order)));
return s;
}
@@ -454,18 +455,12 @@ VLIB_NODE_FN (mpls_lookup_node) (vlib_main_t * vm,
return from_frame->n_vectors;
}
-static char * mpls_error_strings[] = {
-#define mpls_error(n,s) s,
-#include "error.def"
-#undef mpls_error
-};
-
VLIB_REGISTER_NODE (mpls_lookup_node) = {
.name = "mpls-lookup",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = MPLS_N_ERROR,
- .error_strings = mpls_error_strings,
+ .error_counters = mpls_error_counters,
.sibling_of = "mpls-load-balance",
@@ -487,8 +482,8 @@ format_mpls_load_balance_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
mpls_load_balance_trace_t * t = va_arg (*args, mpls_load_balance_trace_t *);
- s = format (s, "MPLS: next [%d], LB index %d hash %d",
- t->next_index, t->lb_index, t->hash);
+ s = format (s, "MPLS: next [%d], LB index %d hash 0x%08x", t->next_index,
+ t->lb_index, t->hash);
return s;
}
@@ -558,75 +553,77 @@ VLIB_NODE_FN (mpls_load_balance_node) (vlib_main_t * vm,
* We don't want to use the same hash value at each level in the recursion
* graph as that would lead to polarisation
*/
- hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
- hc1 = vnet_buffer (p1)->ip.flow_hash = 0;
-
- if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
- {
- if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
- {
- hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
- }
- else
- {
- hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0);
- }
- dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1));
- }
- else
- {
- dpo0 = load_balance_get_bucket_i (lb0, 0);
- }
- if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
- {
- if (PREDICT_TRUE (vnet_buffer(p1)->ip.flow_hash))
- {
- hc1 = vnet_buffer(p1)->ip.flow_hash = vnet_buffer(p1)->ip.flow_hash >> 1;
- }
- else
- {
- hc1 = vnet_buffer(p1)->ip.flow_hash = mpls_compute_flow_hash(mpls1, hc1);
- }
- dpo1 = load_balance_get_fwd_bucket(lb1, (hc1 & lb1->lb_n_buckets_minus_1));
- }
- else
- {
- dpo1 = load_balance_get_bucket_i (lb1, 0);
- }
-
- next0 = dpo0->dpoi_next_node;
- next1 = dpo1->dpoi_next_node;
-
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
- vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
-
- vlib_increment_combined_counter
- (cm, thread_index, lbi0, 1,
- vlib_buffer_length_in_chain (vm, p0));
- vlib_increment_combined_counter
- (cm, thread_index, lbi1, 1,
- vlib_buffer_length_in_chain (vm, p1));
-
- if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
- {
- mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node,
- p0, sizeof (*tr));
- tr->next_index = next0;
- tr->lb_index = lbi0;
- tr->hash = hc0;
- }
- if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED))
- {
- mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node,
- p1, sizeof (*tr));
- tr->next_index = next1;
- tr->lb_index = lbi1;
- tr->hash = hc1;
- }
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next,
- to_next, n_left_to_next,
- pi0, pi1, next0, next1);
+ hc0 = hc1 = 0;
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ mpls_compute_flow_hash (mpls0, lb0->lb_hash_config);
+ }
+ dpo0 = load_balance_get_fwd_bucket (
+ lb0, (hc0 & lb0->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ vnet_buffer (p1)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ mpls_compute_flow_hash (mpls1, lb1->lb_hash_config);
+ }
+ dpo1 = load_balance_get_fwd_bucket (
+ lb1, (hc1 & lb1->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo1 = load_balance_get_bucket_i (lb1, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ vlib_increment_combined_counter (
+ cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+ vlib_increment_combined_counter (
+ cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_load_balance_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->lb_index = lbi0;
+ tr->hash = hc0;
+ }
+ if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_load_balance_trace_t *tr =
+ vlib_add_trace (vm, node, p1, sizeof (*tr));
+ tr->next_index = next1;
+ tr->lb_index = lbi1;
+ tr->hash = hc1;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (
+ vm, node, next, to_next, n_left_to_next, pi0, pi1, next0, next1);
}
while (n_left_from > 0 && n_left_to_next > 0)
@@ -651,44 +648,45 @@ VLIB_NODE_FN (mpls_load_balance_node) (vlib_main_t * vm,
lb0 = load_balance_get(lbi0);
- hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
- if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
- {
- if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
- {
- hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
- }
- else
- {
- hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0);
- }
- dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1));
- }
- else
- {
- dpo0 = load_balance_get_bucket_i (lb0, 0);
- }
-
- next0 = dpo0->dpoi_next_node;
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
-
- if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
- {
- mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node,
- p0, sizeof (*tr));
- tr->next_index = next0;
- tr->lb_index = lbi0;
- tr->hash = hc0;
- }
-
- vlib_increment_combined_counter
- (cm, thread_index, lbi0, 1,
- vlib_buffer_length_in_chain (vm, p0));
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next,
- to_next, n_left_to_next,
- pi0, next0);
- }
+ hc0 = 0;
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ mpls_compute_flow_hash (mpls0, lb0->lb_hash_config);
+ }
+ dpo0 = load_balance_get_fwd_bucket (
+ lb0, (hc0 & lb0->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_load_balance_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->lb_index = lbi0;
+ tr->hash = hc0;
+ }
+
+ vlib_increment_combined_counter (
+ cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next,
+ n_left_to_next, pi0, next0);
+ }
vlib_put_next_frame (vm, node, next, n_left_to_next);
}
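
The mpls_lookup.c load-balance hunks above fix the flow-hash computation: the old code passed the just-zeroed hc0 as the hash configuration, so mpls_compute_flow_hash effectively ignored the LB's configured hash inputs; the new code passes lb->lb_hash_config. The surrounding shift-by-one reuse of a stored hash (so successive recursion levels do not all pick the same bucket, i.e. polarisation) is sketched standalone below; the mixer function is a stand-in, not the real hash:

#include <stdio.h>
#include <stdint.h>

static uint32_t compute_flow_hash (uint32_t label, uint32_t hash_config)
{
  /* stand-in mixer; the real one hashes the label stack per config */
  return (label * 2654435761u) ^ hash_config;
}

static uint32_t select_bucket (uint32_t *stored_hash, uint32_t label,
                               uint32_t hash_config, uint32_t n_buckets_minus_1)
{
  /* first level computes the hash; deeper levels shift the stored one */
  uint32_t hc = *stored_hash ?
    (*stored_hash >>= 1) :
    (*stored_hash = compute_flow_hash (label, hash_config));
  return hc & n_buckets_minus_1;
}

int main (void)
{
  uint32_t stored = 0;
  printf ("level 0 bucket: %u\n", select_bucket (&stored, 42, 7, 15));
  printf ("level 1 bucket: %u\n", select_bucket (&stored, 42, 7, 15));
  return 0;
}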
diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c
index a1d2d3baa88..9c1d7316db7 100644
--- a/src/vnet/mpls/mpls_output.c
+++ b/src/vnet/mpls/mpls_output.c
@@ -20,6 +20,7 @@
#include <vnet/mpls/mpls.h>
#include <vnet/ip/ip_frag.h>
#include <vnet/adj/adj_dp.h>
+#include <vnet/mpls/mpls.api_enum.h>
typedef struct {
/* Adjacency taken. */
@@ -317,12 +318,6 @@ mpls_output_inline (vlib_main_t * vm,
return from_frame->n_vectors;
}
-static char * mpls_error_strings[] = {
-#define mpls_error(n,s) s,
-#include "error.def"
-#undef mpls_error
-};
-
VLIB_NODE_FN (mpls_output_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * from_frame)
@@ -335,7 +330,7 @@ VLIB_REGISTER_NODE (mpls_output_node) = {
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = MPLS_N_ERROR,
- .error_strings = mpls_error_strings,
+ .error_counters = mpls_error_counters,
.n_next_nodes = MPLS_OUTPUT_N_NEXT,
.next_nodes = {
@@ -357,18 +352,12 @@ VLIB_REGISTER_NODE (mpls_midchain_node) = {
.vector_size = sizeof (u32),
.n_errors = MPLS_N_ERROR,
- .error_strings = mpls_error_strings,
+ .error_counters = mpls_error_counters,
.sibling_of = "mpls-output",
.format_trace = format_mpls_output_trace,
};
-static char *mpls_frag_error_strings[] = {
-#define _(sym,string) string,
- foreach_ip_frag_error
-#undef _
-};
-
typedef struct mpls_frag_trace_t_
{
u16 pkt_size;
@@ -377,11 +366,12 @@ typedef struct mpls_frag_trace_t_
typedef enum
{
- MPLS_FRAG_NEXT_REWRITE,
- MPLS_FRAG_NEXT_REWRITE_MIDCHAIN,
- MPLS_FRAG_NEXT_ICMP_ERROR,
- MPLS_FRAG_NEXT_DROP,
- MPLS_FRAG_N_NEXT,
+ MPLS_FRAG_NEXT_REWRITE,
+ MPLS_FRAG_NEXT_REWRITE_MIDCHAIN,
+ MPLS_FRAG_NEXT_ICMP4_ERROR,
+ MPLS_FRAG_NEXT_ICMP6_ERROR,
+ MPLS_FRAG_NEXT_DROP,
+ MPLS_FRAG_N_NEXT,
} mpls_frag_next_t;
static uword
@@ -390,9 +380,7 @@ mpls_frag (vlib_main_t * vm,
vlib_frame_t * frame)
{
u32 n_left_from, next_index, * from, * to_next, n_left_to_next, *frags;
- vlib_node_runtime_t * error_node;
- error_node = vlib_node_get_runtime (vm, mpls_output_node.index);
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
@@ -410,91 +398,111 @@ mpls_frag (vlib_main_t * vm,
mpls_frag_next_t next0;
u32 pi0, adj_index0;
ip_frag_error_t error0 = IP_FRAG_ERROR_NONE;
- i16 encap_size;
- u8 is_ip4;
-
- pi0 = to_next[0] = from[0];
- p0 = vlib_get_buffer (vm, pi0);
- from += 1;
- n_left_from -= 1;
- is_ip4 = vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4;
-
- adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
- adj0 = adj_get(adj_index0);
-
- /* the size of the MPLS stack */
- encap_size = vnet_buffer(p0)->l3_hdr_offset - p0->current_data;
-
- /* IP fragmentation */
- if (is_ip4)
- error0 = ip4_frag_do_fragment (vm, pi0,
- adj0->rewrite_header.max_l3_packet_bytes,
- encap_size, &frags);
- else
- error0 = ip6_frag_do_fragment (vm, pi0,
- adj0->rewrite_header.max_l3_packet_bytes,
- encap_size, &frags);
-
- if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
- {
- mpls_frag_trace_t *tr =
- vlib_add_trace (vm, node, p0, sizeof (*tr));
- tr->mtu = adj0->rewrite_header.max_l3_packet_bytes;
- tr->pkt_size = vlib_buffer_length_in_chain(vm, p0);
- }
-
- if (PREDICT_TRUE(error0 == IP_FRAG_ERROR_NONE))
- {
- /* Free original buffer chain */
- vlib_buffer_free_one (vm, pi0); /* Free original packet */
- next0 = (IP_LOOKUP_NEXT_MIDCHAIN == adj0->lookup_next_index ?
- MPLS_FRAG_NEXT_REWRITE_MIDCHAIN :
- MPLS_FRAG_NEXT_REWRITE);
- }
- else if (is_ip4 && error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
- {
- icmp4_error_set_vnet_buffer (
- p0, ICMP4_destination_unreachable,
- ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
- vnet_buffer (p0)->ip_frag.mtu);
- next0 = MPLS_FRAG_NEXT_ICMP_ERROR;
- }
- else
- {
- vlib_error_count (vm, mpls_output_node.index, error0, 1);
- vec_add1 (frags, pi0); /* Get rid of the original buffer */
- next0 = MPLS_FRAG_NEXT_DROP;
- }
-
- /* Send fragments that were added in the frame */
- u32 *frag_from, frag_left;
-
- frag_from = frags;
- frag_left = vec_len (frags);
-
- while (frag_left > 0)
- {
- while (frag_left > 0 && n_left_to_next > 0)
- {
- u32 i;
- i = to_next[0] = frag_from[0];
- frag_from += 1;
- frag_left -= 1;
- to_next += 1;
- n_left_to_next -= 1;
-
- p0 = vlib_get_buffer (vm, i);
- p0->error = error_node->errors[error0];
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next, i,
- next0);
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- vlib_get_next_frame (vm, node, next_index, to_next,
- n_left_to_next);
- }
- vec_reset_length (frags);
+ i16 encap_size, mtu;
+ u8 is_ip4;
+
+ pi0 = to_next[0] = from[0];
+ p0 = vlib_get_buffer (vm, pi0);
+ from += 1;
+ n_left_from -= 1;
+ is_ip4 = vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4;
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ adj0 = adj_get (adj_index0);
+
+ /* the size of the MPLS stack */
+ encap_size = vnet_buffer (p0)->l3_hdr_offset - p0->current_data;
+ mtu = adj0->rewrite_header.max_l3_packet_bytes - encap_size;
+
+ /* IP fragmentation */
+ if (is_ip4)
+ error0 = ip4_frag_do_fragment (vm, pi0, mtu, encap_size, &frags);
+ else
+ {
+ if (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
+ {
+ /* only fragment locally generated IPv6 */
+ error0 = IP_FRAG_ERROR_DONT_FRAGMENT_SET;
+ }
+ else
+ {
+ error0 =
+ ip6_frag_do_fragment (vm, pi0, mtu, encap_size, &frags);
+ }
+ }
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_frag_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->mtu = mtu;
+ tr->pkt_size = vlib_buffer_length_in_chain (vm, p0);
+ }
+
+ if (PREDICT_TRUE (error0 == IP_FRAG_ERROR_NONE))
+ {
+ /* Free original buffer chain */
+ vlib_buffer_free_one (vm, pi0);
+ next0 = (IP_LOOKUP_NEXT_MIDCHAIN == adj0->lookup_next_index ?
+ MPLS_FRAG_NEXT_REWRITE_MIDCHAIN :
+ MPLS_FRAG_NEXT_REWRITE);
+ }
+ else
+ {
+ vlib_error_count (vm, node->node_index, error0, 1);
+
+ if (error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
+ {
+ vlib_buffer_advance (p0, encap_size);
+ if (is_ip4)
+ {
+ icmp4_error_set_vnet_buffer (
+ p0, ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
+ mtu);
+ next0 = MPLS_FRAG_NEXT_ICMP4_ERROR;
+ }
+ else
+ {
+ icmp6_error_set_vnet_buffer (p0, ICMP6_packet_too_big,
+ 0, mtu);
+ next0 = MPLS_FRAG_NEXT_ICMP6_ERROR;
+ }
+ }
+ else
+ {
+ next0 = MPLS_FRAG_NEXT_DROP;
+ }
+
+ /* Get rid of the original buffer */
+ vec_add1 (frags, pi0);
+ }
+
+ /* Send fragments that were added in the frame */
+ u32 *frag_from, frag_left;
+
+ frag_from = frags;
+ frag_left = vec_len (frags);
+
+ while (frag_left > 0)
+ {
+ while (frag_left > 0 && n_left_to_next > 0)
+ {
+ u32 i;
+ i = to_next[0] = frag_from[0];
+ frag_from += 1;
+ frag_left -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ vlib_validate_buffer_enqueue_x1 (
+ vm, node, next_index, to_next, n_left_to_next, i, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ vec_reset_length (frags);
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
@@ -515,22 +523,21 @@ format_mpls_frag_trace (u8 * s, va_list * args)
}
VLIB_REGISTER_NODE (mpls_frag_node) = {
- .function = mpls_frag,
- .name = "mpls-frag",
- .vector_size = sizeof (u32),
- .format_trace = format_mpls_frag_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = IP_FRAG_N_ERROR,
- .error_strings = mpls_frag_error_strings,
-
- .n_next_nodes = MPLS_FRAG_N_NEXT,
- .next_nodes = {
- [MPLS_FRAG_NEXT_REWRITE] = "mpls-output",
- [MPLS_FRAG_NEXT_REWRITE_MIDCHAIN] = "mpls-midchain",
- [MPLS_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [MPLS_FRAG_NEXT_DROP] = "mpls-drop"
- },
+ .function = mpls_frag,
+ .name = "mpls-frag",
+ .vector_size = sizeof (u32),
+ .format_trace = format_mpls_frag_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = IP_FRAG_N_ERROR,
+ .error_counters = ip_frag_error_counters,
+
+ .n_next_nodes = MPLS_FRAG_N_NEXT,
+ .next_nodes = { [MPLS_FRAG_NEXT_REWRITE] = "mpls-output",
+ [MPLS_FRAG_NEXT_REWRITE_MIDCHAIN] = "mpls-midchain",
+ [MPLS_FRAG_NEXT_ICMP4_ERROR] = "ip4-icmp-error",
+ [MPLS_FRAG_NEXT_ICMP6_ERROR] = "ip6-icmp-error",
+ [MPLS_FRAG_NEXT_DROP] = "mpls-drop" },
};
/*
@@ -649,7 +656,7 @@ VLIB_REGISTER_NODE (mpls_adj_incomplete_node) = {
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = MPLS_N_ERROR,
- .error_strings = mpls_error_strings,
+ .error_counters = mpls_error_counters,
.n_next_nodes = MPLS_ADJ_INCOMPLETE_N_NEXT,
.next_nodes = {
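
The mpls_output.c fragmentation hunks above compute the fragment MTU once, as max_l3_packet_bytes minus the MPLS encap size, instead of handing the fragmenter the pre-encap figure, and route don't-fragment failures to ip4-icmp-error or ip6-icmp-error as appropriate. A worked example of the arithmetic, assuming a hypothetical two-label stack:

#include <stdio.h>

int main (void)
{
  int max_l3_packet_bytes = 1500; /* what fits after the L2 rewrite */
  int n_labels = 2;               /* hypothetical 2-label stack */
  int encap_size = n_labels * 4;  /* each MPLS label entry is 4 bytes */
  int mtu = max_l3_packet_bytes - encap_size;

  /* the IP fragmenter, and any ICMP "too big" report, must use 1492 */
  printf ("IP fragments may carry at most %d bytes\n", mtu);
  return 0;
}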
diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c
index 54458eacdf8..b03a4a57f68 100644
--- a/src/vnet/mpls/mpls_tunnel.c
+++ b/src/vnet/mpls/mpls_tunnel.c
@@ -265,10 +265,8 @@ mpls_tunnel_collect_forwarding (fib_node_index_t pl_index,
* found a matching extension. stack it to obtain the forwarding
* info for this path.
*/
- ctx->next_hops = fib_path_ext_stack(path_ext,
- ctx->fct,
- ctx->fct,
- ctx->next_hops);
+ ctx->next_hops =
+ fib_path_ext_stack (path_ext, DPO_PROTO_MPLS, ctx->fct, ctx->next_hops);
return (FIB_PATH_LIST_WALK_CONTINUE);
}
@@ -638,6 +636,7 @@ vnet_mpls_tunnel_del (u32 sw_if_index)
mt->mt_sibling_index);
dpo_reset(&mt->mt_l2_lb);
+ vnet_reset_interface_l3_output_node (vlib_get_main (), mt->mt_sw_if_index);
vnet_delete_hw_interface (vnet_get_main(), mt->mt_hw_if_index);
pool_put(mpls_tunnel_pool, mt);
@@ -685,6 +684,9 @@ vnet_mpls_tunnel_create (u8 l2_only,
if (mt->mt_flags & MPLS_TUNNEL_FLAG_L2)
vnet_set_interface_output_node (vnm, mt->mt_hw_if_index,
mpls_tunnel_tx.index);
+ else
+ vnet_set_interface_l3_output_node (vnm->vlib_main, hi->sw_if_index,
+ (u8 *) "tunnel-output");
/* Standard default MPLS tunnel MTU. */
vnet_sw_interface_set_mtu (vnm, hi->sw_if_index, 9000);
@@ -930,7 +932,12 @@ done:
VLIB_CLI_COMMAND (create_mpls_tunnel_command, static) = {
.path = "mpls tunnel",
.short_help =
- "mpls tunnel [multicast] [l2-only] via [next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4 <interface>] [out-labels <value value value>]",
+ "mpls tunnel [multicast] [l2-only] via [next-hop-address] "
+ "[next-hop-interface] [next-hop-table <value>] [weight <value>] "
+ "[preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table "
+ "<value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] "
+ "[resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 <interface>] "
+ "[out-labels <value value value>]",
.function = vnet_create_mpls_tunnel_command_fn,
};
diff --git a/src/vnet/osi/node.c b/src/vnet/osi/node.c
index 4eb3e461139..9edc354cda7 100644
--- a/src/vnet/osi/node.c
+++ b/src/vnet/osi/node.c
@@ -239,7 +239,6 @@ static char *osi_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (osi_input_node) = {
.function = osi_input,
.name = "osi-input",
@@ -260,7 +259,6 @@ VLIB_REGISTER_NODE (osi_input_node) = {
.format_trace = format_osi_input_trace,
.unformat_buffer = unformat_osi_header,
};
-/* *INDENT-ON* */
static void
osi_setup_node (vlib_main_t *vm, u32 node_index)
diff --git a/src/vnet/pg/cli.c b/src/vnet/pg/cli.c
index e57e72573f3..3f2de2604b2 100644
--- a/src/vnet/pg/cli.c
+++ b/src/vnet/pg/cli.c
@@ -47,12 +47,10 @@
/* Root of all packet generator cli commands. */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_pg_command, static) = {
.path = "packet-generator",
.short_help = "Packet generator commands",
};
-/* *INDENT-ON* */
void
pg_enable_disable (u32 stream_index, int is_enable)
@@ -63,11 +61,9 @@ pg_enable_disable (u32 stream_index, int is_enable)
if (stream_index == ~0)
{
/* No stream specified: enable/disable all streams. */
- /* *INDENT-OFF* */
pool_foreach (s, pg->streams) {
pg_stream_enable_disable (pg, s, is_enable);
}
- /* *INDENT-ON* */
}
else
{
@@ -138,23 +134,19 @@ doit:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (enable_streams_cli, static) = {
.path = "packet-generator enable-stream",
.short_help = "Enable packet generator streams",
.function = enable_disable_stream,
.function_arg = 1, /* is_enable */
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (disable_streams_cli, static) = {
.path = "packet-generator disable-stream",
.short_help = "Disable packet generator streams",
.function = enable_disable_stream,
.function_arg = 0, /* is_enable */
};
-/* *INDENT-ON* */
static u8 *
format_pg_edit_group (u8 * s, va_list * va)
@@ -210,12 +202,10 @@ format_pg_stream (u8 * s, va_list * va)
if (verbose)
{
pg_edit_group_t *g;
- /* *INDENT-OFF* */
vec_foreach (g, t->edit_groups)
{
s = format (s, "\n%U%U", format_white_space, indent, format_pg_edit_group, g);
}
- /* *INDENT-ON* */
}
return s;
@@ -244,23 +234,19 @@ show_streams (vlib_main_t * vm,
}
vlib_cli_output (vm, "%U", format_pg_stream, 0, 0);
- /* *INDENT-OFF* */
pool_foreach (s, pg->streams) {
vlib_cli_output (vm, "%U", format_pg_stream, s, verbose);
}
- /* *INDENT-ON* */
done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_streams_cli, static) = {
.path = "show packet-generator ",
.short_help = "show packet-generator [verbose]",
.function = show_streams,
};
-/* *INDENT-ON* */
static clib_error_t *
pg_pcap_read (pg_stream_t * s, char *file_name)
@@ -446,8 +432,6 @@ new_stream (vlib_main_t * vm,
{
vlib_node_t *n;
- ASSERT (s.if_id != ~0);
-
if (s.if_id != ~0)
n = vlib_get_node_by_name (vm, (u8 *) pg_interface_get_input_node (
&pg->interfaces[s.if_id]));
@@ -507,7 +491,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (new_stream_cli, static) = {
.path = "packet-generator new",
.function = new_stream,
@@ -525,7 +508,6 @@ VLIB_CLI_COMMAND (new_stream_cli, static) = {
"rate PPS rate to transfer packet data\n"
"maxframe NPKTS maximum number of packets per frame\n",
};
-/* *INDENT-ON* */
static clib_error_t *
del_stream (vlib_main_t * vm,
@@ -543,13 +525,11 @@ del_stream (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (del_stream_cli, static) = {
.path = "packet-generator delete",
.function = del_stream,
.short_help = "Delete stream with given name",
};
-/* *INDENT-ON* */
static clib_error_t *
change_stream_parameters (vlib_main_t * vm,
@@ -590,13 +570,11 @@ change_stream_parameters (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (change_stream_parameters_cli, static) = {
.path = "packet-generator configure",
.short_help = "Change packet generator stream parameters",
.function = change_stream_parameters,
};
-/* *INDENT-ON* */
static clib_error_t *
pg_capture_cmd_fn (vlib_main_t * vm,
@@ -673,13 +651,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (pg_capture_cmd, static) = {
.path = "packet-generator capture",
.short_help = "packet-generator capture <interface name> pcap <filename> [count <n>]",
.function = pg_capture_cmd_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
create_pg_if_cmd_fn (vlib_main_t * vm,
@@ -687,8 +663,9 @@ create_pg_if_cmd_fn (vlib_main_t * vm,
{
pg_main_t *pg = &pg_main;
unformat_input_t _line_input, *line_input = &_line_input;
- u32 if_id, gso_enabled = 0, gso_size = 0, coalesce_enabled = 0;
+ u32 if_id = ~0, gso_enabled = 0, gso_size = 0, coalesce_enabled = 0;
clib_error_t *error = NULL;
+ pg_interface_mode_t mode = PG_MODE_ETHERNET;
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -710,6 +687,10 @@ create_pg_if_cmd_fn (vlib_main_t * vm,
goto done;
}
}
+ else if (unformat (line_input, "mode ip4"))
+ mode = PG_MODE_IP4;
+ else if (unformat (line_input, "mode ip6"))
+ mode = PG_MODE_IP6;
else
{
error = clib_error_create ("unknown input `%U'",
@@ -719,7 +700,7 @@ create_pg_if_cmd_fn (vlib_main_t * vm,
}
pg_interface_add_or_get (pg, if_id, gso_enabled, gso_size, coalesce_enabled,
- PG_MODE_ETHERNET);
+ mode);
done:
unformat_free (line_input);
@@ -727,14 +708,13 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_pg_if_cmd, static) = {
.path = "create packet-generator",
.short_help = "create packet-generator interface <interface name>"
- " [gso-enabled gso-size <size> [coalesce-enabled]]",
+ " [gso-enabled gso-size <size> [coalesce-enabled]]"
+ " [mode <ethernet | ip4 | ip6>]",
.function = create_pg_if_cmd_fn,
};
-/* *INDENT-ON* */
/* Dummy init function so that we can be linked in. */
static clib_error_t *
diff --git a/src/vnet/pg/example.script b/src/vnet/pg/example.script
index 0e29b9ecae6..662088657bf 100644
--- a/src/vnet/pg/example.script
+++ b/src/vnet/pg/example.script
@@ -1,6 +1,6 @@
-packet-generator new {
- name x
- limit 1
- node ethernet-input
- data { IP: 1.2.3 -> 4.5.6 incrementing 100 }
+packet-generator new { \
+ name x \
+ limit 1 \
+ node ethernet-input \
+ data { IP: 1.2.3 -> 4.5.6 incrementing 100 } \
}
diff --git a/src/vnet/pg/input.c b/src/vnet/pg/input.c
index 17b7c518385..321472c4d85 100644
--- a/src/vnet/pg/input.c
+++ b/src/vnet/pg/input.c
@@ -965,7 +965,7 @@ pg_generate_fix_multi_buffer_lengths (pg_main_t * pg,
if (vec_len (unused_buffers) > 0)
{
vlib_buffer_free_no_next (vm, unused_buffers, vec_len (unused_buffers));
- _vec_len (unused_buffers) = 0;
+ vec_set_len (unused_buffers, 0);
}
}
@@ -1578,7 +1578,7 @@ fill_buffer_offload_flags (vlib_main_t *vm, u32 *buffers, u32 n_buffers,
(VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
- if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
+ if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM || gso_enabled)
oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
}
else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6))
@@ -1596,7 +1596,7 @@ fill_buffer_offload_flags (vlib_main_t *vm, u32 *buffers, u32 n_buffers,
if (l4_proto == IP_PROTOCOL_TCP)
{
- if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
+ if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM || gso_enabled)
oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
/* only set GSO flag for chained buffers */
@@ -1639,8 +1639,8 @@ pg_generate_packets (vlib_node_runtime_t * node,
pg_interface_t *pi;
int i;
- pi = pool_elt_at_index (pg->interfaces,
- pg->if_id_by_sw_if_index[s->sw_if_index[VLIB_RX]]);
+ pi = pool_elt_at_index (
+ pg->interfaces, pg->if_index_by_sw_if_index[s->sw_if_index[VLIB_RX]]);
bi0 = s->buffer_indices;
n_packets_in_fifo = pg_stream_fill (pg, s, n_packets_to_generate);
@@ -1657,7 +1657,11 @@ pg_generate_packets (vlib_node_runtime_t * node,
}
if (PREDICT_FALSE (pi->coalesce_enabled))
- vnet_gro_flow_table_schedule_node_on_dispatcher (vm, pi->flow_table);
+ {
+ vnet_hw_if_tx_queue_t txq = { 0 };
+ vnet_gro_flow_table_schedule_node_on_dispatcher (vm, &txq,
+ pi->flow_table);
+ }
while (n_packets_to_generate > 0)
{
@@ -1812,17 +1816,14 @@ pg_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
if (vlib_num_workers ())
worker_index = vlib_get_current_worker_index ();
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, pg->enabled_streams[worker_index]) {
pg_stream_t *s = vec_elt_at_index (pg->streams, i);
n_packets += pg_input_stream (node, pg, s);
}
- /* *INDENT-ON* */
return n_packets;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (pg_input_node) = {
.function = pg_input,
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
@@ -1835,7 +1836,6 @@ VLIB_REGISTER_NODE (pg_input_node) = {
/* Input node will be left disabled until a stream is active. */
.state = VLIB_NODE_STATE_DISABLED,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (pg_input_mac_filter) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1860,9 +1860,9 @@ VLIB_NODE_FN (pg_input_mac_filter) (vlib_main_t * vm,
pg_interface_t *pi;
mac_address_t in;
- pi = pool_elt_at_index
- (pg->interfaces,
- pg->if_id_by_sw_if_index[vnet_buffer (b[0])->sw_if_index[VLIB_RX]]);
+ pi = pool_elt_at_index (
+ pg->interfaces,
+ pg->if_index_by_sw_if_index[vnet_buffer (b[0])->sw_if_index[VLIB_RX]]);
eth = vlib_buffer_get_current (b[0]);
mac_address_from_bytes (&in, eth->dst_address);
@@ -1894,7 +1894,6 @@ VLIB_NODE_FN (pg_input_mac_filter) (vlib_main_t * vm,
return (frame->n_vectors);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (pg_input_mac_filter) = {
.name = "pg-input-mac-filter",
.vector_size = sizeof (u32),
@@ -1908,7 +1907,6 @@ VNET_FEATURE_INIT (pg_input_mac_filter_feat, static) = {
.arc_name = "device-input",
.node_name = "pg-input-mac-filter",
};
-/* *INDENT-ON* */
static clib_error_t *
pg_input_mac_filter_cfg (vlib_main_t * vm,
@@ -1946,13 +1944,11 @@ pg_input_mac_filter_cfg (vlib_main_t * vm,
return NULL;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (enable_streams_cli, static) = {
.path = "packet-generator mac-filter",
.short_help = "packet-generator mac-filter <INTERFACE> <on|off>",
.function = pg_input_mac_filter_cfg,
};
-/* *INDENT-ON* */
/*
diff --git a/src/vnet/pg/pg.api b/src/vnet/pg/pg.api
index 3630e0c2f0d..4f531fb1f5e 100644
--- a/src/vnet/pg/pg.api
+++ b/src/vnet/pg/pg.api
@@ -38,6 +38,8 @@ enum pg_interface_mode : u8
*/
define pg_create_interface
{
+ option deprecated;
+
u32 client_index;
u32 context;
vl_api_interface_index_t interface_id;
@@ -60,6 +62,8 @@ define pg_create_interface_v2
*/
define pg_create_interface_reply
{
+ option deprecated;
+
u32 context;
i32 retval;
vl_api_interface_index_t sw_if_index;
diff --git a/src/vnet/pg/pg.h b/src/vnet/pg/pg.h
index 963d23a8e01..6d5b25ba25a 100644
--- a/src/vnet/pg/pg.h
+++ b/src/vnet/pg/pg.h
@@ -296,7 +296,7 @@ pg_free_edit_group (pg_stream_t * s)
pg_edit_group_free (g);
clib_memset (g, 0, sizeof (g[0]));
- _vec_len (s->edit_groups) = i;
+ vec_set_len (s->edit_groups, i);
}
typedef enum pg_interface_mode_t_
@@ -349,7 +349,7 @@ typedef struct pg_main_t
/* Pool of interfaces. */
pg_interface_t *interfaces;
uword *if_index_by_if_id;
- uword *if_id_by_sw_if_index;
+ uword *if_index_by_sw_if_index;
/* Vector of buffer indices for use in pg_stream_fill_replay, per thread */
u32 **replay_buffers_by_thread;
@@ -383,7 +383,7 @@ void pg_interface_enable_disable_coalesce (pg_interface_t * pi, u8 enable,
u32 tx_node_index);
/* Find/create free packet-generator interface index. */
-u32 pg_interface_add_or_get (pg_main_t *pg, uword stream_index, u8 gso_enabled,
+u32 pg_interface_add_or_get (pg_main_t *pg, u32 stream_index, u8 gso_enabled,
u32 gso_size, u8 coalesce_enabled,
pg_interface_mode_t mode);
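
The pg.h rename above separates two keys into the pg interface pool: the user-chosen interface id and the software interface index, both of which now resolve to the pool index. A reduced sketch of the dual mapping, with plain arrays standing in for the VPP hash and vector:

#include <stdio.h>

#define N 8

static int if_index_by_if_id[N];       /* user id      -> pool index */
static int if_index_by_sw_if_index[N]; /* sw_if_index  -> pool index */

int main (void)
{
  int pool_index = 3, if_id = 5, sw_if_index = 2;

  if_index_by_if_id[if_id] = pool_index;
  if_index_by_sw_if_index[sw_if_index] = pool_index;

  /* both lookups land on the same pool element */
  printf ("%d %d\n", if_index_by_if_id[if_id],
          if_index_by_sw_if_index[sw_if_index]);
  return 0;
}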
diff --git a/src/vnet/pg/pg_api.c b/src/vnet/pg/pg_api.c
index 468c88ee8bb..e5d0a08a527 100644
--- a/src/vnet/pg/pg_api.c
+++ b/src/vnet/pg/pg_api.c
@@ -40,12 +40,10 @@ vl_api_pg_create_interface_t_handler (vl_api_pg_create_interface_t * mp)
ntohl (mp->gso_size), 0, PG_MODE_ETHERNET);
pg_interface_t *pi = pool_elt_at_index (pg->interfaces, pg_if_id);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_PG_CREATE_INTERFACE_REPLY,
({
rmp->sw_if_index = ntohl(pi->sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
diff --git a/src/vnet/pg/stream.c b/src/vnet/pg/stream.c
index 605567c1296..cf3d37d5e9e 100644
--- a/src/vnet/pg/stream.c
+++ b/src/vnet/pg/stream.c
@@ -171,7 +171,6 @@ pg_add_del_mac_address (vnet_hw_interface_t * hi,
return (NULL);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (pg_dev_class) = {
.name = "pg",
.tx_function = pg_output,
@@ -180,7 +179,6 @@ VNET_DEVICE_CLASS (pg_dev_class) = {
.admin_up_down_function = pg_interface_admin_up_down,
.mac_addr_add_del_function = pg_add_del_mac_address,
};
-/* *INDENT-ON* */
static u8 *
pg_build_rewrite (vnet_main_t * vnm,
@@ -197,12 +195,10 @@ pg_build_rewrite (vnet_main_t * vnm,
return (rewrite);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (pg_interface_class,static) = {
.name = "Packet generator",
.build_rewrite = pg_build_rewrite,
};
-/* *INDENT-ON* */
static u32
pg_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags)
@@ -245,10 +241,11 @@ VNET_HW_INTERFACE_CLASS (pg_tun_hw_interface_class) = {
.build_rewrite = NULL,
//.update_adjacency = gre_update_adj,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+ .tx_hash_fn_type = VNET_HASH_FN_TYPE_IP,
};
u32
-pg_interface_add_or_get (pg_main_t *pg, uword if_id, u8 gso_enabled,
+pg_interface_add_or_get (pg_main_t *pg, u32 if_id, u8 gso_enabled,
u32 gso_size, u8 coalesce_enabled,
pg_interface_mode_t mode)
{
@@ -267,6 +264,7 @@ pg_interface_add_or_get (pg_main_t *pg, uword if_id, u8 gso_enabled,
}
else
{
+ vnet_eth_interface_registration_t eir = {};
u8 hw_addr[6];
f64 now = vlib_time_now (vm);
u32 rnd;
@@ -286,8 +284,11 @@ pg_interface_add_or_get (pg_main_t *pg, uword if_id, u8 gso_enabled,
switch (pi->mode)
{
case PG_MODE_ETHERNET:
- ethernet_register_interface (vnm, pg_dev_class.index, i, hw_addr,
- &pi->hw_if_index, pg_eth_flag_change);
+ eir.dev_class_index = pg_dev_class.index;
+ eir.dev_instance = i;
+ eir.address = hw_addr;
+ eir.cb.flag_change = pg_eth_flag_change;
+ pi->hw_if_index = vnet_eth_register_interface (vnm, &eir);
break;
case PG_MODE_IP4:
case PG_MODE_IP6:
@@ -298,7 +299,7 @@ pg_interface_add_or_get (pg_main_t *pg, uword if_id, u8 gso_enabled,
hi = vnet_get_hw_interface (vnm, pi->hw_if_index);
if (gso_enabled)
{
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO;
+ vnet_hw_if_set_caps (vnm, pi->hw_if_index, VNET_HW_IF_CAP_TCP_GSO);
pi->gso_enabled = 1;
pi->gso_size = gso_size;
if (coalesce_enabled)
@@ -310,8 +311,8 @@ pg_interface_add_or_get (pg_main_t *pg, uword if_id, u8 gso_enabled,
hash_set (pg->if_index_by_if_id, if_id, i);
- vec_validate (pg->if_id_by_sw_if_index, hi->sw_if_index);
- pg->if_id_by_sw_if_index[hi->sw_if_index] = i;
+ vec_validate (pg->if_index_by_sw_if_index, hi->sw_if_index);
+ pg->if_index_by_sw_if_index[hi->sw_if_index] = i;
if (vlib_num_workers ())
{
@@ -555,6 +556,11 @@ pg_stream_add (pg_main_t * pg, pg_stream_t * s_init)
*/
s->sw_if_index[VLIB_RX] = pi->sw_if_index;
}
+ else if (vec_len (pg->if_index_by_sw_if_index) <= s->sw_if_index[VLIB_RX])
+ {
+ vec_validate (pg->if_index_by_sw_if_index, s->sw_if_index[VLIB_RX]);
+ pg->if_index_by_sw_if_index[s->sw_if_index[VLIB_RX]] = s->pg_if_index;
+ }
/* Connect the graph. */
s->next_index = vlib_node_add_next (vm, device_input_node.index,
diff --git a/src/vnet/policer/node_funcs.c b/src/vnet/policer/node_funcs.c
index 21b9393a222..2d2252d247a 100644
--- a/src/vnet/policer/node_funcs.c
+++ b/src/vnet/policer/node_funcs.c
@@ -68,7 +68,7 @@ static char *vnet_policer_error_strings[] = {
static inline uword
vnet_policer_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
- vlib_frame_t *frame)
+ vlib_frame_t *frame, vlib_dir_t dir)
{
u32 n_left_from, *from, *to_next;
vnet_policer_next_t next_index;
@@ -120,11 +120,11 @@ vnet_policer_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
b0 = vlib_get_buffer (vm, bi0);
b1 = vlib_get_buffer (vm, bi1);
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[dir];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[dir];
- pi0 = pm->policer_index_by_sw_if_index[sw_if_index0];
- pi1 = pm->policer_index_by_sw_if_index[sw_if_index1];
+ pi0 = pm->policer_index_by_sw_if_index[dir][sw_if_index0];
+ pi1 = pm->policer_index_by_sw_if_index[dir][sw_if_index1];
act0 = vnet_policer_police (vm, b0, pi0, time_in_policer_periods,
POLICE_CONFORM /* no chaining */, true);
@@ -206,9 +206,8 @@ vnet_policer_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
b0 = vlib_get_buffer (vm, bi0);
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-
- pi0 = pm->policer_index_by_sw_if_index[sw_if_index0];
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[dir];
+ pi0 = pm->policer_index_by_sw_if_index[dir][sw_if_index0];
act0 = vnet_policer_police (vm, b0, pi0, time_in_policer_periods,
POLICE_CONFORM /* no chaining */, true);
@@ -256,7 +255,7 @@ vnet_policer_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
VLIB_NODE_FN (policer_input_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return vnet_policer_inline (vm, node, frame);
+ return vnet_policer_inline (vm, node, frame, VLIB_RX);
}
VLIB_REGISTER_NODE (policer_input_node) = {
@@ -279,12 +278,43 @@ VNET_FEATURE_INIT (policer_input_node, static) = {
.runs_before = VNET_FEATURES ("ethernet-input"),
};
+VLIB_NODE_FN (policer_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return vnet_policer_inline (vm, node, frame, VLIB_TX);
+}
+
+VLIB_REGISTER_NODE (policer_output_node) = {
+ .name = "policer-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_policer_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(vnet_policer_error_strings),
+ .error_strings = vnet_policer_error_strings,
+ .n_next_nodes = VNET_POLICER_N_NEXT,
+ .next_nodes = {
+ [VNET_POLICER_NEXT_DROP] = "error-drop",
+ [VNET_POLICER_NEXT_HANDOFF] = "policer-output-handoff",
+ },
+};
+
+VNET_FEATURE_INIT (policer_output_node, static) = {
+ .arc_name = "ip4-output",
+ .node_name = "policer-output",
+};
+
+VNET_FEATURE_INIT (policer6_output_node, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "policer-output",
+};
+
static char *policer_input_handoff_error_strings[] = { "congestion drop" };
VLIB_NODE_FN (policer_input_handoff_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return policer_handoff (vm, node, frame, vnet_policer_main.fq_index, ~0);
+ return policer_handoff (vm, node, frame, vnet_policer_main.fq_index[VLIB_RX],
+ ~0);
}
VLIB_REGISTER_NODE (policer_input_handoff_node) = {
@@ -301,6 +331,26 @@ VLIB_REGISTER_NODE (policer_input_handoff_node) = {
},
};
+VLIB_NODE_FN (policer_output_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return policer_handoff (vm, node, frame, vnet_policer_main.fq_index[VLIB_TX],
+ ~0);
+}
+
+VLIB_REGISTER_NODE (policer_output_handoff_node) = {
+ .name = "policer-output-handoff",
+ .vector_size = sizeof (u32),
+ .format_trace = format_policer_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(policer_input_handoff_error_strings),
+ .error_strings = policer_input_handoff_error_strings,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
typedef struct
{
u32 sw_if_index;
@@ -477,7 +527,7 @@ policer_classify_inline (vlib_main_t * vm,
u32 table_index0;
vnet_classify_table_t *t0;
vnet_classify_entry_t *e0;
- u64 hash0;
+ u32 hash0;
u8 *h0;
u8 act0;
@@ -487,7 +537,7 @@ policer_classify_inline (vlib_main_t * vm,
vlib_buffer_t *p1 = vlib_get_buffer (vm, from[3]);
vnet_classify_table_t *tp1;
u32 table_index1;
- u64 phash1;
+ u32 phash1;
table_index1 = vnet_buffer (p1)->l2_classify.table_index;
@@ -620,7 +670,6 @@ VLIB_NODE_FN (ip4_policer_classify_node) (vlib_main_t * vm,
POLICER_CLASSIFY_TABLE_IP4);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_policer_classify_node) = {
.name = "ip4-policer-classify",
.vector_size = sizeof (u32),
@@ -632,7 +681,6 @@ VLIB_REGISTER_NODE (ip4_policer_classify_node) = {
[POLICER_CLASSIFY_NEXT_INDEX_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_policer_classify_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -642,7 +690,6 @@ VLIB_NODE_FN (ip6_policer_classify_node) (vlib_main_t * vm,
POLICER_CLASSIFY_TABLE_IP6);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_policer_classify_node) = {
.name = "ip6-policer-classify",
.vector_size = sizeof (u32),
@@ -654,7 +701,6 @@ VLIB_REGISTER_NODE (ip6_policer_classify_node) = {
[POLICER_CLASSIFY_NEXT_INDEX_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (l2_policer_classify_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -663,7 +709,6 @@ VLIB_NODE_FN (l2_policer_classify_node) (vlib_main_t * vm,
return policer_classify_inline (vm, node, frame, POLICER_CLASSIFY_TABLE_L2);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_policer_classify_node) = {
.name = "l2-policer-classify",
.vector_size = sizeof (u32),
@@ -675,7 +720,6 @@ VLIB_REGISTER_NODE (l2_policer_classify_node) = {
[POLICER_CLASSIFY_NEXT_INDEX_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
static clib_error_t *
diff --git a/src/vnet/policer/police.h b/src/vnet/policer/police.h
index 5ad249ef40e..8f126e22175 100644
--- a/src/vnet/policer/police.h
+++ b/src/vnet/policer/police.h
@@ -73,8 +73,6 @@ typedef enum
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- u32 lock; // for exclusive access to the struct
-
u32 single_rate; // 1 = single rate policer, 0 = two rate policer
u32 color_aware; // for hierarchical policing
u32 scale; // power-of-2 shift amount for lower rates
@@ -93,11 +91,9 @@ typedef struct
u32 current_bucket; // MOD
u32 extended_limit;
u32 extended_bucket; // MOD
-
- u64 last_update_time; // MOD
u32 thread_index; // Tie policer to a thread, rather than lock
- u32 pad32;
-
+ u64 last_update_time; // MOD
+ u8 *name;
} policer_t;
STATIC_ASSERT_SIZEOF (policer_t, CLIB_CACHE_LINE_BYTES);
diff --git a/src/vnet/policer/police_inlines.h b/src/vnet/policer/police_inlines.h
index 6b0c0ecf725..08000b9a303 100644
--- a/src/vnet/policer/police_inlines.h
+++ b/src/vnet/policer/police_inlines.h
@@ -123,7 +123,7 @@ policer_handoff (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 n_enq, n_left_from, *from;
vnet_policer_main_t *pm;
policer_t *policer;
- u32 this_thread, policer_thread;
+ u32 this_thread, policer_thread = 0;
bool single_policer_node = (policer_index != ~0);
pm = &vnet_policer_main;
diff --git a/src/vnet/policer/policer.api b/src/vnet/policer/policer.api
index a664ab0be76..a5a60b35c6b 100644
--- a/src/vnet/policer/policer.api
+++ b/src/vnet/policer/policer.api
@@ -13,7 +13,7 @@
* limitations under the License.
*/
-option version = "2.0.0";
+option version = "3.0.0";
import "vnet/interface_types.api";
import "vnet/policer/policer_types.api";
@@ -35,6 +35,16 @@ autoreply define policer_bind
bool bind_enable;
};
+autoreply define policer_bind_v2
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+ u32 worker_index;
+ bool bind_enable;
+};
+
/** \brief policer input: Apply policer as an input feature.
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -52,6 +62,43 @@ autoreply define policer_input
bool apply;
};
+autoreply define policer_input_v2
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+ vl_api_interface_index_t sw_if_index;
+ bool apply;
+};
+
+/** \brief policer output: Apply policer as an output feature.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param name - policer name
+ @param sw_if_index - interface to apply the policer
+ @param apply - Apply/remove
+*/
+autoreply define policer_output
+{
+ u32 client_index;
+ u32 context;
+
+ string name[64];
+ vl_api_interface_index_t sw_if_index;
+ bool apply;
+};
+
+autoreply define policer_output_v2
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+ vl_api_interface_index_t sw_if_index;
+ bool apply;
+};
+
/** \brief Add/del policer
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -89,6 +136,40 @@ define policer_add_del
vl_api_sse2_qos_action_t violate_action;
};
+define policer_add
+{
+ u32 client_index;
+ u32 context;
+
+ string name[64];
+ vl_api_policer_config_t infos;
+};
+
+autoreply define policer_del
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+};
+
+autoreply define policer_update
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+ vl_api_policer_config_t infos;
+};
+
+autoreply define policer_reset
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+};
+
/** \brief Add/del policer response
@param context - sender context, to match reply w/ request
@param retval - return value for request
@@ -101,6 +182,13 @@ define policer_add_del_reply
u32 policer_index;
};
+define policer_add_reply
+{
+ u32 context;
+ i32 retval;
+ u32 policer_index;
+};
+
/** \brief Get list of policers
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -116,6 +204,23 @@ define policer_dump
string match_name[64];
};
+/** \brief Get list of policers
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param policer_index - index of policer in the pool, ~0 to request all
+*/
+define policer_dump_v2
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+};
+
+service {
+ rpc policer_dump_v2 returns stream policer_details;
+};
+
/** \brief Policer operational state response.
@param context - sender context, to match reply w/ request
@param name - policer name
diff --git a/src/vnet/policer/policer.c b/src/vnet/policer/policer.c
index 516a029dcee..eb7d40a340a 100644
--- a/src/vnet/policer/policer.c
+++ b/src/vnet/policer/policer.c
@@ -49,105 +49,161 @@ vlib_combined_counter_main_t policer_counters[] = {
},
};
-clib_error_t *
-policer_add_del (vlib_main_t *vm, u8 *name, qos_pol_cfg_params_st *cfg,
- u32 *policer_index, u8 is_add)
+int
+policer_add (vlib_main_t *vm, const u8 *name, const qos_pol_cfg_params_st *cfg,
+ u32 *policer_index)
{
vnet_policer_main_t *pm = &vnet_policer_main;
policer_t test_policer;
policer_t *policer;
+ policer_t *pp;
+ qos_pol_cfg_params_st *cp;
uword *p;
u32 pi;
int rv;
+ int i;
p = hash_get_mem (pm->policer_config_by_name, name);
- if (is_add == 0)
- {
- /* free policer config and template */
- if (p == 0)
- {
- vec_free (name);
- return clib_error_return (0, "No such policer configuration");
- }
- pool_put_index (pm->configs, p[0]);
- pool_put_index (pm->policer_templates, p[0]);
- hash_unset_mem (pm->policer_config_by_name, name);
+ if (p != NULL)
+ return VNET_API_ERROR_VALUE_EXIST;
- /* free policer */
- p = hash_get_mem (pm->policer_index_by_name, name);
- if (p == 0)
- {
- vec_free (name);
- return clib_error_return (0, "No such policer");
- }
- pool_put_index (pm->policers, p[0]);
- hash_unset_mem (pm->policer_index_by_name, name);
+ /* Vet the configuration before adding it to the table */
+ rv = pol_logical_2_physical (cfg, &test_policer);
- vec_free (name);
- return 0;
- }
+ if (rv != 0)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ pool_get (pm->configs, cp);
+ pool_get_aligned (pm->policers, policer, CLIB_CACHE_LINE_BYTES);
- if (p != 0)
+ clib_memcpy (cp, cfg, sizeof (*cp));
+ clib_memcpy (policer, &test_policer, sizeof (*pp));
+
+ policer->name = format (0, "%s%c", name, 0);
+ pi = policer - pm->policers;
+
+ hash_set_mem (pm->policer_config_by_name, policer->name, cp - pm->configs);
+ hash_set_mem (pm->policer_index_by_name, policer->name, pi);
+ *policer_index = pi;
+ policer->thread_index = ~0;
+
+ for (i = 0; i < NUM_POLICE_RESULTS; i++)
{
- vec_free (name);
- return clib_error_return (0, "Policer already exists");
+ vlib_validate_combined_counter (&policer_counters[i], pi);
+ vlib_zero_combined_counter (&policer_counters[i], pi);
}
- /* Vet the configuration before adding it to the table */
- rv = pol_logical_2_physical (cfg, &test_policer);
+ return 0;
+}
+
+int
+policer_del (vlib_main_t *vm, u32 policer_index)
+{
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ policer_t *policer;
+ uword *p;
- if (rv == 0)
+ if (pool_is_free_index (pm->policers, policer_index))
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ policer = &pm->policers[policer_index];
+
+ p = hash_get_mem (pm->policer_config_by_name, policer->name);
+
+ /* free policer config */
+ if (p != NULL)
{
- policer_t *pp;
- qos_pol_cfg_params_st *cp;
- int i;
+ pool_put_index (pm->configs, p[0]);
+ hash_unset_mem (pm->policer_config_by_name, policer->name);
+ }
- pool_get (pm->configs, cp);
- pool_get (pm->policer_templates, pp);
+ /* free policer */
+ hash_unset_mem (pm->policer_index_by_name, policer->name);
+ vec_free (policer->name);
+ pool_put_index (pm->policers, policer_index);
- ASSERT (cp - pm->configs == pp - pm->policer_templates);
+ return 0;
+}
- clib_memcpy (cp, cfg, sizeof (*cp));
- clib_memcpy (pp, &test_policer, sizeof (*pp));
+int
+policer_update (vlib_main_t *vm, u32 policer_index,
+ const qos_pol_cfg_params_st *cfg)
+{
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ policer_t test_policer;
+ policer_t *policer;
+ qos_pol_cfg_params_st *cp;
+ uword *p;
+ u8 *name;
+ int rv;
+ int i;
- hash_set_mem (pm->policer_config_by_name, name, cp - pm->configs);
- pool_get_aligned (pm->policers, policer, CLIB_CACHE_LINE_BYTES);
- policer[0] = pp[0];
- pi = policer - pm->policers;
- hash_set_mem (pm->policer_index_by_name, name, pi);
- *policer_index = pi;
- policer->thread_index = ~0;
+ if (pool_is_free_index (pm->policers, policer_index))
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
- for (i = 0; i < NUM_POLICE_RESULTS; i++)
- {
- vlib_validate_combined_counter (&policer_counters[i], pi);
- vlib_zero_combined_counter (&policer_counters[i], pi);
- }
+ policer = &pm->policers[policer_index];
+
+ /* Vet the configuration before adding it to the table */
+ rv = pol_logical_2_physical (cfg, &test_policer);
+ if (rv != 0)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ p = hash_get_mem (pm->policer_config_by_name, policer->name);
+
+ if (PREDICT_TRUE (p != NULL))
+ {
+ cp = &pm->configs[p[0]];
}
else
{
- vec_free (name);
- return clib_error_return (0, "Config failed sanity check");
+ /* recover from a missing configuration */
+ pool_get (pm->configs, cp);
+ hash_set_mem (pm->policer_config_by_name, policer->name,
+ cp - pm->configs);
}
+ name = policer->name;
+
+ clib_memcpy (cp, cfg, sizeof (*cp));
+ clib_memcpy (policer, &test_policer, sizeof (*policer));
+
+ policer->name = name;
+ policer->thread_index = ~0;
+
+ for (i = 0; i < NUM_POLICE_RESULTS; i++)
+ vlib_zero_combined_counter (&policer_counters[i], policer_index);
+
return 0;
}
int
-policer_bind_worker (u8 *name, u32 worker, bool bind)
+policer_reset (vlib_main_t *vm, u32 policer_index)
{
vnet_policer_main_t *pm = &vnet_policer_main;
policer_t *policer;
- uword *p;
- p = hash_get_mem (pm->policer_index_by_name, name);
- if (p == 0)
- {
- return VNET_API_ERROR_NO_SUCH_ENTRY;
- }
+ if (pool_is_free_index (pm->policers, policer_index))
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
- policer = &pm->policers[p[0]];
+ policer = &pm->policers[policer_index];
+
+ policer->current_bucket = policer->current_limit;
+ policer->extended_bucket = policer->extended_limit;
+
+ return 0;
+}
+
+int
+policer_bind_worker (u32 policer_index, u32 worker, bool bind)
+{
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ policer_t *policer;
+
+ if (pool_is_free_index (pm->policers, policer_index))
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ policer = &pm->policers[policer_index];
if (bind)
{
@@ -166,54 +222,53 @@ policer_bind_worker (u8 *name, u32 worker, bool bind)
}
int
-policer_input (u8 *name, u32 sw_if_index, bool apply)
+policer_input (u32 policer_index, u32 sw_if_index, vlib_dir_t dir, bool apply)
{
vnet_policer_main_t *pm = &vnet_policer_main;
- policer_t *policer;
- u32 policer_index;
- uword *p;
- p = hash_get_mem (pm->policer_index_by_name, name);
- if (p == 0)
+ if (apply)
{
- return VNET_API_ERROR_NO_SUCH_ENTRY;
+ vec_validate (pm->policer_index_by_sw_if_index[dir], sw_if_index);
+ pm->policer_index_by_sw_if_index[dir][sw_if_index] = policer_index;
+ }
+ else
+ {
+ pm->policer_index_by_sw_if_index[dir][sw_if_index] = ~0;
}
- policer = &pm->policers[p[0]];
- policer_index = policer - pm->policers;
-
- if (apply)
+ if (dir == VLIB_RX)
{
- vec_validate (pm->policer_index_by_sw_if_index, sw_if_index);
- pm->policer_index_by_sw_if_index[sw_if_index] = policer_index;
+ vnet_feature_enable_disable ("device-input", "policer-input",
+ sw_if_index, apply, 0, 0);
}
else
{
- pm->policer_index_by_sw_if_index[sw_if_index] = ~0;
+ vnet_feature_enable_disable ("ip4-output", "policer-output", sw_if_index,
+ apply, 0, 0);
+ vnet_feature_enable_disable ("ip6-output", "policer-output", sw_if_index,
+ apply, 0, 0);
}
-
- vnet_feature_enable_disable ("device-input", "policer-input", sw_if_index,
- apply, 0, 0);
return 0;
}
u8 *
format_policer_instance (u8 * s, va_list * va)
{
+ vnet_policer_main_t *pm = &vnet_policer_main;
policer_t *i = va_arg (*va, policer_t *);
- uword pi = va_arg (*va, uword);
+ u32 policer_index = i - pm->policers;
int result;
vlib_counter_t counts[NUM_POLICE_RESULTS];
for (result = 0; result < NUM_POLICE_RESULTS; result++)
{
- vlib_get_combined_counter (&policer_counters[result], pi,
+ vlib_get_combined_counter (&policer_counters[result], policer_index,
&counts[result]);
}
- s = format (s, "policer at %llx: %s rate, %s color-aware\n",
- i, i->single_rate ? "single" : "dual",
- i->color_aware ? "is" : "not");
+ s =
+ format (s, "Policer at index %d: %s rate, %s color-aware\n", policer_index,
+ i->single_rate ? "single" : "dual", i->color_aware ? "is" : "not");
s = format (s, "cir %u tok/period, pir %u tok/period, scale %u\n",
i->cir_tokens_per_period, i->pir_tokens_per_period, i->scale);
s = format (s, "cur lim %u, cur bkt %u, ext lim %u, ext bkt %u\n",
@@ -465,6 +520,7 @@ unformat_policer_classify_next_index (unformat_input_t * input, va_list * va)
return 0;
p = hash_get_mem (pm->policer_index_by_name, match_name);
+ vec_free (match_name);
if (p == 0)
return 0;
@@ -503,12 +559,16 @@ static clib_error_t *
policer_add_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
{
+ vnet_policer_main_t *pm = &vnet_policer_main;
qos_pol_cfg_params_st c;
unformat_input_t _line_input, *line_input = &_line_input;
- u8 is_add = 1;
u8 *name = 0;
+ uword *p;
u32 pi;
+ u32 policer_index = ~0;
+ int rv = 0;
clib_error_t *error = NULL;
+ u8 is_update = cmd->function_arg;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
@@ -518,9 +578,9 @@ policer_add_command_fn (vlib_main_t *vm, unformat_input_t *input,
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "del"))
- is_add = 0;
- else if (unformat (line_input, "name %s", &name))
+ if (unformat (line_input, "name %s", &name))
+ ;
+ else if (is_update && unformat (line_input, "index %u", &policer_index))
;
else if (unformat (line_input, "color-aware"))
c.color_aware = 1;
@@ -536,10 +596,41 @@ policer_add_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
}
- error = policer_add_del (vm, name, &c, &pi, is_add);
+ if (is_update)
+ {
+ if (~0 == policer_index && 0 != name)
+ {
+ p = hash_get_mem (pm->policer_index_by_name, name);
+ if (p != NULL)
+ policer_index = p[0];
+ }
+
+ if (~0 != policer_index)
+ {
+ rv = policer_update (vm, policer_index, &c);
+ }
+ }
+ else
+ {
+ rv = policer_add (vm, name, &c, &pi);
+ }
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "No such policer");
+ break;
+ case VNET_API_ERROR_VALUE_EXIST:
+ error = clib_error_return (0, "Policer already exists");
+ break;
+ case VNET_API_ERROR_INVALID_VALUE:
+ error = clib_error_return (0, "Config failed sanity check");
+ break;
+ }
done:
unformat_free (line_input);
+ vec_free (name);
return error;
}
@@ -550,6 +641,10 @@ policer_del_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
unformat_input_t _line_input, *line_input = &_line_input;
clib_error_t *error = NULL;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ int rv;
+ u32 policer_index = ~0;
+ uword *p;
u8 *name = 0;
/* Get a line of input. */
@@ -560,6 +655,8 @@ policer_del_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
if (unformat (line_input, "name %s", &name))
;
+ else if (unformat (line_input, "index %u", &policer_index))
+ ;
else
{
error = clib_error_return (0, "unknown input `%U'",
@@ -568,10 +665,30 @@ policer_del_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
}
- error = policer_add_del (vm, name, NULL, NULL, 0);
+ if (~0 == policer_index && 0 != name)
+ {
+ p = hash_get_mem (pm->policer_index_by_name, name);
+ if (p != NULL)
+ policer_index = p[0];
+ }
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (~0 != policer_index)
+ rv = policer_del (vm, policer_index);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_INVALID_VALUE:
+ error = clib_error_return (0, "No such policer configuration");
+ break;
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "No such policer");
+ break;
+ }
done:
unformat_free (line_input);
+ vec_free (name);
return error;
}
@@ -582,13 +699,14 @@ policer_bind_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
unformat_input_t _line_input, *line_input = &_line_input;
clib_error_t *error = NULL;
- u8 bind, *name = 0;
- u32 worker;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ u8 bind = 1;
+ u8 *name = 0;
+ u32 worker = ~0;
+ u32 policer_index = ~0;
+ uword *p;
int rv;
- bind = 1;
- worker = ~0;
-
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -597,6 +715,8 @@ policer_bind_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
if (unformat (line_input, "name %s", &name))
;
+ else if (unformat (line_input, "index %u", &policer_index))
+ ;
else if (unformat (line_input, "unbind"))
bind = 0;
else if (unformat (line_input, "%d", &worker))
@@ -616,7 +736,16 @@ policer_bind_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
else
{
- rv = policer_bind_worker (name, worker, bind);
+ if (~0 == policer_index && 0 != name)
+ {
+ p = hash_get_mem (pm->policer_index_by_name, name);
+ if (p != NULL)
+ policer_index = p[0];
+ }
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (~0 != policer_index)
+ rv = policer_bind_worker (policer_index, worker, bind);
if (rv)
error = clib_error_return (0, "failed: `%d'", rv);
@@ -624,6 +753,7 @@ policer_bind_command_fn (vlib_main_t *vm, unformat_input_t *input,
done:
unformat_free (line_input);
+ vec_free (name);
return error;
}
@@ -634,12 +764,14 @@ policer_input_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
unformat_input_t _line_input, *line_input = &_line_input;
clib_error_t *error = NULL;
- u8 apply, *name = 0;
- u32 sw_if_index;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ u8 apply = 1;
+ u8 *name = 0;
+ u32 sw_if_index = ~0;
+ u32 policer_index = ~0;
+ uword *p;
int rv;
-
- apply = 1;
- sw_if_index = ~0;
+ vlib_dir_t dir = cmd->function_arg;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
@@ -649,6 +781,8 @@ policer_input_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
if (unformat (line_input, "name %s", &name))
;
+ else if (unformat (line_input, "index %u", &policer_index))
+ ;
else if (unformat (line_input, "unapply"))
apply = 0;
else if (unformat (line_input, "%U", unformat_vnet_sw_interface,
@@ -669,7 +803,16 @@ policer_input_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
else
{
- rv = policer_input (name, sw_if_index, apply);
+ if (~0 == policer_index && 0 != name)
+ {
+ p = hash_get_mem (pm->policer_index_by_name, name);
+ if (p != NULL)
+ policer_index = p[0];
+ }
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (~0 != policer_index)
+ rv = policer_input (policer_index, sw_if_index, dir, apply);
if (rv)
error = clib_error_return (0, "failed: `%d'", rv);
@@ -677,83 +820,199 @@ policer_input_command_fn (vlib_main_t *vm, unformat_input_t *input,
done:
unformat_free (line_input);
+ vec_free (name);
+
+ return error;
+}
+
+static clib_error_t *
+policer_reset_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = NULL;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ int rv;
+ u32 policer_index = ~0;
+ uword *p;
+ u8 *name = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "name %s", &name))
+ ;
+ else if (unformat (line_input, "index %u", &policer_index))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (~0 == policer_index && 0 != name)
+ {
+ p = hash_get_mem (pm->policer_index_by_name, name);
+ if (p != NULL)
+ policer_index = p[0];
+ }
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (~0 != policer_index)
+ rv = policer_reset (vm, policer_index);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "No such policer");
+ break;
+ }
+
+done:
+ unformat_free (line_input);
+ vec_free (name);
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (configure_policer_command, static) = {
.path = "configure policer",
- .short_help = "configure policer name <name> <params> ",
+ .short_help = "configure policer [name <name> | index <index>] [type 1r2c | "
+ "1r3c | 2r3c-2698 "
+ "| 2r3c-4115] [color-aware] [cir <cir>] [cb <cb>] [eir <eir>] "
+ "[eb <eb>] [rate kbps | pps] [round closest | up | down] "
+ "[conform-action drop | transmit | mark-and-transmit <dscp>] "
+ "[exceed-action drop | transmit | mark-and-transmit <dscp>] "
+ "[violate-action drop | transmit | mark-and-transmit <dscp>]",
.function = policer_add_command_fn,
+ .function_arg = 1
};
+
VLIB_CLI_COMMAND (policer_add_command, static) = {
.path = "policer add",
- .short_help = "policer name <name> <params> ",
+ .short_help = "policer add name <name> [type 1r2c | 1r3c | 2r3c-2698 | "
+ "2r3c-4115] [color-aware] [cir <cir>] [cb <cb>] [eir <eir>] "
+ "[eb <eb>] [rate kbps | pps] [round closest | up | down] "
+ "[conform-action drop | transmit | mark-and-transmit <dscp>] "
+ "[exceed-action drop | transmit | mark-and-transmit <dscp>] "
+ "[violate-action drop | transmit | mark-and-transmit <dscp>]",
.function = policer_add_command_fn,
+ .function_arg = 0
};
+
VLIB_CLI_COMMAND (policer_del_command, static) = {
.path = "policer del",
- .short_help = "policer del name <name> ",
+ .short_help = "policer del [name <name> | index <index>]",
.function = policer_del_command_fn,
};
+
VLIB_CLI_COMMAND (policer_bind_command, static) = {
.path = "policer bind",
- .short_help = "policer bind [unbind] name <name> <worker>",
+ .short_help = "policer bind [unbind] [name <name> | index <index>] <worker>",
.function = policer_bind_command_fn,
};
+
VLIB_CLI_COMMAND (policer_input_command, static) = {
.path = "policer input",
- .short_help = "policer input [unapply] name <name> <interfac>",
+ .short_help =
+ "policer input [unapply] [name <name> | index <index>] <interface>",
.function = policer_input_command_fn,
+ .function_arg = VLIB_RX,
+};
+
+VLIB_CLI_COMMAND (policer_output_command, static) = {
+ .path = "policer output",
+ .short_help =
+ "policer output [unapply] [name <name> | index <index>] <interface>",
+ .function = policer_input_command_fn,
+ .function_arg = VLIB_TX,
+};
+
+VLIB_CLI_COMMAND (policer_reset_command, static) = {
+ .path = "policer reset",
+ .short_help = "policer reset [name <name> | index <index>]",
+ .function = policer_reset_command_fn
};
-/* *INDENT-ON* */
static clib_error_t *
show_policer_command_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
vnet_policer_main_t *pm = &vnet_policer_main;
- hash_pair_t *p;
- u32 pool_index;
- u8 *match_name = 0;
- u8 *name;
- uword *pi;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ policer_t *policer;
+ u32 policer_index = ~0;
+ u8 *name = 0;
+ uword *ci, *pi;
qos_pol_cfg_params_st *config;
- policer_t *templ;
-
- (void) unformat (input, "name %s", &match_name);
-
- /* *INDENT-OFF* */
- hash_foreach_pair (p, pm->policer_config_by_name,
- ({
- name = (u8 *) p->key;
- if (match_name == 0 || !strcmp((char *) name, (char *) match_name))
- {
- pi = hash_get_mem (pm->policer_index_by_name, name);
-
- pool_index = p->value[0];
- config = pool_elt_at_index (pm->configs, pool_index);
- templ = pool_elt_at_index (pm->policer_templates, pool_index);
- vlib_cli_output (vm, "Name \"%s\" %U ", name, format_policer_config,
- config);
- vlib_cli_output (vm, "Template %U", format_policer_instance, templ,
- pi[0]);
- vlib_cli_output (vm, "-----------");
- }
- }));
- /* *INDENT-ON* */
- return 0;
+ clib_error_t *error = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ {
+ pool_foreach (policer, pm->policers)
+ {
+ ci = hash_get_mem (pm->policer_config_by_name, policer->name);
+ config = pool_elt_at_index (pm->configs, ci[0]);
+
+ vlib_cli_output (vm, "Name \"%s\" %U ", policer->name,
+ format_policer_config, config);
+ vlib_cli_output (vm, "%U", format_policer_instance, policer);
+ vlib_cli_output (vm, "-----------");
+ }
+ return 0;
+ }
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "name %s", &name))
+ ;
+ else if (unformat (line_input, "index %u", &policer_index))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (~0 == policer_index && 0 != name)
+ {
+ pi = hash_get_mem (pm->policer_index_by_name, name);
+ if (pi != NULL)
+ policer_index = pi[0];
+ }
+
+ if (~0 == policer_index || pool_is_free_index (pm->policers, policer_index))
+ goto done;
+
+ policer = &pm->policers[policer_index];
+ ci = hash_get_mem (pm->policer_config_by_name, policer->name);
+ config = pool_elt_at_index (pm->configs, ci[0]);
+ vlib_cli_output (vm, "Name \"%s\" %U ", policer->name, format_policer_config,
+ config);
+ vlib_cli_output (vm, "%U", format_policer_instance, policer);
+ vlib_cli_output (vm, "-----------");
+
+done:
+ unformat_free (line_input);
+ vec_free (name);
+
+ return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_policer_command, static) = {
- .path = "show policer",
- .short_help = "show policer [name]",
- .function = show_policer_command_fn,
+ .path = "show policer",
+ .short_help = "show policer [name <name> | index <index>]",
+ .function = show_policer_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_policer_pools_command_fn (vlib_main_t * vm,
@@ -762,19 +1021,15 @@ show_policer_pools_command_fn (vlib_main_t * vm,
{
vnet_policer_main_t *pm = &vnet_policer_main;
- vlib_cli_output (vm, "pool sizes: configs=%d templates=%d policers=%d",
- pool_elts (pm->configs),
- pool_elts (pm->policer_templates),
- pool_elts (pm->policers));
+ vlib_cli_output (vm, "pool sizes: configs=%d policers=%d",
+ pool_elts (pm->configs), pool_elts (pm->policers));
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_policer_pools_command, static) = {
.path = "show policer pools",
.short_help = "show policer pools",
.function = show_policer_pools_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
policer_init (vlib_main_t * vm)
@@ -784,7 +1039,10 @@ policer_init (vlib_main_t * vm)
pm->vlib_main = vm;
pm->vnet_main = vnet_get_main ();
pm->log_class = vlib_log_register_class ("policer", 0);
- pm->fq_index = vlib_frame_queue_main_init (policer_input_node.index, 0);
+ pm->fq_index[VLIB_RX] =
+ vlib_frame_queue_main_init (policer_input_node.index, 0);
+ pm->fq_index[VLIB_TX] =
+ vlib_frame_queue_main_init (policer_output_node.index, 0);
pm->policer_config_by_name = hash_create_string (0, sizeof (uword));
pm->policer_index_by_name = hash_create_string (0, sizeof (uword));
diff --git a/src/vnet/policer/policer.h b/src/vnet/policer/policer.h
index 2687064bf0d..7ce7fc79d47 100644
--- a/src/vnet/policer/policer.h
+++ b/src/vnet/policer/policer.h
@@ -32,14 +32,14 @@ typedef struct
qos_pol_cfg_params_st *configs;
policer_t *policer_templates;
- /* Config by name hash */
+ /* Config by policer name hash */
uword *policer_config_by_name;
/* Policer by name hash */
uword *policer_index_by_name;
/* Policer by sw_if_index vector */
- u32 *policer_index_by_sw_if_index;
+ u32 *policer_index_by_sw_if_index[VLIB_N_RX_TX];
/* convenience */
vlib_main_t *vlib_main;
@@ -48,7 +48,7 @@ typedef struct
vlib_log_class_t log_class;
/* frame queue for thread handoff */
- u32 fq_index;
+ u32 fq_index[VLIB_N_RX_TX];
u16 msg_id_base;
} vnet_policer_main_t;
@@ -58,6 +58,7 @@ extern vnet_policer_main_t vnet_policer_main;
extern vlib_combined_counter_main_t policer_counters[];
extern vlib_node_registration_t policer_input_node;
+extern vlib_node_registration_t policer_output_node;
typedef enum
{
@@ -67,11 +68,16 @@ typedef enum
} vnet_policer_next_t;
u8 *format_policer_instance (u8 * s, va_list * va);
-clib_error_t *policer_add_del (vlib_main_t *vm, u8 *name,
- qos_pol_cfg_params_st *cfg, u32 *policer_index,
- u8 is_add);
-int policer_bind_worker (u8 *name, u32 worker, bool bind);
-int policer_input (u8 *name, u32 sw_if_index, bool apply);
+int policer_add (vlib_main_t *vm, const u8 *name,
+ const qos_pol_cfg_params_st *cfg, u32 *policer_index);
+
+int policer_update (vlib_main_t *vm, u32 policer_index,
+ const qos_pol_cfg_params_st *cfg);
+int policer_del (vlib_main_t *vm, u32 policer_index);
+int policer_reset (vlib_main_t *vm, u32 policer_index);
+int policer_bind_worker (u32 policer_index, u32 worker, bool bind);
+int policer_input (u32 policer_index, u32 sw_if_index, vlib_dir_t dir,
+ bool apply);
#endif /* __included_policer_h__ */
diff --git a/src/vnet/policer/policer.rst b/src/vnet/policer/policer.rst
new file mode 100644
index 00000000000..0e7369e373b
--- /dev/null
+++ b/src/vnet/policer/policer.rst
@@ -0,0 +1,217 @@
+.. _policer:
+
+Policing
+========
+
+VPP implements several policer types, which do not always conform
+to the related RFCs [#rfc2697]_ [#rfc2698]_ [#rfc4115]_.
+Only the policers implemented in VPP are presented here, along with
+the ways in which they differ from the RFCs.
+
+.. contents:: :local:
+ :depth: 1
+
+
+1 rate 2 color (1r2c)
+---------------------
+
+This is the most straightforward policer. There is no RFC describing it;
+however, it is described in many vendor documents [#juniper]_ [#cisco]_.
+
+A 1r2c policer is well suited to classifying incoming packets into two
+categories: conforming packets (called green) and violating packets (called red).
+
+Parameters
+~~~~~~~~~~
+
+To set up such a policer, only two parameters are needed:
+
+Committed Information Rate (CIR)
+ Given in bytes per second, this parameter is the average
+ throughput allowed by the policer.
+
+ It sets the limit between conforming arriving packets (those making the
+ traffic fall below the CIR), and violating arriving packets
+ (those making the traffic exceed the CIR).
+
+Committed Burst Size (CBS)
+ It represents the size (in bytes) of a token bucket used to allow
+ some burstiness from the incoming traffic.
+
+.. figure:: /_images/policer-1r2c-bucket.png
+ :align: center
+ :scale: 25%
+
+ Figure 1: 1r2c bucket filling logic
+
+The committed token bucket (C) fills at CIR tokens (bytes)
+per second, up to CBS tokens. All overflowing tokens are lost.
+
+Color-Blind algorithm
+~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: /_images/policer-1r2c-blind.png
+ :align: center
+ :scale: 75%
+
+|
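+
+As a minimal sketch (illustrative variable names, not the actual VPP code),
+the color-blind 1r2c decision on packet arrival can be written as:
+
+.. code-block:: c
+
+    /* illustrative token-bucket state, not VPP's internal fields */
+    c_tokens += cir * (now - last_update); /* refill C at CIR bytes/sec */
+    if (c_tokens > cbs)
+      c_tokens = cbs; /* overflowing tokens are lost */
+    last_update = now;
+
+    if (pkt_len <= c_tokens)
+      {
+        c_tokens -= pkt_len; /* conform: the packet is green */
+        return GREEN;
+      }
+    return RED; /* violate: the packet is red */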
+
+Color-Aware algorithm
+~~~~~~~~~~~~~~~~~~~~~
+
+Online documentation does not describe a color-aware 1r2c policer;
+however, the VPP implementation supports one.
+
+.. image:: /_images/policer-1r2c-aware.png
+ :align: center
+ :scale: 75%
+
+|
+
+
+1 rate 3 color (1r3c) RFC 2697 [#rfc2697]_
+------------------------------------------
+
+As with the `1 rate 2 color (1r2c)`_ policer, only one rate parameter is required
+to set up a 1r3c policer. However, such a policer adds a third packet category:
+exceeding packets (called yellow).
+
+Parameters
+~~~~~~~~~~
+
+To set up such a policer, three parameters are needed:
+
+Committed Information Rate (CIR)
+ As in the `1 rate 2 color (1r2c)`_ policer.
+
+Committed Burst Size (CBS)
+ As in the `1 rate 2 color (1r2c)`_ policer.
+
+Excess Burst Size (EBS)
+   It represents the size (in bytes) of a second token bucket used
+   to allow additional burstiness from the incoming traffic when
+   traffic has been below the CIR for some time.
+
+.. figure:: /_images/policer-1r3c-bucket.png
+ :align: center
+ :scale: 25%
+
+ Figure 2: 1r3c buckets filling logic
+
+The committed token bucket (C) fills at CIR tokens (bytes)
+per second, up to CBS tokens. When C is full, tokens overflow
+into the excess token bucket (E), up to EBS tokens. Only tokens
+overflowing from E are lost.
+
+Color-Blind algorithm
+~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: /_images/policer-1r3c-blind.png
+ :align: center
+ :scale: 75%
+
+|
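+
+A minimal sketch of the RFC 2697 color-blind decision (illustrative names,
+not the actual VPP code; see the Notes below for how VPP's handling of the
+E bucket differs):
+
+.. code-block:: c
+
+    /* illustrative state: C refills at CIR up to CBS, and tokens
+       overflowing from C spill into E up to EBS */
+    if (pkt_len <= c_tokens)
+      {
+        c_tokens -= pkt_len;
+        return GREEN; /* conform */
+      }
+    if (pkt_len <= e_tokens)
+      {
+        e_tokens -= pkt_len;
+        return YELLOW; /* exceed */
+      }
+    return RED; /* violate */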
+
+Color-Aware algorithm
+~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: /_images/policer-1r3c-aware.png
+ :align: center
+ :scale: 75%
+
+|
+
+Notes
+~~~~~
+
+In RFC 2697 [#rfc2697]_, which describes the 1r3c policer, conforming (green)
+packets only consume tokens from the token bucket C, whereas in VPP they also
+consume tokens from E.
+
+One way to stick to the RFC is then to set the EBS parameter greater than CBS, so
+that EBS - CBS corresponds to the EBS from the RFC. For example, for an RFC-style
+CBS of 10000 bytes and EBS of 20000 bytes, configure CBS = 10000 and EBS = 30000.
+
+However, VPP does not enforce setting EBS > CBS, which could result in undesired
+behavior.
+
+2 rate 3 color (2r3c) RFC 2698 [#rfc2698]_
+------------------------------------------
+
+Instead of setting the limit between yellow and red packets in terms of bursts,
+as is done by `1 rate 3 color (1r3c) RFC 2697`_ policers, two-rate policers
+introduce a second rate parameter to discriminate between these two kinds of
+packets.
+
+Parameters
+~~~~~~~~~~
+
+To set up such a policer, four parameters are needed:
+
+Committed Information Rate (CIR)
+ As in the `1 rate 2 color (1r2c)`_ policer.
+
+Committed Burst Size (CBS)
+ As in the `1 rate 2 color (1r2c)`_ policer.
+
+Peak Information Rate (PIR)
+ Given in bytes per second, this parameter is the average
+ throughput allowed by the policer when there is a peak in
+ traffic.
+
+ It sets a second limit between exceeding arriving packets
+ (those making the traffic fall below the PIR, but above CIR),
+ and violating arriving packets (those making the traffic exceed the PIR).
+
+Peak Burst Size (PBS)
+   It represents the size (in bytes) of a second token bucket used
+   to allow additional peak traffic.
+
+.. figure:: /_images/policer-2r3c-bucket.png
+ :align: center
+ :scale: 25%
+
+   Figure 3: 2r3c-rfc2698 buckets filling logic
+
+The committed token bucket (C) fills at CIR tokens (bytes)
+per second, up to CBS tokens. Meanwhile, the peak token bucket (P)
+fills at PIR tokens per second, up to PBS. All tokens overflowing
+from C and P are lost.
+
+Color-Blind algorithm
+~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: /_images/policer-2r3c-blind.png
+ :align: center
+ :scale: 75%
+
+|
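+
+A minimal sketch of the RFC 2698 color-blind decision (illustrative names,
+not the actual VPP code):
+
+.. code-block:: c
+
+    /* illustrative state: P refills at PIR up to PBS,
+       C refills at CIR up to CBS */
+    if (pkt_len > p_tokens)
+      return RED; /* violate: above peak rate */
+    if (pkt_len > c_tokens)
+      {
+        p_tokens -= pkt_len;
+        return YELLOW; /* exceed: above committed rate */
+      }
+    p_tokens -= pkt_len;
+    c_tokens -= pkt_len;
+    return GREEN; /* conform */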
+
+Color-Aware algorithm
+~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: /_images/policer-2r3c-aware.png
+ :align: center
+ :scale: 50%
+
+|
+
+Notes
+~~~~~
+
+For the policer to work as intended, the condition PIR >= CIR must hold,
+since peak traffic is assumed to arrive at a greater rate than committed
+traffic.
+
+
+2 rate 3 color (2r3c) RFC 4115 [#rfc4115]_
+------------------------------------------
+
+The 2r3c-RFC4115 type is accepted as a configuration choice by VPP; however,
+no such policer is currently implemented. Hence, the only two-rate policer VPP
+implements is the `2 rate 3 color (2r3c) RFC 2698`_ policer.
+
+
+.. rubric:: References:
+
+.. [#juniper] https://www.juniper.net/documentation/us/en/software/junos/traffic-mgmt-nfx/routing-policy/topics/concept/tcm-overview-cos-qfx-series-understanding.html
+.. [#cisco] https://www.cisco.com/c/en/us/td/docs/ios-xml/ios/qos_mqc/configuration/xe-16-8/qos-mqc-xe-16-8-book/qos-pkt-policing.html
+.. [#rfc2697] https://www.rfc-editor.org/rfc/rfc2697.html
+.. [#rfc2698] https://www.rfc-editor.org/rfc/rfc2698.html
+.. [#rfc4115] https://www.rfc-editor.org/rfc/rfc4115.html
diff --git a/src/vnet/policer/policer_api.c b/src/vnet/policer/policer_api.c
index 1382d17e2de..df35b472a89 100644
--- a/src/vnet/policer/policer_api.c
+++ b/src/vnet/policer/policer_api.c
@@ -35,99 +35,293 @@ static void
vl_api_policer_add_del_t_handler (vl_api_policer_add_del_t * mp)
{
vlib_main_t *vm = vlib_get_main ();
+ vnet_policer_main_t *pm = &vnet_policer_main;
vl_api_policer_add_del_reply_t *rmp;
int rv = 0;
- u8 *name = NULL;
+ uword *p;
+ char name[sizeof (mp->name) + 1];
+ qos_pol_cfg_params_st cfg;
+ u32 policer_index;
+
+ snprintf (name, sizeof (name), "%s", mp->name);
+
+ if (mp->is_add)
+ {
+ clib_memset (&cfg, 0, sizeof (cfg));
+ cfg.rfc = (qos_policer_type_en) mp->type;
+ cfg.rnd_type = (qos_round_type_en) mp->round_type;
+ cfg.rate_type = (qos_rate_type_en) mp->rate_type;
+ cfg.rb.kbps.cir_kbps = ntohl (mp->cir);
+ cfg.rb.kbps.eir_kbps = ntohl (mp->eir);
+ cfg.rb.kbps.cb_bytes = clib_net_to_host_u64 (mp->cb);
+ cfg.rb.kbps.eb_bytes = clib_net_to_host_u64 (mp->eb);
+ cfg.conform_action.action_type =
+ (qos_action_type_en) mp->conform_action.type;
+ cfg.conform_action.dscp = mp->conform_action.dscp;
+ cfg.exceed_action.action_type =
+ (qos_action_type_en) mp->exceed_action.type;
+ cfg.exceed_action.dscp = mp->exceed_action.dscp;
+ cfg.violate_action.action_type =
+ (qos_action_type_en) mp->violate_action.type;
+ cfg.violate_action.dscp = mp->violate_action.dscp;
+ cfg.color_aware = mp->color_aware;
+
+ rv = policer_add (vm, (u8 *) name, &cfg, &policer_index);
+ }
+ else
+ {
+ p = hash_get_mem (pm->policer_index_by_name, name);
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (p != NULL)
+ rv = policer_del (vm, p[0]);
+ }
+
+ REPLY_MACRO2 (VL_API_POLICER_ADD_DEL_REPLY, ({
+ if (rv == 0 && mp->is_add)
+ rmp->policer_index = htonl (policer_index);
+ else
+ rmp->policer_index = ~0;
+ }));
+}
+
+static_always_inline void
+policer_set_configuration (qos_pol_cfg_params_st *cfg,
+ vl_api_policer_config_t *infos)
+{
+ clib_memset (cfg, 0, sizeof (*cfg));
+ cfg->rfc = (qos_policer_type_en) infos->type;
+ cfg->rnd_type = (qos_round_type_en) infos->round_type;
+ cfg->rate_type = (qos_rate_type_en) infos->rate_type;
+ cfg->rb.kbps.cir_kbps = ntohl (infos->cir);
+ cfg->rb.kbps.eir_kbps = ntohl (infos->eir);
+ cfg->rb.kbps.cb_bytes = clib_net_to_host_u64 (infos->cb);
+ cfg->rb.kbps.eb_bytes = clib_net_to_host_u64 (infos->eb);
+ cfg->conform_action.action_type =
+ (qos_action_type_en) infos->conform_action.type;
+ cfg->conform_action.dscp = infos->conform_action.dscp;
+ cfg->exceed_action.action_type =
+ (qos_action_type_en) infos->exceed_action.type;
+ cfg->exceed_action.dscp = infos->exceed_action.dscp;
+ cfg->violate_action.action_type =
+ (qos_action_type_en) infos->violate_action.type;
+ cfg->violate_action.dscp = infos->violate_action.dscp;
+ cfg->color_aware = infos->color_aware;
+}
+
+static void
+vl_api_policer_add_t_handler (vl_api_policer_add_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_policer_add_reply_t *rmp;
+ int rv = 0;
+ char name[sizeof (mp->name) + 1];
+ qos_pol_cfg_params_st cfg;
+ u32 policer_index;
+
+ snprintf (name, sizeof (name), "%s", mp->name);
+
+ policer_set_configuration (&cfg, &mp->infos);
+
+ rv = policer_add (vm, (u8 *) name, &cfg, &policer_index);
+
+ REPLY_MACRO2 (VL_API_POLICER_ADD_REPLY, ({
+ if (rv == 0)
+ rmp->policer_index = htonl (policer_index);
+ else
+ rmp->policer_index = ~0;
+ }));
+}
+
+static void
+vl_api_policer_del_t_handler (vl_api_policer_del_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_policer_del_reply_t *rmp;
+ u32 policer_index;
+ int rv = 0;
+
+ policer_index = ntohl (mp->policer_index);
+ rv = policer_del (vm, policer_index);
+
+ REPLY_MACRO (VL_API_POLICER_DEL_REPLY);
+}
+
+static void
+vl_api_policer_update_t_handler (vl_api_policer_update_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_policer_update_reply_t *rmp;
+ int rv = 0;
qos_pol_cfg_params_st cfg;
- clib_error_t *error;
u32 policer_index;
- name = format (0, "%s", mp->name);
- vec_terminate_c_string (name);
-
- clib_memset (&cfg, 0, sizeof (cfg));
- cfg.rfc = (qos_policer_type_en) mp->type;
- cfg.rnd_type = (qos_round_type_en) mp->round_type;
- cfg.rate_type = (qos_rate_type_en) mp->rate_type;
- cfg.rb.kbps.cir_kbps = ntohl (mp->cir);
- cfg.rb.kbps.eir_kbps = ntohl (mp->eir);
- cfg.rb.kbps.cb_bytes = clib_net_to_host_u64 (mp->cb);
- cfg.rb.kbps.eb_bytes = clib_net_to_host_u64 (mp->eb);
- cfg.conform_action.action_type =
- (qos_action_type_en) mp->conform_action.type;
- cfg.conform_action.dscp = mp->conform_action.dscp;
- cfg.exceed_action.action_type = (qos_action_type_en) mp->exceed_action.type;
- cfg.exceed_action.dscp = mp->exceed_action.dscp;
- cfg.violate_action.action_type =
- (qos_action_type_en) mp->violate_action.type;
- cfg.violate_action.dscp = mp->violate_action.dscp;
-
- cfg.color_aware = mp->color_aware;
-
- error = policer_add_del (vm, name, &cfg, &policer_index, mp->is_add);
-
- if (error)
- rv = VNET_API_ERROR_UNSPECIFIED;
-
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_POLICER_ADD_DEL_REPLY,
- ({
- if (rv == 0 && mp->is_add)
- rmp->policer_index = ntohl(policer_index);
- else
- rmp->policer_index = ~0;
- }));
- /* *INDENT-ON* */
+ policer_set_configuration (&cfg, &mp->infos);
+
+ policer_index = ntohl (mp->policer_index);
+ rv = policer_update (vm, policer_index, &cfg);
+
+ REPLY_MACRO (VL_API_POLICER_UPDATE_REPLY);
+}
+
+static void
+vl_api_policer_reset_t_handler (vl_api_policer_reset_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_policer_reset_reply_t *rmp;
+ u32 policer_index;
+ int rv = 0;
+
+ policer_index = ntohl (mp->policer_index);
+ rv = policer_reset (vm, policer_index);
+
+ REPLY_MACRO (VL_API_POLICER_RESET_REPLY);
}
static void
vl_api_policer_bind_t_handler (vl_api_policer_bind_t *mp)
{
vl_api_policer_bind_reply_t *rmp;
- u8 *name;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ char name[sizeof (mp->name) + 1];
+ uword *p;
u32 worker_index;
u8 bind_enable;
int rv;
- name = format (0, "%s", mp->name);
- vec_terminate_c_string (name);
+ snprintf (name, sizeof (name), "%s", mp->name);
worker_index = ntohl (mp->worker_index);
bind_enable = mp->bind_enable;
- rv = policer_bind_worker (name, worker_index, bind_enable);
- vec_free (name);
+ p = hash_get_mem (pm->policer_index_by_name, name);
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (p != NULL)
+ rv = policer_bind_worker (p[0], worker_index, bind_enable);
+
REPLY_MACRO (VL_API_POLICER_BIND_REPLY);
}
static void
+vl_api_policer_bind_v2_t_handler (vl_api_policer_bind_v2_t *mp)
+{
+ vl_api_policer_bind_v2_reply_t *rmp;
+ u32 policer_index;
+ u32 worker_index;
+ u8 bind_enable;
+ int rv;
+
+ policer_index = ntohl (mp->policer_index);
+ worker_index = ntohl (mp->worker_index);
+ bind_enable = mp->bind_enable;
+
+ rv = policer_bind_worker (policer_index, worker_index, bind_enable);
+
+ REPLY_MACRO (VL_API_POLICER_BIND_V2_REPLY);
+}
+
+static void
vl_api_policer_input_t_handler (vl_api_policer_input_t *mp)
{
- vl_api_policer_bind_reply_t *rmp;
- u8 *name;
+ vl_api_policer_input_reply_t *rmp;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ char name[sizeof (mp->name) + 1];
+ uword *p;
u32 sw_if_index;
u8 apply;
int rv;
VALIDATE_SW_IF_INDEX (mp);
- name = format (0, "%s", mp->name);
- vec_terminate_c_string (name);
+ snprintf (name, sizeof (name), "%s", mp->name);
sw_if_index = ntohl (mp->sw_if_index);
apply = mp->apply;
- rv = policer_input (name, sw_if_index, apply);
- vec_free (name);
+ p = hash_get_mem (pm->policer_index_by_name, name);
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (p != NULL)
+ rv = policer_input (p[0], sw_if_index, VLIB_RX, apply);
BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_POLICER_INPUT_REPLY);
}
static void
-send_policer_details (u8 *name, qos_pol_cfg_params_st *config,
- policer_t *templ, vl_api_registration_t *reg,
- u32 context)
+vl_api_policer_input_v2_t_handler (vl_api_policer_input_v2_t *mp)
+{
+ vl_api_policer_input_v2_reply_t *rmp;
+ u32 policer_index;
+ u32 sw_if_index;
+ u8 apply;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ policer_index = ntohl (mp->policer_index);
+ sw_if_index = ntohl (mp->sw_if_index);
+ apply = mp->apply;
+
+ rv = policer_input (policer_index, sw_if_index, VLIB_RX, apply);
+
+ BAD_SW_IF_INDEX_LABEL;
+  REPLY_MACRO (VL_API_POLICER_INPUT_V2_REPLY);
+}
+
+static void
+vl_api_policer_output_t_handler (vl_api_policer_output_t *mp)
+{
+ vl_api_policer_output_reply_t *rmp;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ char name[sizeof (mp->name) + 1];
+ uword *p;
+ u32 sw_if_index;
+ u8 apply;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ snprintf (name, sizeof (name), "%s", mp->name);
+
+ sw_if_index = ntohl (mp->sw_if_index);
+ apply = mp->apply;
+
+ p = hash_get_mem (pm->policer_index_by_name, name);
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (p != NULL)
+ rv = policer_input (p[0], sw_if_index, VLIB_TX, apply);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_POLICER_OUTPUT_REPLY);
+}
+
+static void
+vl_api_policer_output_v2_t_handler (vl_api_policer_output_v2_t *mp)
+{
+  vl_api_policer_output_v2_reply_t *rmp;
+ u32 policer_index;
+ u32 sw_if_index;
+ u8 apply;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ policer_index = ntohl (mp->policer_index);
+ sw_if_index = ntohl (mp->sw_if_index);
+ apply = mp->apply;
+
+ rv = policer_input (policer_index, sw_if_index, VLIB_TX, apply);
+
+ BAD_SW_IF_INDEX_LABEL;
+  REPLY_MACRO (VL_API_POLICER_OUTPUT_V2_REPLY);
+}
+
+static void
+send_policer_details (qos_pol_cfg_params_st *config, policer_t *policer,
+ vl_api_registration_t *reg, u32 context)
{
vl_api_policer_details_t *mp;
@@ -143,26 +337,27 @@ send_policer_details (u8 *name, qos_pol_cfg_params_st *config,
mp->round_type = (vl_api_sse2_qos_round_type_t) config->rnd_type;
mp->type = (vl_api_sse2_qos_policer_type_t) config->rfc;
mp->conform_action.type =
- (vl_api_sse2_qos_action_type_t) config->conform_action.action_type;
- mp->conform_action.dscp = config->conform_action.dscp;
+ (vl_api_sse2_qos_action_type_t) policer->action[POLICE_CONFORM];
+ mp->conform_action.dscp = policer->mark_dscp[POLICE_CONFORM];
mp->exceed_action.type =
- (vl_api_sse2_qos_action_type_t) config->exceed_action.action_type;
- mp->exceed_action.dscp = config->exceed_action.dscp;
+ (vl_api_sse2_qos_action_type_t) policer->action[POLICE_EXCEED];
+ mp->exceed_action.dscp = policer->mark_dscp[POLICE_EXCEED];
mp->violate_action.type =
- (vl_api_sse2_qos_action_type_t) config->violate_action.action_type;
- mp->violate_action.dscp = config->violate_action.dscp;
- mp->single_rate = templ->single_rate ? 1 : 0;
- mp->color_aware = templ->color_aware ? 1 : 0;
- mp->scale = htonl (templ->scale);
- mp->cir_tokens_per_period = htonl (templ->cir_tokens_per_period);
- mp->pir_tokens_per_period = htonl (templ->pir_tokens_per_period);
- mp->current_limit = htonl (templ->current_limit);
- mp->current_bucket = htonl (templ->current_bucket);
- mp->extended_limit = htonl (templ->extended_limit);
- mp->extended_bucket = htonl (templ->extended_bucket);
- mp->last_update_time = clib_host_to_net_u64 (templ->last_update_time);
-
- strncpy ((char *) mp->name, (char *) name, ARRAY_LEN (mp->name) - 1);
+ (vl_api_sse2_qos_action_type_t) policer->action[POLICE_VIOLATE];
+ mp->violate_action.dscp = policer->mark_dscp[POLICE_VIOLATE];
+ mp->single_rate = policer->single_rate ? 1 : 0;
+ mp->color_aware = policer->color_aware ? 1 : 0;
+ mp->scale = htonl (policer->scale);
+ mp->cir_tokens_per_period = htonl (policer->cir_tokens_per_period);
+ mp->pir_tokens_per_period = htonl (policer->pir_tokens_per_period);
+ mp->current_limit = htonl (policer->current_limit);
+ mp->current_bucket = htonl (policer->current_bucket);
+ mp->extended_limit = htonl (policer->extended_limit);
+ mp->extended_bucket = htonl (policer->extended_bucket);
+ mp->last_update_time = clib_host_to_net_u64 (policer->last_update_time);
+
+ strncpy ((char *) mp->name, (char *) policer->name,
+ ARRAY_LEN (mp->name) - 1);
vl_api_send_msg (reg, (u8 *) mp);
}
@@ -172,13 +367,11 @@ vl_api_policer_dump_t_handler (vl_api_policer_dump_t * mp)
{
vl_api_registration_t *reg;
vnet_policer_main_t *pm = &vnet_policer_main;
- hash_pair_t *hp;
- uword *p;
- u32 pool_index;
+ uword *p, *pi;
+ u32 pool_index, policer_index;
u8 *match_name = 0;
- u8 *name;
qos_pol_cfg_params_st *config;
- policer_t *templ;
+ policer_t *policer;
reg = vl_api_client_index_to_registration (mp->client_index);
if (!reg)
@@ -193,26 +386,67 @@ vl_api_policer_dump_t_handler (vl_api_policer_dump_t * mp)
if (mp->match_name_valid)
{
p = hash_get_mem (pm->policer_config_by_name, match_name);
- if (p)
+ pi = hash_get_mem (pm->policer_index_by_name, match_name);
+ if (0 == p || 0 == pi)
+ return;
+
+ pool_index = p[0];
+ policer_index = pi[0];
+ config = pool_elt_at_index (pm->configs, pool_index);
+ policer = pool_elt_at_index (pm->policers, policer_index);
+ send_policer_details (config, policer, reg, mp->context);
+ }
+ else
+ {
+ pool_foreach (policer, pm->policers)
+ {
+ p = hash_get_mem (pm->policer_config_by_name, policer->name);
+ if (0 == p)
+ continue;
+
+ pool_index = p[0];
+ config = pool_elt_at_index (pm->configs, pool_index);
+ send_policer_details (config, policer, reg, mp->context);
+ };
+ }
+}
+
+static void
+vl_api_policer_dump_v2_t_handler (vl_api_policer_dump_v2_t *mp)
+{
+ vl_api_registration_t *reg;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ qos_pol_cfg_params_st *config;
+ u32 policer_index, pool_index;
+ policer_t *policer;
+ uword *p;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ policer_index = ntohl (mp->policer_index);
+
+ if (~0 == policer_index)
+ {
+ pool_foreach (policer, pm->policers)
{
+ p = hash_get_mem (pm->policer_config_by_name, policer->name);
pool_index = p[0];
config = pool_elt_at_index (pm->configs, pool_index);
- templ = pool_elt_at_index (pm->policer_templates, pool_index);
- send_policer_details (match_name, config, templ, reg, mp->context);
- }
+ send_policer_details (config, policer, reg, mp->context);
+ };
}
else
{
- /* *INDENT-OFF* */
- hash_foreach_pair (hp, pm->policer_config_by_name,
- ({
- name = (u8 *) hp->key;
- pool_index = hp->value[0];
- config = pool_elt_at_index (pm->configs, pool_index);
- templ = pool_elt_at_index (pm->policer_templates, pool_index);
- send_policer_details(name, config, templ, reg, mp->context);
- }));
- /* *INDENT-ON* */
+ if (pool_is_free_index (pm->policers, policer_index))
+ return;
+
+ policer = &pm->policers[policer_index];
+ p = hash_get_mem (pm->policer_config_by_name, policer->name);
+ pool_index = p[0];
+ config = pool_elt_at_index (pm->configs, pool_index);
+ send_policer_details (config, policer, reg, mp->context);
}
}
diff --git a/src/vnet/policer/policer_types.api b/src/vnet/policer/policer_types.api
index 3e21b7d707c..9d4c6447f69 100644
--- a/src/vnet/policer/policer_types.api
+++ b/src/vnet/policer/policer_types.api
@@ -56,6 +56,34 @@ typedef sse2_qos_action
u8 dscp;
};
+/** \brief Policer configuration
+ @param cir - CIR
+ @param eir - EIR
+ @param cb - Committed Burst
+ @param eb - Excess or Peak Burst
+ @param rate_type - rate type
+ @param round_type - rounding type
+ @param type - policer algorithm
+ @param color_aware - 0=color-blind, 1=color-aware
+ @param conform_action - conform action
+ @param exceed_action - exceed action type
+ @param violate_action - violate action type
+*/
+typedef policer_config
+{
+ u32 cir;
+ u32 eir;
+ u64 cb;
+ u64 eb;
+ vl_api_sse2_qos_rate_type_t rate_type;
+ vl_api_sse2_qos_round_type_t round_type;
+ vl_api_sse2_qos_policer_type_t type;
+ bool color_aware;
+ vl_api_sse2_qos_action_t conform_action;
+ vl_api_sse2_qos_action_t exceed_action;
+ vl_api_sse2_qos_action_t violate_action;
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/vnet/policer/xlate.c b/src/vnet/policer/xlate.c
index 9c4d76fd990..bffd208716d 100644
--- a/src/vnet/policer/xlate.c
+++ b/src/vnet/policer/xlate.c
@@ -1058,7 +1058,7 @@ x86_pol_compute_hw_params (qos_pol_cfg_params_st *cfg, policer_t *hw)
* Return: Status, success or failure code.
*/
int
-pol_logical_2_physical (qos_pol_cfg_params_st *cfg, policer_t *phys)
+pol_logical_2_physical (const qos_pol_cfg_params_st *cfg, policer_t *phys)
{
int rc;
qos_pol_cfg_params_st kbps_cfg;
diff --git a/src/vnet/policer/xlate.h b/src/vnet/policer/xlate.h
index 722ac2fb777..7f6ebe7b65d 100644
--- a/src/vnet/policer/xlate.h
+++ b/src/vnet/policer/xlate.h
@@ -158,7 +158,7 @@ typedef struct qos_pol_hw_params_st_
u32 extd_bkt;
} qos_pol_hw_params_st;
-int pol_logical_2_physical (qos_pol_cfg_params_st *cfg, policer_t *phys);
+int pol_logical_2_physical (const qos_pol_cfg_params_st *cfg, policer_t *phys);
#endif /* __included_xlate_h__ */
diff --git a/src/vnet/ppp/node.c b/src/vnet/ppp/node.c
index eead2b2f0c1..fa056bfb99f 100644
--- a/src/vnet/ppp/node.c
+++ b/src/vnet/ppp/node.c
@@ -265,7 +265,6 @@ static char *ppp_error_strings[] = {
#undef ppp_error
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ppp_input_node) = {
.function = ppp_input,
.name = "ppp-input",
@@ -288,7 +287,6 @@ VLIB_REGISTER_NODE (ppp_input_node) = {
.format_trace = format_ppp_input_trace,
.unformat_buffer = unformat_ppp_header,
};
-/* *INDENT-ON* */
static clib_error_t *
ppp_input_runtime_init (vlib_main_t * vm)
diff --git a/src/vnet/ppp/ppp.c b/src/vnet/ppp/ppp.c
index b1fafa13145..8aa8504fcdd 100644
--- a/src/vnet/ppp/ppp.c
+++ b/src/vnet/ppp/ppp.c
@@ -197,7 +197,6 @@ ppp_build_rewrite (vnet_main_t * vnm,
return (rewrite);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (ppp_hw_interface_class) = {
.name = "PPP",
.format_header = format_ppp_header_with_length,
@@ -205,7 +204,6 @@ VNET_HW_INTERFACE_CLASS (ppp_hw_interface_class) = {
.build_rewrite = ppp_build_rewrite,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
static void
add_protocol (ppp_main_t * pm, ppp_protocol_t protocol, char *protocol_name)
diff --git a/src/vnet/qos/qos_egress_map.c b/src/vnet/qos/qos_egress_map.c
index 7985579d3cf..43c0c55df07 100644
--- a/src/vnet/qos/qos_egress_map.c
+++ b/src/vnet/qos/qos_egress_map.c
@@ -47,13 +47,11 @@ qos_egress_map_get_id (index_t qemi)
qos_egress_map_id_t qid;
index_t qmi;
- /* *INDENT-OFF* */
hash_foreach(qid, qmi, qem_db,
({
if (qmi == qemi)
return (qid);
}));
- /* *INDENT-OFF* */
return (~0);
}
@@ -129,12 +127,10 @@ qos_egress_map_walk (qos_egress_map_walk_cb_t fn, void *c)
qos_egress_map_id_t qid;
index_t qmi;
- /* *INDENT-OFF* */
hash_foreach(qid, qmi, qem_db,
({
fn(qid, pool_elt_at_index(qem_pool, qmi), c);
}));
- /* *INDENT-OFF* */
}
static clib_error_t *
@@ -181,14 +177,12 @@ qos_egress_map_update_cli (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{qos egress map id 0 [ip][4]=4}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_egress_map_update_command, static) = {
.path = "qos egress map",
.short_help = "qos egress map id %d [delete] {[SOURCE][INPUT]=OUTPUT}",
.function = qos_egress_map_update_cli,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
u8 *format_qos_egress_map (u8 * s, va_list * args)
{
@@ -239,7 +233,6 @@ VLIB_CLI_COMMAND (qos_egress_map_update_command, static) = {
{
index_t qemi;
- /* *INDENT-OFF* */
hash_foreach(map_id, qemi, qem_db,
({
vlib_cli_output (vm, " Map-ID:%d\n%U",
@@ -247,7 +240,6 @@ VLIB_CLI_COMMAND (qos_egress_map_update_command, static) = {
format_qos_egress_map,
pool_elt_at_index(qem_pool, qemi), 2);
}));
- /* *INDENT-ON* */
}
else
{
@@ -274,14 +266,12 @@ VLIB_CLI_COMMAND (qos_egress_map_update_command, static) = {
* @cliexpar
* @cliexcmd{show qos egress map}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_egress_map_show_command, static) = {
.path = "show qos egress map",
.short_help = "show qos egress map id %d",
.function = qos_egress_map_show,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/qos/qos_mark.c b/src/vnet/qos/qos_mark.c
index 44bb34bd010..3817c89a009 100644
--- a/src/vnet/qos/qos_mark.c
+++ b/src/vnet/qos/qos_mark.c
@@ -187,14 +187,12 @@ qos_mark_cli (vlib_main_t * vm,
* @cliexpar
 * @cliexcmd{qos mark ip GigEthernet0/9/0 id 0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_egress_map_interface_command, static) = {
.path = "qos mark",
.short_help = "qos mark <SOURCE> <INTERFACE> id <MAP>",
.function = qos_mark_cli,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static void
qos_mark_show_one_interface (vlib_main_t * vm, u32 sw_if_index)
@@ -271,14 +269,12 @@ qos_mark_show (vlib_main_t * vm,
* @cliexpar
 * @cliexcmd{show qos mark}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_mark_show_command, static) = {
.path = "show qos mark",
.short_help = "show qos mark [interface]",
.function = qos_mark_show,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/qos/qos_mark_node.c b/src/vnet/qos/qos_mark_node.c
index f12e66b4fa0..16a487aede8 100644
--- a/src/vnet/qos/qos_mark_node.c
+++ b/src/vnet/qos/qos_mark_node.c
@@ -212,7 +212,6 @@ VLIB_NODE_FN (vlan_ip6_qos_mark_node) (vlib_main_t * vm,
return (qos_mark_inline (vm, node, frame, QOS_SOURCE_VLAN, 0));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_qos_mark_node) = {
.name = "ip4-qos-mark",
.vector_size = sizeof (u32),
@@ -330,7 +329,6 @@ VNET_FEATURE_INIT (vlan_mpls_qos_mark_node, static) = {
.runs_after = VNET_FEATURES ("mpls-qos-mark"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/qos/qos_record.c b/src/vnet/qos/qos_record.c
index d52c1442d8d..fdf79766471 100644
--- a/src/vnet/qos/qos_record.c
+++ b/src/vnet/qos/qos_record.c
@@ -203,14 +203,12 @@ qos_record_cli (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{qos record ip GigEthernet0/1/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_record_command, static) = {
.path = "qos record",
.short_help = "qos record <record-source> <INTERFACE> [disable]",
.function = qos_record_cli,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static void
qos_record_show_one_interface (vlib_main_t * vm, u32 sw_if_index)
@@ -285,14 +283,12 @@ qos_record_show (vlib_main_t * vm,
* @cliexpar
 * @cliexcmd{show qos record}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_record_show_command, static) = {
.path = "show qos record",
.short_help = "show qos record [interface]",
.function = qos_record_show,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/qos/qos_record_node.c b/src/vnet/qos/qos_record_node.c
index 75e1421dc08..1a34891f85d 100644
--- a/src/vnet/qos/qos_record_node.c
+++ b/src/vnet/qos/qos_record_node.c
@@ -222,7 +222,6 @@ VLIB_NODE_FN (l2_ip_qos_record_node) (vlib_main_t * vm,
return (qos_record_inline (vm, node, frame, QOS_SOURCE_VLAN, 0, 1));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_qos_record_node) = {
.name = "ip4-qos-record",
.vector_size = sizeof (u32),
@@ -372,7 +371,6 @@ VLIB_REGISTER_NODE (l2_ip_qos_record_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/qos/qos_store.c b/src/vnet/qos/qos_store.c
index 1e8a53bbdfc..3424a914e35 100644
--- a/src/vnet/qos/qos_store.c
+++ b/src/vnet/qos/qos_store.c
@@ -211,14 +211,12 @@ qos_store_cli (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{qos store ip GigEthernet0/1/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_store_command, static) = {
.path = "qos store",
.short_help = "qos store <store-source> <INTERFACE> [disable]",
.function = qos_store_cli,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static void
qos_store_show_one_interface (vlib_main_t * vm, u32 sw_if_index)
@@ -295,14 +293,12 @@ qos_store_show (vlib_main_t * vm,
* @cliexpar
 * @cliexcmd{show qos store}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_store_show_command, static) = {
.path = "show qos store",
.short_help = "show qos store [interface]",
.function = qos_store_show,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/qos/qos_store_node.c b/src/vnet/qos/qos_store_node.c
index 2273b2eac77..6a5ad24453d 100644
--- a/src/vnet/qos/qos_store_node.c
+++ b/src/vnet/qos/qos_store_node.c
@@ -121,7 +121,6 @@ VLIB_NODE_FN (ip6_qos_store_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_qos_store_node) = {
.name = "ip4-qos-store",
.vector_size = sizeof (u32),
@@ -168,7 +167,6 @@ VNET_FEATURE_INIT (ip6m_qos_store_node, static) = {
.node_name = "ip6-qos-store",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c
index 30c59e1bc84..c66548507e5 100644
--- a/src/vnet/session/application.c
+++ b/src/vnet/session/application.c
@@ -31,10 +31,12 @@ static app_main_t app_main;
static app_listener_t *
app_listener_alloc (application_t * app)
{
+ app_main_t *am = &app_main;
app_listener_t *app_listener;
- pool_get (app->listeners, app_listener);
+
+ pool_get (am->listeners, app_listener);
clib_memset (app_listener, 0, sizeof (*app_listener));
- app_listener->al_index = app_listener - app->listeners;
+ app_listener->al_index = app_listener - am->listeners;
app_listener->app_index = app->app_index;
app_listener->session_index = SESSION_INVALID_INDEX;
app_listener->local_index = SESSION_INVALID_INDEX;
@@ -43,18 +45,23 @@ app_listener_alloc (application_t * app)
}
app_listener_t *
-app_listener_get (application_t * app, u32 app_listener_index)
+app_listener_get (u32 app_listener_index)
{
- return pool_elt_at_index (app->listeners, app_listener_index);
+ app_main_t *am = &app_main;
+
+ return pool_elt_at_index (am->listeners, app_listener_index);
}
static void
app_listener_free (application_t * app, app_listener_t * app_listener)
{
+ app_main_t *am = &app_main;
+
clib_bitmap_free (app_listener->workers);
+ vec_free (app_listener->cl_listeners);
if (CLIB_DEBUG)
clib_memset (app_listener, 0xfa, sizeof (*app_listener));
- pool_put (app->listeners, app_listener);
+ pool_put (am->listeners, app_listener);
}
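The net effect of this hunk is that listeners move from per-application pools into a single pool owned by app_main, so al_index alone identifies a listener process-wide. A minimal sketch of the pattern using vppinfra's pool macros (toy element type, not the actual VPP structs):

    #include <vppinfra/pool.h>

    typedef struct { u32 index; u32 app_index; } elt_t;
    static elt_t *global_pool;     /* one process-wide pool */

    static u32
    elt_alloc (u32 app_index)
    {
      elt_t *e;
      pool_get (global_pool, e);   /* may grow/move the pool */
      clib_memset (e, 0, sizeof (*e));
      e->index = e - global_pool;  /* index = slot offset, stable */
      e->app_index = app_index;
      return e->index;
    }

    static elt_t *
    elt_get (u32 index)            /* resolvable without the app */
    {
      return pool_elt_at_index (global_pool, index);
    }

This is why app_listener_get above can drop its application_t argument.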
session_handle_t
@@ -63,24 +70,14 @@ app_listener_handle (app_listener_t * al)
return al->ls_handle;
}
-app_listener_t *
-app_listener_get_w_session (session_t * ls)
-{
- application_t *app;
-
- app = application_get_if_valid (ls->app_index);
- if (!app)
- return 0;
- return app_listener_get (app, ls->al_index);
-}
-
session_handle_t
app_listen_session_handle (session_t * ls)
{
app_listener_t *al;
- al = app_listener_get_w_session (ls);
- if (!al)
+ /* TODO(fcoras): quic session handles */
+ if (ls->al_index == SESSION_INVALID_INDEX)
return listen_session_get_handle (ls);
+ al = app_listener_get (ls->al_index);
return al->ls_handle;
}
@@ -91,7 +88,7 @@ app_listener_get_w_handle (session_handle_t handle)
ls = session_get_from_handle_if_valid (handle);
if (!ls)
return 0;
- return app_listener_get_w_session (ls);
+ return app_listener_get (ls->al_index);
}
app_listener_t *
@@ -112,7 +109,7 @@ app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep_ext)
if (handle != SESSION_INVALID_HANDLE)
{
ls = listen_session_get_from_handle (handle);
- return app_listener_get_w_session (ls);
+ return app_listener_get (ls->al_index);
}
}
@@ -122,7 +119,7 @@ app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep_ext)
if (handle != SESSION_INVALID_HANDLE)
{
ls = listen_session_get_from_handle (handle);
- return app_listener_get_w_session ((session_t *) ls);
+ return app_listener_get (ls->al_index);
}
/*
@@ -144,7 +141,7 @@ app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep_ext)
if (handle != SESSION_INVALID_HANDLE)
{
ls = listen_session_get_from_handle (handle);
- return app_listener_get_w_session ((session_t *) ls);
+ return app_listener_get (ls->al_index);
}
}
}
@@ -181,7 +178,6 @@ app_listener_alloc_and_init (application_t * app,
local_st = session_type_from_proto_and_ip (TRANSPORT_PROTO_NONE,
sep->is_ip4);
ls = listen_session_alloc (0, local_st);
- ls->app_index = app->app_index;
ls->app_wrk_index = sep->app_wrk_index;
lh = session_handle (ls);
@@ -189,11 +185,12 @@ app_listener_alloc_and_init (application_t * app,
{
ls = session_get_from_handle (lh);
session_free (ls);
+ app_listener_free (app, app_listener);
return rv;
}
ls = session_get_from_handle (lh);
- app_listener = app_listener_get (app, al_index);
+ app_listener = app_listener_get (al_index);
app_listener->local_index = ls->session_index;
app_listener->ls_handle = lh;
ls->al_index = al_index;
@@ -212,7 +209,6 @@ app_listener_alloc_and_init (application_t * app,
 * build its own specific listening connection.
*/
ls = listen_session_alloc (0, st);
- ls->app_index = app->app_index;
ls->app_wrk_index = sep->app_wrk_index;
/* Listen pool can be reallocated if the transport is
@@ -223,10 +219,11 @@ app_listener_alloc_and_init (application_t * app,
{
ls = listen_session_get_from_handle (lh);
session_free (ls);
+ app_listener_free (app, app_listener);
return rv;
}
ls = listen_session_get_from_handle (lh);
- app_listener = app_listener_get (app, al_index);
+ app_listener = app_listener_get (al_index);
app_listener->session_index = ls->session_index;
app_listener->ls_handle = lh;
ls->al_index = al_index;
@@ -288,8 +285,9 @@ app_listener_cleanup (app_listener_t * al)
}
static app_worker_t *
-app_listener_select_worker (application_t * app, app_listener_t * al)
+app_listener_select_worker (app_listener_t *al)
{
+ application_t *app;
u32 wrk_index;
app = application_get (al->app_index);
@@ -319,6 +317,13 @@ app_listener_get_local_session (app_listener_t * al)
return listen_session_get (al->local_index);
}
+session_t *
+app_listener_get_wrk_cl_session (app_listener_t *al, u32 wrk_map_index)
+{
+ u32 si = vec_elt (al->cl_listeners, wrk_map_index);
+ return session_get (si, 0 /* listener thread */);
+}
+
static app_worker_map_t *
app_worker_map_alloc (application_t * app)
{
@@ -684,7 +689,7 @@ application_get_rx_mqs_segment (application_t *app)
{
if (application_use_private_rx_mqs ())
return &app->rx_mqs_segment;
- return session_main_get_evt_q_segment ();
+ return session_main_get_wrk_mqs_segment ();
}
void
@@ -723,6 +728,12 @@ application_get_if_valid (u32 app_index)
return pool_elt_at_index (app_main.app_pool, app_index);
}
+static int
+_null_app_tx_callback (session_t *s)
+{
+ return 0;
+}
+
static void
application_verify_cb_fns (session_cb_vft_t * cb_fns)
{
@@ -734,6 +745,8 @@ application_verify_cb_fns (session_cb_vft_t * cb_fns)
clib_warning ("No session disconnect callback function provided");
if (cb_fns->session_reset_callback == 0)
clib_warning ("No session reset callback function provided");
+ if (!cb_fns->builtin_app_tx_callback)
+ cb_fns->builtin_app_tx_callback = _null_app_tx_callback;
}
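Substituting a no-op default for a missing callback, as done for builtin_app_tx_callback here, lets the event loop call through the vtable unconditionally instead of null-checking per event. The shape in miniature, with a hypothetical vtable:

    typedef struct
    {
      int (*on_tx) (void *session);
    } cb_vft_t;

    static int
    null_on_tx (void *session)    /* harmless default */
    {
      return 0;
    }

    static void
    vft_fixup (cb_vft_t *vft)
    {
      if (!vft->on_tx)
        vft->on_tx = null_on_tx;  /* datapath may now skip the null check */
    }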
/**
@@ -747,14 +760,14 @@ application_verify_cfg (ssvm_segment_type_t st)
u8 is_valid;
if (st == SSVM_SEGMENT_MEMFD)
{
- is_valid = (session_main_get_evt_q_segment () != 0);
+ is_valid = (session_main_get_wrk_mqs_segment () != 0);
if (!is_valid)
clib_warning ("memfd seg: vpp's event qs IN binary api svm region");
return is_valid;
}
else if (st == SSVM_SEGMENT_SHM)
{
- is_valid = (session_main_get_evt_q_segment () == 0);
+ is_valid = (session_main_get_wrk_mqs_segment () == 0);
if (!is_valid)
clib_warning ("shm seg: vpp's event qs NOT IN binary api svm region");
return is_valid;
@@ -763,8 +776,8 @@ application_verify_cfg (ssvm_segment_type_t st)
return 1;
}
-static int
-application_alloc_and_init (app_init_args_t * a)
+static session_error_t
+application_alloc_and_init (app_init_args_t *a)
{
ssvm_segment_type_t seg_type = SSVM_SEGMENT_MEMFD;
segment_manager_props_t *props;
@@ -785,15 +798,15 @@ application_alloc_and_init (app_init_args_t * a)
{
clib_warning ("mq eventfds can only be used if socket transport is "
"used for binary api");
- return VNET_API_ERROR_APP_UNSUPPORTED_CFG;
+ return SESSION_E_NOSUPPORT;
}
if (!application_verify_cfg (seg_type))
- return VNET_API_ERROR_APP_UNSUPPORTED_CFG;
+ return SESSION_E_NOSUPPORT;
if (opts[APP_OPTIONS_PREALLOC_FIFO_PAIRS] &&
opts[APP_OPTIONS_PREALLOC_FIFO_HDRS])
- return VNET_API_ERROR_APP_UNSUPPORTED_CFG;
+ return SESSION_E_NOSUPPORT;
/* Check that the obvious things are properly set up */
application_verify_cb_fns (a->session_cb_vft);
@@ -819,6 +832,8 @@ application_alloc_and_init (app_init_args_t * a)
props->add_segment_size = opts[APP_OPTIONS_ADD_SEGMENT_SIZE];
props->add_segment = 1;
}
+ if (opts[APP_OPTIONS_FLAGS] & APP_OPTIONS_FLAGS_USE_HUGE_PAGE)
+ props->huge_page = 1;
if (opts[APP_OPTIONS_RX_FIFO_SIZE])
props->rx_fifo_size = opts[APP_OPTIONS_RX_FIFO_SIZE];
if (opts[APP_OPTIONS_TX_FIFO_SIZE])
@@ -872,12 +887,10 @@ application_free (application_t * app)
* Free workers
*/
- /* *INDENT-OFF* */
pool_flush (wrk_map, app->worker_maps, ({
app_wrk = app_worker_get (wrk_map->wrk_index);
app_worker_free (app_wrk);
}));
- /* *INDENT-ON* */
pool_free (app->worker_maps);
/*
@@ -920,13 +933,11 @@ application_detach_process (application_t * app, u32 api_client_index)
APP_DBG ("Detaching for app %v index %u api client index %u", app->name,
app->app_index, api_client_index);
- /* *INDENT-OFF* */
pool_foreach (wrk_map, app->worker_maps) {
app_wrk = app_worker_get (wrk_map->wrk_index);
if (app_wrk->api_client_index == api_client_index)
vec_add1 (wrks, app_wrk->wrk_index);
}
- /* *INDENT-ON* */
if (!vec_len (wrks))
{
@@ -997,12 +1008,55 @@ application_n_workers (application_t * app)
app_worker_t *
application_listener_select_worker (session_t * ls)
{
- application_t *app;
app_listener_t *al;
- app = application_get (ls->app_index);
- al = app_listener_get (app, ls->al_index);
- return app_listener_select_worker (app, al);
+ al = app_listener_get (ls->al_index);
+ return app_listener_select_worker (al);
+}
+
+always_inline u32
+app_listener_cl_flow_hash (session_dgram_hdr_t *hdr)
+{
+ u32 hash = 0;
+
+ if (hdr->is_ip4)
+ {
+ hash = clib_crc32c_u32 (hash, hdr->rmt_ip.ip4.as_u32);
+ hash = clib_crc32c_u32 (hash, hdr->lcl_ip.ip4.as_u32);
+ hash = clib_crc32c_u16 (hash, hdr->rmt_port);
+ hash = clib_crc32c_u16 (hash, hdr->lcl_port);
+ }
+ else
+ {
+ hash = clib_crc32c_u64 (hash, hdr->rmt_ip.ip6.as_u64[0]);
+ hash = clib_crc32c_u64 (hash, hdr->rmt_ip.ip6.as_u64[1]);
+ hash = clib_crc32c_u64 (hash, hdr->lcl_ip.ip6.as_u64[0]);
+ hash = clib_crc32c_u64 (hash, hdr->lcl_ip.ip6.as_u64[1]);
+ hash = clib_crc32c_u16 (hash, hdr->rmt_port);
+ hash = clib_crc32c_u16 (hash, hdr->lcl_port);
+ }
+
+ return hash;
+}
+
+session_t *
+app_listener_select_wrk_cl_session (session_t *ls, session_dgram_hdr_t *hdr)
+{
+ u32 wrk_map_index = 0;
+ app_listener_t *al;
+
+ al = app_listener_get (ls->al_index);
+ /* Crude test to check if only worker 0 is set */
+ if (al->workers[0] != 1)
+ {
+ u32 hash = app_listener_cl_flow_hash (hdr);
+ hash %= vec_len (al->workers) * sizeof (uword);
+ wrk_map_index = clib_bitmap_next_set (al->workers, hash);
+ if (wrk_map_index == ~0)
+ wrk_map_index = clib_bitmap_first_set (al->workers);
+ }
+
+ return app_listener_get_wrk_cl_session (al, wrk_map_index);
}
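The selection above hashes the flow's address/port tuple, maps the hash to a bit offset, and scans forward for the nearest registered worker, wrapping to the first set bit. A standalone sketch of the same wrap-around scan over a single-word worker mask (clib_bitmap handles multi-word masks in the real code):

    #include <stdint.h>

    /* Returns a worker bit in [0,64) chosen by hash, or -1 if mask empty. */
    static int
    pick_worker (uint64_t workers, uint32_t hash)
    {
      int start = hash % 64, i;

      for (i = start; i < 64; i++)  /* next set bit at/after start */
        if (workers & (1ULL << i))
          return i;
      for (i = 0; i < start; i++)   /* wrap to first set bit */
        if (workers & (1ULL << i))
          return i;
      return -1;
    }

Because the hash is derived from the flow, a given flow sticks to one worker for its lifetime, which is what makes the per-worker connection-less listener sessions usable without locks.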
int
@@ -1044,8 +1098,8 @@ application_alloc_worker_and_init (application_t * app, app_worker_t ** wrk)
return 0;
}
-int
-vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a)
+session_error_t
+vnet_app_worker_add_del (vnet_app_worker_add_del_args_t *a)
{
fifo_segment_t *fs;
app_worker_map_t *wrk_map;
@@ -1056,7 +1110,7 @@ vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a)
app = application_get (a->app_index);
if (!app)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if (a->is_add)
{
@@ -1079,11 +1133,11 @@ vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a)
{
wrk_map = app_worker_map_get (app, a->wrk_map_index);
if (!wrk_map)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
app_wrk = app_worker_get (wrk_map->wrk_index);
if (!app_wrk)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
application_api_table_del (app_wrk->api_client_index);
if (appns_sapi_enabled ())
@@ -1096,8 +1150,8 @@ vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a)
return 0;
}
-static int
-app_validate_namespace (u8 * namespace_id, u64 secret, u32 * app_ns_index)
+static session_error_t
+app_validate_namespace (u8 *namespace_id, u64 secret, u32 *app_ns_index)
{
app_namespace_t *app_ns;
if (vec_len (namespace_id) == 0)
@@ -1109,12 +1163,12 @@ app_validate_namespace (u8 * namespace_id, u64 secret, u32 * app_ns_index)
*app_ns_index = app_namespace_index_from_id (namespace_id);
if (*app_ns_index == APP_NAMESPACE_INVALID_INDEX)
- return VNET_API_ERROR_APP_INVALID_NS;
+ return SESSION_E_INVALID_NS;
app_ns = app_namespace_get (*app_ns_index);
if (!app_ns)
- return VNET_API_ERROR_APP_INVALID_NS;
+ return SESSION_E_INVALID_NS;
if (app_ns->ns_secret != secret)
- return VNET_API_ERROR_APP_WRONG_NS_SECRET;
+ return SESSION_E_WRONG_NS_SECRET;
return 0;
}
@@ -1138,8 +1192,8 @@ app_name_from_api_index (u32 api_client_index)
* to external app and a segment manager for shared memory fifo based
* communication with the external app.
*/
-int
-vnet_application_attach (vnet_app_attach_args_t * a)
+session_error_t
+vnet_application_attach (vnet_app_attach_args_t *a)
{
fifo_segment_t *fs;
application_t *app = 0;
@@ -1148,17 +1202,17 @@ vnet_application_attach (vnet_app_attach_args_t * a)
u32 app_ns_index = 0;
u8 *app_name = 0;
u64 secret;
- int rv;
+ session_error_t rv;
if (a->api_client_index != APP_INVALID_INDEX)
app = application_lookup (a->api_client_index);
else if (a->name)
app = application_lookup_name (a->name);
else
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if (app)
- return VNET_API_ERROR_APP_ALREADY_ATTACHED;
+ return SESSION_E_APP_ATTACHED;
/* Socket api sets the name and validates namespace prior to attach */
if (!a->use_sock_api)
@@ -1212,8 +1266,8 @@ vnet_application_attach (vnet_app_attach_args_t * a)
/**
* Detach application from vpp
*/
-int
-vnet_application_detach (vnet_app_detach_args_t * a)
+session_error_t
+vnet_application_detach (vnet_app_detach_args_t *a)
{
application_t *app;
@@ -1221,7 +1275,7 @@ vnet_application_detach (vnet_app_detach_args_t * a)
if (!app)
{
clib_warning ("app not attached");
- return VNET_API_ERROR_APPLICATION_NOT_ATTACHED;
+ return SESSION_E_NOAPP;
}
app_interface_check_thread_and_barrier (vnet_application_detach, a);
@@ -1229,11 +1283,15 @@ vnet_application_detach (vnet_app_detach_args_t * a)
return 0;
}
-
static u8
-session_endpoint_in_ns (session_endpoint_t * sep)
+session_endpoint_in_ns (session_endpoint_cfg_t *sep)
{
- u8 is_lep = session_endpoint_is_local (sep);
+ u8 is_lep;
+
+ if (sep->flags & SESSION_ENDPT_CFG_F_PROXY_LISTEN)
+ return 1;
+
+ is_lep = session_endpoint_is_local ((session_endpoint_t *) sep);
if (!is_lep && sep->sw_if_index != ENDPOINT_INVALID_INDEX
&& !ip_interface_has_address (sep->sw_if_index, &sep->ip, sep->is_ip4))
{
@@ -1242,6 +1300,7 @@ session_endpoint_in_ns (session_endpoint_t * sep)
sep->is_ip4);
return 0;
}
+
return (is_lep || ip_is_local (sep->fib_index, &sep->ip, sep->is_ip4));
}
@@ -1290,8 +1349,8 @@ session_endpoint_update_for_app (session_endpoint_cfg_t * sep,
}
}
-int
-vnet_listen (vnet_listen_args_t * a)
+session_error_t
+vnet_listen (vnet_listen_args_t *a)
{
app_listener_t *app_listener;
app_worker_t *app_wrk;
@@ -1311,7 +1370,7 @@ vnet_listen (vnet_listen_args_t * a)
a->sep_ext.app_wrk_index = app_wrk->wrk_index;
session_endpoint_update_for_app (&a->sep_ext, app, 0 /* is_connect */ );
- if (!session_endpoint_in_ns (&a->sep))
+ if (!session_endpoint_in_ns (&a->sep_ext))
return SESSION_E_INVALID_NS;
/*
@@ -1344,13 +1403,13 @@ vnet_listen (vnet_listen_args_t * a)
return 0;
}
-int
-vnet_connect (vnet_connect_args_t * a)
+session_error_t
+vnet_connect (vnet_connect_args_t *a)
{
app_worker_t *client_wrk;
application_t *client;
- ASSERT (vlib_thread_is_main_w_barrier ());
+ ASSERT (session_vlib_thread_is_cl_thread ());
if (session_endpoint_is_zero (&a->sep))
return SESSION_E_INVALID_RMT_IP;
@@ -1368,7 +1427,7 @@ vnet_connect (vnet_connect_args_t * a)
*/
if (application_has_local_scope (client))
{
- int rv;
+ session_error_t rv;
a->sep_ext.original_tp = a->sep_ext.transport_proto;
a->sep_ext.transport_proto = TRANSPORT_PROTO_NONE;
@@ -1383,8 +1442,8 @@ vnet_connect (vnet_connect_args_t * a)
return app_worker_connect_session (client_wrk, &a->sep_ext, &a->sh);
}
-int
-vnet_unlisten (vnet_unlisten_args_t * a)
+session_error_t
+vnet_unlisten (vnet_unlisten_args_t *a)
{
app_worker_t *app_wrk;
app_listener_t *al;
@@ -1414,7 +1473,7 @@ vnet_unlisten (vnet_unlisten_args_t * a)
return app_worker_stop_listen (app_wrk, al);
}
-int
+session_error_t
vnet_shutdown_session (vnet_shutdown_args_t *a)
{
app_worker_t *app_wrk;
@@ -1435,8 +1494,8 @@ vnet_shutdown_session (vnet_shutdown_args_t *a)
return 0;
}
-int
-vnet_disconnect_session (vnet_disconnect_args_t * a)
+session_error_t
+vnet_disconnect_session (vnet_disconnect_args_t *a)
{
app_worker_t *app_wrk;
session_t *s;
@@ -1476,7 +1535,7 @@ application_change_listener_owner (session_t * s, app_worker_t * app_wrk)
if (!app)
return SESSION_E_NOAPP;
- app_listener = app_listener_get (app, s->al_index);
+ app_listener = app_listener_get (s->al_index);
/* Only remove from lb for now */
app_listener->workers = clib_bitmap_set (app_listener->workers,
@@ -1520,6 +1579,12 @@ application_has_global_scope (application_t * app)
return app->flags & APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE;
}
+int
+application_original_dst_is_enabled (application_t *app)
+{
+ return app->flags & APP_OPTIONS_FLAGS_GET_ORIGINAL_DST;
+}
+
static clib_error_t *
application_start_stop_proxy_fib_proto (application_t * app, u8 fib_proto,
u8 transport_proto, u8 is_start)
@@ -1676,12 +1741,11 @@ application_format_listeners (application_t * app, int verbose)
if (!app)
{
- vlib_cli_output (vm, "%U", format_app_worker_listener, 0 /* header */ ,
+ vlib_cli_output (vm, "%U", format_app_worker_listener, NULL /* header */,
0, 0, verbose);
return;
}
- /* *INDENT-OFF* */
pool_foreach (wrk_map, app->worker_maps) {
app_wrk = app_worker_get (wrk_map->wrk_index);
if (hash_elts (app_wrk->listeners_table) == 0)
@@ -1691,7 +1755,6 @@ application_format_listeners (application_t * app, int verbose)
handle, sm_index, verbose);
}));
}
- /* *INDENT-ON* */
}
static void
@@ -1706,12 +1769,10 @@ application_format_connects (application_t * app, int verbose)
return;
}
- /* *INDENT-OFF* */
pool_foreach (wrk_map, app->worker_maps) {
app_wrk = app_worker_get (wrk_map->wrk_index);
app_worker_format_connects (app_wrk, verbose);
}
- /* *INDENT-ON* */
}
u8 *
@@ -1812,12 +1873,10 @@ format_application (u8 * s, va_list * args)
format_memory_size, props->rx_fifo_size,
format_memory_size, props->tx_fifo_size);
- /* *INDENT-OFF* */
pool_foreach (wrk_map, app->worker_maps) {
app_wrk = app_worker_get (wrk_map->wrk_index);
s = format (s, "%U", format_app_worker, app_wrk);
}
- /* *INDENT-ON* */
return s;
}
@@ -1835,11 +1894,9 @@ application_format_all_listeners (vlib_main_t * vm, int verbose)
application_format_listeners (0, verbose);
- /* *INDENT-OFF* */
pool_foreach (app, app_main.app_pool) {
application_format_listeners (app, verbose);
}
- /* *INDENT-ON* */
}
void
@@ -1855,11 +1912,9 @@ application_format_all_clients (vlib_main_t * vm, int verbose)
application_format_connects (0, verbose);
- /* *INDENT-OFF* */
pool_foreach (app, app_main.app_pool) {
application_format_connects (app, verbose);
}
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -1869,11 +1924,9 @@ show_certificate_command_fn (vlib_main_t * vm, unformat_input_t * input,
app_cert_key_pair_t *ckpair;
session_cli_return_if_not_enabled ();
- /* *INDENT-OFF* */
pool_foreach (ckpair, app_main.cert_key_pair_store) {
vlib_cli_output (vm, "%U", format_cert_key_pair, ckpair);
}
- /* *INDENT-ON* */
return 0;
}
@@ -1884,14 +1937,12 @@ appliction_format_app_mq (vlib_main_t * vm, application_t * app)
app_worker_t *wrk;
int i;
- /* *INDENT-OFF* */
pool_foreach (map, app->worker_maps) {
wrk = app_worker_get (map->wrk_index);
vlib_cli_output (vm, "[A%d][%d]%U", app->app_index,
map->wrk_index, format_svm_msg_q,
wrk->event_queue);
}
- /* *INDENT-ON* */
for (i = 0; i < vec_len (app->rx_mqs); i++)
vlib_cli_output (vm, "[A%d][R%d]%U", app->app_index, i, format_svm_msg_q,
@@ -1912,11 +1963,9 @@ appliction_format_all_app_mq (vlib_main_t * vm)
session_main_get_vpp_event_queue (i));
}
- /* *INDENT-OFF* */
pool_foreach (app, app_main.app_pool) {
appliction_format_app_mq (vm, app);
}
- /* *INDENT-ON* */
return 0;
}
@@ -1924,10 +1973,11 @@ static clib_error_t *
show_app_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- int do_server = 0, do_client = 0, do_mq = 0;
+ int do_server = 0, do_client = 0, do_mq = 0, do_transports = 0;
application_t *app;
u32 app_index = ~0;
int verbose = 0;
+ u8 is_ta;
session_cli_return_if_not_enabled ();
@@ -1937,6 +1987,8 @@ show_app_command_fn (vlib_main_t * vm, unformat_input_t * input,
do_server = 1;
else if (unformat (input, "client"))
do_client = 1;
+ else if (unformat (input, "transports"))
+ do_transports = 1;
else if (unformat (input, "mq"))
do_mq = 1;
else if (unformat (input, "%u", &app_index))
@@ -1990,11 +2042,11 @@ show_app_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (!do_server && !do_client)
{
vlib_cli_output (vm, "%U", format_application, 0, 0);
- /* *INDENT-OFF* */
pool_foreach (app, app_main.app_pool) {
- vlib_cli_output (vm, "%U", format_application, app, 0);
+ is_ta = app->flags & APP_OPTIONS_FLAGS_IS_TRANSPORT_APP;
+ if ((!do_transports && !is_ta) || (do_transports && is_ta))
+ vlib_cli_output (vm, "%U", format_application, app, 0);
}
- /* *INDENT-ON* */
}
return 0;
@@ -2064,7 +2116,7 @@ vnet_app_del_cert_key_pair (u32 index)
u32 *app_index;
if (!(ckpair = app_cert_key_pair_get_if_valid (index)))
- return (VNET_API_ERROR_INVALID_VALUE);
+ return SESSION_E_INVALID;
vec_foreach (app_index, ckpair->app_interests)
{
@@ -2097,23 +2149,20 @@ application_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (application_init);
-VLIB_CLI_COMMAND (show_app_command, static) =
-{
+VLIB_CLI_COMMAND (show_app_command, static) = {
.path = "show app",
- .short_help = "show app [app_id] [server|client] [mq] [verbose]",
+ .short_help = "show app [index] [server|client] [mq] [verbose] "
+ "[transports]",
.function = show_app_command_fn,
};
-VLIB_CLI_COMMAND (show_certificate_command, static) =
-{
+VLIB_CLI_COMMAND (show_certificate_command, static) = {
.path = "show app certificate",
.short_help = "list app certs and keys present in store",
.function = show_certificate_command_fn,
};
-/* *INDENT-ON* */
crypto_engine_type_t
app_crypto_engine_type_add (void)
diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h
index b3201b9833e..c68a911230f 100644
--- a/src/vnet/session/application.h
+++ b/src/vnet/session/application.h
@@ -29,6 +29,16 @@
#define APP_DBG(_fmt, _args...)
#endif
+typedef struct app_wrk_postponed_msg_
+{
+ u32 len;
+ u8 event_type;
+ u8 ring;
+ u8 is_sapi;
+ int fd;
+ u8 data[SESSION_CTRL_MSG_TX_MAX_SIZE];
+} app_wrk_postponed_msg_t;
+
typedef struct app_worker_
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -59,11 +69,20 @@ typedef struct app_worker_
/** API index for the worker. Needed for multi-process apps */
u32 api_client_index;
+ /** Set if mq is congested */
+ u8 mq_congested;
+
u8 app_is_builtin;
/** Pool of half-open session handles. Tracked in case worker detaches */
session_handle_t *half_open_table;
+ /* Per vpp worker fifos of events for app worker */
+ session_event_t **wrk_evts;
+
+ /* Vector of vpp workers mq congestion flags */
+ u8 *wrk_mq_congested;
+
/** Protects detached seg managers */
clib_spinlock_t detached_seg_managers_lock;
@@ -87,6 +106,8 @@ typedef struct app_listener_
session_handle_t ls_handle; /**< session handle of the local or global
listening session that also identifies
the app listener */
+ u32 *cl_listeners; /**< vector that maps app workers to their
+ cl sessions with fifos */
} app_listener_t;
typedef enum app_rx_mq_flags_
@@ -130,9 +151,6 @@ typedef struct application_
u16 proxied_transports;
- /** Pool of listeners for the app */
- app_listener_t *listeners;
-
/** Preferred tls engine */
u8 tls_engine;
@@ -179,6 +197,9 @@ typedef struct app_main_
*/
application_t *app_pool;
+ /** Pool of app listeners */
+ app_listener_t *listeners;
+
/**
* Hash table of apps by api client index
*/
@@ -227,7 +248,7 @@ typedef struct _vnet_app_worker_add_del_args
#define APP_NS_INVALID_INDEX ((u32)~0)
#define APP_INVALID_SEGMENT_MANAGER_INDEX ((u32) ~0)
-app_listener_t *app_listener_get (application_t * app, u32 al_index);
+app_listener_t *app_listener_get (u32 al_index);
int app_listener_alloc_and_init (application_t * app,
session_endpoint_cfg_t * sep,
app_listener_t ** listener);
@@ -235,6 +256,8 @@ void app_listener_cleanup (app_listener_t * app_listener);
session_handle_t app_listener_handle (app_listener_t * app_listener);
app_listener_t *app_listener_lookup (application_t * app,
session_endpoint_cfg_t * sep);
+session_t *app_listener_select_wrk_cl_session (session_t *ls,
+ session_dgram_hdr_t *hdr);
/**
* Get app listener handle for listening session
@@ -258,9 +281,9 @@ session_handle_t app_listen_session_handle (session_t * ls);
* @return pointer to app listener or 0
*/
app_listener_t *app_listener_get_w_handle (session_handle_t handle);
-app_listener_t *app_listener_get_w_session (session_t * ls);
session_t *app_listener_get_session (app_listener_t * al);
session_t *app_listener_get_local_session (app_listener_t * al);
+session_t *app_listener_get_wrk_cl_session (app_listener_t *al, u32 wrk_index);
application_t *application_get (u32 index);
application_t *application_get_if_valid (u32 index);
@@ -281,6 +304,7 @@ u8 application_has_global_scope (application_t * app);
void application_setup_proxy (application_t * app);
void application_remove_proxy (application_t * app);
void application_namespace_cleanup (app_namespace_t *app_ns);
+int application_original_dst_is_enabled (application_t *app);
segment_manager_props_t *application_get_segment_manager_properties (u32
app_index);
@@ -297,6 +321,12 @@ void application_enable_rx_mqs_nodes (u8 is_en);
* App worker
*/
+always_inline u8
+app_worker_mq_is_congested (app_worker_t *app_wrk)
+{
+ return app_wrk->mq_congested > 0;
+}
+
app_worker_t *app_worker_alloc (application_t * app);
int application_alloc_worker_and_init (application_t * app,
app_worker_t ** wrk);
@@ -307,9 +337,14 @@ int app_worker_own_session (app_worker_t * app_wrk, session_t * s);
void app_worker_free (app_worker_t * app_wrk);
int app_worker_connect_session (app_worker_t *app, session_endpoint_cfg_t *sep,
session_handle_t *rsh);
-int app_worker_start_listen (app_worker_t * app_wrk, app_listener_t * lstnr);
+session_error_t app_worker_start_listen (app_worker_t *app_wrk,
+ app_listener_t *lstnr);
int app_worker_stop_listen (app_worker_t * app_wrk, app_listener_t * al);
int app_worker_init_accepted (session_t * s);
+int app_worker_listened_notify (app_worker_t *app_wrk, session_handle_t alsh,
+ u32 opaque, session_error_t err);
+int app_worker_unlisten_reply (app_worker_t *app_wrk, session_handle_t sh,
+ u32 opaque, session_error_t err);
int app_worker_accept_notify (app_worker_t * app_wrk, session_t * s);
int app_worker_init_connected (app_worker_t * app_wrk, session_t * s);
int app_worker_connect_notify (app_worker_t * app_wrk, session_t * s,
@@ -322,13 +357,21 @@ int app_worker_transport_closed_notify (app_worker_t * app_wrk,
int app_worker_reset_notify (app_worker_t * app_wrk, session_t * s);
int app_worker_cleanup_notify (app_worker_t * app_wrk, session_t * s,
session_cleanup_ntf_t ntf);
+int app_worker_cleanup_notify_custom (app_worker_t *app_wrk, session_t *s,
+ session_cleanup_ntf_t ntf,
+ void (*cleanup_cb) (session_t *s));
int app_worker_migrate_notify (app_worker_t * app_wrk, session_t * s,
session_handle_t new_sh);
-int app_worker_builtin_rx (app_worker_t * app_wrk, session_t * s);
-int app_worker_builtin_tx (app_worker_t * app_wrk, session_t * s);
+int app_worker_rx_notify (app_worker_t *app_wrk, session_t *s);
int app_worker_session_fifo_tuning (app_worker_t * app_wrk, session_t * s,
svm_fifo_t * f,
session_ft_action_t act, u32 len);
+void app_worker_add_event (app_worker_t *app_wrk, session_t *s,
+ session_evt_type_t evt_type);
+void app_worker_add_event_custom (app_worker_t *app_wrk, u32 thread_index,
+ session_event_t *evt);
+int app_wrk_flush_wrk_events (app_worker_t *app_wrk, u32 thread_index);
+void app_worker_del_all_events (app_worker_t *app_wrk);
segment_manager_t *app_worker_get_listen_segment_manager (app_worker_t *,
session_t *);
segment_manager_t *app_worker_get_connect_segment_manager (app_worker_t *);
@@ -339,9 +382,14 @@ int app_worker_del_segment_notify (app_worker_t * app_wrk,
u32 app_worker_n_listeners (app_worker_t * app);
session_t *app_worker_first_listener (app_worker_t * app,
u8 fib_proto, u8 transport_proto);
-int app_worker_send_event (app_worker_t * app, session_t * s, u8 evt);
-int app_worker_lock_and_send_event (app_worker_t * app, session_t * s,
- u8 evt_type);
+void app_wrk_send_ctrl_evt_fd (app_worker_t *app_wrk, u8 evt_type, void *msg,
+ u32 msg_len, int fd);
+void app_wrk_send_ctrl_evt (app_worker_t *app_wrk, u8 evt_type, void *msg,
+ u32 msg_len);
+u8 app_worker_mq_wrk_is_congested (app_worker_t *app_wrk, u32 thread_index);
+void app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, u32 thread_index);
+void app_worker_unset_wrk_mq_congested (app_worker_t *app_wrk,
+ u32 thread_index);
session_t *app_worker_proxy_listener (app_worker_t * app, u8 fib_proto,
u8 transport_proto);
void app_worker_del_detached_sm (app_worker_t * app_wrk, u32 sm_index);
@@ -350,7 +398,7 @@ u8 *format_app_worker_listener (u8 * s, va_list * args);
u8 *format_crypto_engine (u8 * s, va_list * args);
u8 *format_crypto_context (u8 * s, va_list * args);
void app_worker_format_connects (app_worker_t * app_wrk, int verbose);
-int vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a);
+session_error_t vnet_app_worker_add_del (vnet_app_worker_add_del_args_t *a);
uword unformat_application_proto (unformat_input_t * input, va_list * args);
@@ -358,18 +406,17 @@ app_cert_key_pair_t *app_cert_key_pair_get (u32 index);
app_cert_key_pair_t *app_cert_key_pair_get_if_valid (u32 index);
app_cert_key_pair_t *app_cert_key_pair_get_default ();
-/* Needed while we support both bapi and mq ctrl messages */
-int mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context,
- session_handle_t handle, int rv);
-int mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
- session_t * s, session_error_t err);
-void mq_send_unlisten_reply (app_worker_t * app_wrk, session_handle_t sh,
- u32 context, int rv);
void sapi_socket_close_w_handle (u32 api_handle);
crypto_engine_type_t app_crypto_engine_type_add (void);
u8 app_crypto_engine_n_types (void);
+static inline u8
+app_worker_application_is_builtin (app_worker_t *app_wrk)
+{
+ return app_wrk->app_is_builtin;
+}
+
#endif /* SRC_VNET_SESSION_APPLICATION_H_ */
/*
diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c
index 74f456a1eab..a62f914d43a 100644
--- a/src/vnet/session/application_interface.c
+++ b/src/vnet/session/application_interface.c
@@ -73,8 +73,8 @@ unformat_vnet_uri (unformat_input_t * input, va_list * args)
static u8 *cache_uri;
static session_endpoint_cfg_t *cache_sep;
-int
-parse_uri (char *uri, session_endpoint_cfg_t * sep)
+session_error_t
+parse_uri (char *uri, session_endpoint_cfg_t *sep)
{
unformat_input_t _input, *input = &_input;
@@ -92,7 +92,7 @@ parse_uri (char *uri, session_endpoint_cfg_t * sep)
if (!unformat (input, "%U", unformat_vnet_uri, sep))
{
unformat_free (input);
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
}
unformat_free (input);
@@ -106,8 +106,8 @@ parse_uri (char *uri, session_endpoint_cfg_t * sep)
return 0;
}
-int
-vnet_bind_uri (vnet_listen_args_t * a)
+session_error_t
+vnet_bind_uri (vnet_listen_args_t *a)
{
session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
int rv;
@@ -120,36 +120,36 @@ vnet_bind_uri (vnet_listen_args_t * a)
return vnet_listen (a);
}
-int
-vnet_unbind_uri (vnet_unlisten_args_t * a)
+session_error_t
+vnet_unbind_uri (vnet_unlisten_args_t *a)
{
session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
application_t *app;
session_t *listener;
u32 table_index;
- int rv;
+ session_error_t rv;
if ((rv = parse_uri (a->uri, &sep)))
return rv;
app = application_get (a->app_index);
if (!app)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
table_index = application_session_table (app, fib_ip_proto (!sep.is_ip4));
listener = session_lookup_listener (table_index,
(session_endpoint_t *) & sep);
if (!listener)
- return VNET_API_ERROR_ADDRESS_NOT_IN_USE;
+ return SESSION_E_ADDR_NOT_IN_USE;
a->handle = listen_session_get_handle (listener);
return vnet_unlisten (a);
}
-int
-vnet_connect_uri (vnet_connect_args_t * a)
+session_error_t
+vnet_connect_uri (vnet_connect_args_t *a)
{
session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
- int rv;
+ session_error_t rv;
if ((rv = parse_uri (a->uri, &sep)))
return rv;
diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h
index ca8dc38c4e1..f175e4a58c6 100644
--- a/src/vnet/session/application_interface.h
+++ b/src/vnet/session/application_interface.h
@@ -62,6 +62,13 @@ typedef struct session_cb_vft_
/** Notify app that session pool migration happened */
void (*session_migrate_callback) (session_t * s, session_handle_t new_sh);
+ /** Notify app (external only) that listen was processed */
+ int (*session_listened_callback) (u32 app_wrk_index, u32 api_context,
+ session_handle_t handle, int rv);
+ /** Notify app (external only) that unlisten was processed */
+ void (*session_unlistened_callback) (u32 app_wrk_index, session_handle_t sh,
+ u32 context, int rv);
+
/** Direct RX callback for built-in application */
int (*builtin_app_rx_callback) (session_t * session);
@@ -74,6 +81,8 @@ typedef struct session_cb_vft_
/** Delegate fifo-tuning-logic to application */
int (*fifo_tuning_callback) (session_t * s, svm_fifo_t * f,
session_ft_action_t act, u32 bytes);
+ /** Custom fifo allocation for proxy */
+ int (*proxy_alloc_session_fifos) (session_t *s);
} session_cb_vft_t;
@@ -117,7 +126,7 @@ typedef struct _vnet_bind_args_t
/*
* Results
*/
- u64 handle;
+ session_handle_t handle;
} vnet_listen_args_t;
typedef struct _vnet_unlisten_args_t
@@ -125,7 +134,7 @@ typedef struct _vnet_unlisten_args_t
union
{
char *uri;
- u64 handle; /**< Session handle */
+ session_handle_t handle; /**< Session handle */
};
u32 app_index; /**< Owning application index */
u32 wrk_map_index; /**< App's local pool worker index */
@@ -232,7 +241,9 @@ typedef enum
_ (USE_GLOBAL_SCOPE, "App can use global session scope") \
_ (USE_LOCAL_SCOPE, "App can use local session scope") \
_ (EVT_MQ_USE_EVENTFD, "Use eventfds for signaling") \
- _ (MEMFD_FOR_BUILTIN, "Use memfd for builtin app segs")
+ _ (MEMFD_FOR_BUILTIN, "Use memfd for builtin app segs") \
+ _ (USE_HUGE_PAGE, "Use huge page for FIFO") \
+ _ (GET_ORIGINAL_DST, "Get original dst enabled")
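Both new flags are opt-in at attach time. A fragment of how an application might request them, assuming the usual options array and attach args from this header (callback vft, name and namespace setup omitted):

    vnet_app_attach_args_t _a = { 0 }, *a = &_a;
    u64 options[APP_OPTIONS_N_OPTIONS] = { 0 };

    a->options = options;
    a->options[APP_OPTIONS_FLAGS] |= APP_OPTIONS_FLAGS_USE_HUGE_PAGE;
    a->options[APP_OPTIONS_FLAGS] |= APP_OPTIONS_FLAGS_GET_ORIGINAL_DST;
    /* ... set session_cb_vft, name, namespace ... */
    if (vnet_application_attach (a))
      clib_warning ("attach failed");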
typedef enum _app_options
{
@@ -269,24 +280,26 @@ typedef enum session_fd_flag_
#undef _
} session_fd_flag_t;
-int parse_uri (char *uri, session_endpoint_cfg_t * sep);
-int vnet_bind_uri (vnet_listen_args_t *);
-int vnet_unbind_uri (vnet_unlisten_args_t * a);
-int vnet_connect_uri (vnet_connect_args_t * a);
+session_error_t parse_uri (char *uri, session_endpoint_cfg_t *sep);
+session_error_t vnet_bind_uri (vnet_listen_args_t *);
+session_error_t vnet_unbind_uri (vnet_unlisten_args_t *a);
+session_error_t vnet_connect_uri (vnet_connect_args_t *a);
-int vnet_application_attach (vnet_app_attach_args_t * a);
-int vnet_application_detach (vnet_app_detach_args_t * a);
-int vnet_listen (vnet_listen_args_t * a);
-int vnet_connect (vnet_connect_args_t * a);
-int vnet_unlisten (vnet_unlisten_args_t * a);
-int vnet_shutdown_session (vnet_shutdown_args_t *a);
-int vnet_disconnect_session (vnet_disconnect_args_t * a);
+session_error_t vnet_application_attach (vnet_app_attach_args_t *a);
+session_error_t vnet_application_detach (vnet_app_detach_args_t *a);
+session_error_t vnet_listen (vnet_listen_args_t *a);
+session_error_t vnet_connect (vnet_connect_args_t *a);
+session_error_t vnet_unlisten (vnet_unlisten_args_t *a);
+session_error_t vnet_shutdown_session (vnet_shutdown_args_t *a);
+session_error_t vnet_disconnect_session (vnet_disconnect_args_t *a);
int vnet_app_add_cert_key_pair (vnet_app_add_cert_key_pair_args_t * a);
int vnet_app_del_cert_key_pair (u32 index);
/** Ask for app cb on pair deletion */
int vnet_app_add_cert_key_interest (u32 index, u32 app_index);
+uword unformat_vnet_uri (unformat_input_t *input, va_list *args);
+
typedef struct app_session_transport_
{
ip46_address_t rmt_ip; /**< remote ip */
@@ -296,15 +309,15 @@ typedef struct app_session_transport_
u8 is_ip4; /**< set if uses ip4 networking */
} app_session_transport_t;
-#define foreach_app_session_field \
- _(svm_fifo_t, *rx_fifo) /**< rx fifo */ \
- _(svm_fifo_t, *tx_fifo) /**< tx fifo */ \
- _(session_type_t, session_type) /**< session type */ \
- _(volatile u8, session_state) /**< session state */ \
- _(u32, session_index) /**< index in owning pool */ \
- _(app_session_transport_t, transport) /**< transport info */ \
- _(svm_msg_q_t, *vpp_evt_q) /**< vpp event queue */ \
- _(u8, is_dgram) /**< flag for dgram mode */ \
+#define foreach_app_session_field \
+ _ (svm_fifo_t, *rx_fifo) /**< rx fifo */ \
+ _ (svm_fifo_t, *tx_fifo) /**< tx fifo */ \
+ _ (session_type_t, session_type) /**< session type */ \
+ _ (volatile u8, session_state) /**< session state */ \
+ _ (u32, session_index) /**< index in owning pool */ \
+ _ (app_session_transport_t, transport) /**< transport info */ \
+ _ (svm_msg_q_t, *vpp_evt_q) /**< vpp event queue */ \
+ _ (u8, is_dgram) /**< flag for dgram mode */
typedef struct
{
@@ -343,7 +356,7 @@ STATIC_ASSERT (sizeof (session_listen_uri_msg_t) <= SESSION_CTRL_MSG_MAX_SIZE,
typedef struct session_bound_msg_
{
u32 context;
- u64 handle;
+ session_handle_t handle;
i32 retval;
u8 lcl_is_ip4;
u8 lcl_ip[16];
@@ -366,15 +379,15 @@ typedef struct session_unlisten_msg_
typedef struct session_unlisten_reply_msg_
{
u32 context;
- u64 handle;
+ session_handle_t handle;
i32 retval;
} __clib_packed session_unlisten_reply_msg_t;
typedef struct session_accepted_msg_
{
u32 context;
- u64 listener_handle;
- u64 handle;
+ session_handle_t listener_handle;
+ session_handle_t handle;
uword server_rx_fifo;
uword server_tx_fifo;
u64 segment_handle;
@@ -383,13 +396,15 @@ typedef struct session_accepted_msg_
transport_endpoint_t lcl;
transport_endpoint_t rmt;
u8 flags;
+ u32 original_dst_ip4;
+ u16 original_dst_port;
} __clib_packed session_accepted_msg_t;
typedef struct session_accepted_reply_msg_
{
u32 context;
i32 retval;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_accepted_reply_msg_t;
typedef struct session_connect_msg_
@@ -408,6 +423,7 @@ typedef struct session_connect_msg_
u32 ckpair_index;
u8 crypto_engine;
u8 flags;
+ u8 dscp;
uword ext_config;
} __clib_packed session_connect_msg_t;
@@ -428,7 +444,7 @@ typedef struct session_connected_msg_
{
u32 context;
i32 retval;
- u64 handle;
+ session_handle_t handle;
uword server_rx_fifo;
uword server_tx_fifo;
u64 segment_handle;
@@ -458,33 +474,33 @@ typedef struct session_disconnected_msg_
{
u32 client_index;
u32 context;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_disconnected_msg_t;
typedef struct session_disconnected_reply_msg_
{
u32 context;
i32 retval;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_disconnected_reply_msg_t;
typedef struct session_reset_msg_
{
u32 client_index;
u32 context;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_reset_msg_t;
typedef struct session_reset_reply_msg_
{
u32 context;
i32 retval;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_reset_reply_msg_t;
typedef struct session_req_worker_update_msg_
{
- u64 session_handle;
+ session_handle_t session_handle;
} __clib_packed session_req_worker_update_msg_t;
/* NOTE: using u16 for wrk indices because message needs to fit in 18B */
@@ -493,12 +509,12 @@ typedef struct session_worker_update_msg_
u32 client_index;
u16 wrk_index;
u16 req_wrk_index;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_worker_update_msg_t;
typedef struct session_worker_update_reply_msg_
{
- u64 handle;
+ session_handle_t handle;
uword rx_fifo;
uword tx_fifo;
u64 segment_handle;
@@ -612,8 +628,8 @@ app_send_io_evt_to_vpp (svm_msg_q_t * mq, u32 session_index, u8 evt_type,
{
if (svm_msg_q_try_lock (mq))
return -1;
- if (PREDICT_FALSE (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)
- || svm_msg_q_is_full (mq)))
+ if (PREDICT_FALSE (
+ svm_msg_q_or_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)))
{
svm_msg_q_unlock (mq);
return -2;
@@ -628,9 +644,8 @@ app_send_io_evt_to_vpp (svm_msg_q_t * mq, u32 session_index, u8 evt_type,
else
{
svm_msg_q_lock (mq);
- while (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)
- || svm_msg_q_is_full (mq))
- svm_msg_q_wait_prod (mq);
+ while (svm_msg_q_or_ring_is_full (mq, SESSION_MQ_IO_EVT_RING))
+ svm_msg_q_or_ring_wait_prod (mq, SESSION_MQ_IO_EVT_RING);
msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
evt->session_index = session_index;
@@ -640,14 +655,18 @@ app_send_io_evt_to_vpp (svm_msg_q_t * mq, u32 session_index, u8 evt_type,
}
}
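A usage sketch of the producer side after this change: non-blocking callers treat both failure codes as "leave the event pending and retry", while blocking callers now wait on the specific ring rather than the whole queue. Everything but the function itself is illustrative:

    /* App thread signaling vpp that data was enqueued to the tx fifo. */
    int rv = app_send_io_evt_to_vpp (s->vpp_evt_q, s->session_index,
                                     SESSION_IO_EVT_TX, 1 /* noblock */);
    if (rv)
      schedule_retry (s); /* hypothetical: mq locked (-1) or ring full (-2) */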
+#define app_send_dgram_raw(f, at, vpp_evt_q, data, len, evt_type, do_evt, \
+ noblock) \
+ app_send_dgram_raw_gso (f, at, vpp_evt_q, data, len, 0, evt_type, do_evt, \
+ noblock)
+
always_inline int
-app_send_dgram_raw (svm_fifo_t * f, app_session_transport_t * at,
- svm_msg_q_t * vpp_evt_q, u8 * data, u32 len, u8 evt_type,
- u8 do_evt, u8 noblock)
+app_send_dgram_raw_gso (svm_fifo_t *f, app_session_transport_t *at,
+ svm_msg_q_t *vpp_evt_q, u8 *data, u32 len,
+ u16 gso_size, u8 evt_type, u8 do_evt, u8 noblock)
{
session_dgram_hdr_t hdr;
int rv;
-
if (svm_fifo_max_enqueue_prod (f) < (sizeof (session_dgram_hdr_t) + len))
return 0;
@@ -658,10 +677,8 @@ app_send_dgram_raw (svm_fifo_t * f, app_session_transport_t * at,
hdr.rmt_port = at->rmt_port;
clib_memcpy_fast (&hdr.lcl_ip, &at->lcl_ip, sizeof (ip46_address_t));
hdr.lcl_port = at->lcl_port;
-
- /* *INDENT-OFF* */
+ hdr.gso_size = gso_size;
svm_fifo_seg_t segs[2] = {{ (u8 *) &hdr, sizeof (hdr) }, { data, len }};
- /* *INDENT-ON* */
rv = svm_fifo_enqueue_segments (f, segs, 2, 0 /* allow partial */ );
if (PREDICT_FALSE (rv < 0))
@@ -786,13 +803,11 @@ app_recv (app_session_t * s, u8 * data, u32 len)
return app_recv_stream (s, data, len);
}
-/* *INDENT-OFF* */
static char *session_error_str[] = {
#define _(sym, str) str,
foreach_session_error
#undef _
};
-/* *INDENT-ON* */
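session_error_str is generated from the same X-macro that defines the error enum, so the strings and the codes cannot drift apart. The technique in miniature (self-contained, not VPP code):

    #include <stdio.h>

    #define foreach_my_error                                                  \
      _ (NONE, "no error")                                                    \
      _ (ALLOC, "allocation failure")

    typedef enum
    {
    #define _(sym, str) MY_E_##sym,
      foreach_my_error
    #undef _
    } my_error_t;

    static const char *my_error_str[] = {
    #define _(sym, str) str,
      foreach_my_error
    #undef _
    };

    int
    main (void)
    {
      printf ("%s\n", my_error_str[MY_E_ALLOC]); /* "allocation failure" */
      return 0;
    }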
static inline u8 *
format_session_error (u8 * s, va_list * args)
diff --git a/src/vnet/session/application_local.c b/src/vnet/session/application_local.c
index 0abf03d0c15..3cb743d10e0 100644
--- a/src/vnet/session/application_local.c
+++ b/src/vnet/session/application_local.c
@@ -41,9 +41,25 @@ typedef struct ct_segments_
ct_segment_t *segments;
} ct_segments_ctx_t;
+typedef struct ct_cleanup_req_
+{
+ u32 ct_index;
+} ct_cleanup_req_t;
+
+typedef struct ct_worker_
+{
+ ct_connection_t *connections; /**< Per-worker connection pools */
+ u32 *pending_connects; /**< Fifo of pending ho indices */
+ ct_cleanup_req_t *pending_cleanups; /**< Fifo of pending indices */
+ u8 have_connects; /**< Set if connect rpc pending */
+ u8 have_cleanups; /**< Set if cleanup rpc pending */
+ clib_spinlock_t pending_connects_lock; /**< Lock for pending connects */
+ u32 *new_connects; /**< Burst of connects to be done */
+} ct_worker_t;
+
typedef struct ct_main_
{
- ct_connection_t **connections; /**< Per-worker connection pools */
+ ct_worker_t *wrk; /**< Per-worker state */
u32 n_workers; /**< Number of vpp workers */
u32 n_sessions; /**< Cumulative sessions counter */
u32 *ho_reusable; /**< Vector of reusable ho indices */
@@ -51,17 +67,28 @@ typedef struct ct_main_
clib_rwlock_t app_segs_lock; /**< RW lock for seg contexts */
uword *app_segs_ctxs_table; /**< App handle to segment pool map */
ct_segments_ctx_t *app_seg_ctxs; /**< Pool of ct segment contexts */
+ u32 **fwrk_pending_connects; /**< First wrk pending half-opens */
+ u32 fwrk_thread; /**< First worker thread */
+ u8 fwrk_have_flush; /**< Flag for connect flush rpc */
} ct_main_t;
static ct_main_t ct_main;
+static inline ct_worker_t *
+ct_worker_get (u32 thread_index)
+{
+ return &ct_main.wrk[thread_index];
+}
+
static ct_connection_t *
ct_connection_alloc (u32 thread_index)
{
+ ct_worker_t *wrk = ct_worker_get (thread_index);
ct_connection_t *ct;
- pool_get_zero (ct_main.connections[thread_index], ct);
- ct->c_c_index = ct - ct_main.connections[thread_index];
+ pool_get_aligned_safe (wrk->connections, ct, CLIB_CACHE_LINE_BYTES);
+ clib_memset (ct, 0, sizeof (*ct));
+ ct->c_c_index = ct - wrk->connections;
ct->c_thread_index = thread_index;
ct->client_wrk = ~0;
ct->server_wrk = ~0;
@@ -73,22 +100,25 @@ ct_connection_alloc (u32 thread_index)
static ct_connection_t *
ct_connection_get (u32 ct_index, u32 thread_index)
{
- if (pool_is_free_index (ct_main.connections[thread_index], ct_index))
+ ct_worker_t *wrk = ct_worker_get (thread_index);
+
+ if (pool_is_free_index (wrk->connections, ct_index))
return 0;
- return pool_elt_at_index (ct_main.connections[thread_index], ct_index);
+ return pool_elt_at_index (wrk->connections, ct_index);
}
static void
ct_connection_free (ct_connection_t * ct)
{
+ ct_worker_t *wrk = ct_worker_get (ct->c_thread_index);
+
if (CLIB_DEBUG)
{
- u32 thread_index = ct->c_thread_index;
- memset (ct, 0xfc, sizeof (*ct));
- pool_put (ct_main.connections[thread_index], ct);
+ clib_memset (ct, 0xfc, sizeof (*ct));
+ pool_put (wrk->connections, ct);
return;
}
- pool_put (ct_main.connections[ct->c_thread_index], ct);
+ pool_put (wrk->connections, ct);
}
static ct_connection_t *
@@ -99,11 +129,18 @@ ct_half_open_alloc (void)
clib_spinlock_lock (&cm->ho_reuseable_lock);
vec_foreach (hip, cm->ho_reusable)
- pool_put_index (cm->connections[0], *hip);
+ pool_put_index (cm->wrk[cm->fwrk_thread].connections, *hip);
vec_reset_length (cm->ho_reusable);
clib_spinlock_unlock (&cm->ho_reuseable_lock);
- return ct_connection_alloc (0);
+ return ct_connection_alloc (cm->fwrk_thread);
+}
+
+static ct_connection_t *
+ct_half_open_get (u32 ho_index)
+{
+ ct_main_t *cm = &ct_main;
+ return ct_connection_get (ho_index, cm->fwrk_thread);
}
void
@@ -137,6 +174,33 @@ ct_session_endpoint (session_t * ll, session_endpoint_t * sep)
}
static void
+ct_set_invalid_app_wrk (ct_connection_t *ct, u8 is_client)
+{
+ ct_connection_t *peer_ct;
+
+ peer_ct = ct_connection_get (ct->peer_index, ct->c_thread_index);
+
+ if (is_client)
+ {
+ ct->client_wrk = APP_INVALID_INDEX;
+ if (peer_ct)
+ peer_ct->client_wrk = APP_INVALID_INDEX;
+ }
+ else
+ {
+ ct->server_wrk = APP_INVALID_INDEX;
+ if (peer_ct)
+ peer_ct->server_wrk = APP_INVALID_INDEX;
+ }
+}
+
+static inline u64
+ct_client_seg_handle (u64 server_sh, u32 client_wrk_index)
+{
+ return (((u64) client_wrk_index << 56) | server_sh);
+}
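The helper tags the server's segment handle with the client worker index in the top byte, so the same segment can be announced to client and server under distinguishable handles; this only works while real segment handles fit in 56 bits and worker indices in 8. A self-contained roundtrip check:

    #include <assert.h>
    #include <stdint.h>

    #define SEG_SH_MASK 0x00ffffffffffffffULL

    static uint64_t
    client_seg_handle (uint64_t server_sh, uint32_t client_wrk)
    {
      return ((uint64_t) client_wrk << 56) | server_sh;
    }

    int
    main (void)
    {
      uint64_t h = client_seg_handle (0x0000beefcafef00dULL, 0x2a);
      assert ((h >> 56) == 0x2a);                          /* worker recovered */
      assert ((h & SEG_SH_MASK) == 0x0000beefcafef00dULL); /* handle intact */
      return 0;
    }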
+
+static void
ct_session_dealloc_fifos (ct_connection_t *ct, svm_fifo_t *rx_fifo,
svm_fifo_t *tx_fifo)
{
@@ -146,8 +210,8 @@ ct_session_dealloc_fifos (ct_connection_t *ct, svm_fifo_t *rx_fifo,
app_worker_t *app_wrk;
ct_segment_t *ct_seg;
fifo_segment_t *fs;
- u8 del_segment = 0;
u32 seg_index;
+ session_t *s;
int cnt;
/*
@@ -202,77 +266,82 @@ ct_session_dealloc_fifos (ct_connection_t *ct, svm_fifo_t *rx_fifo,
if (ct->flags & CT_CONN_F_CLIENT)
{
cnt = ct_seg->client_n_sessions;
- if (!cnt)
- ct_seg->flags |= CT_SEGMENT_F_CLIENT_DETACHED;
+ if (cnt)
+ goto done;
+ ct_seg->flags |= CT_SEGMENT_F_CLIENT_DETACHED;
+ s = session_get (ct->c_s_index, ct->c_thread_index);
+ if (s->app_wrk_index == APP_INVALID_INDEX)
+ ct_set_invalid_app_wrk (ct, 1 /* is_client */);
}
else
{
cnt = ct_seg->server_n_sessions;
- if (!cnt)
- ct_seg->flags |= CT_SEGMENT_F_SERVER_DETACHED;
+ if (cnt)
+ goto done;
+ ct_seg->flags |= CT_SEGMENT_F_SERVER_DETACHED;
+ s = session_get (ct->c_s_index, ct->c_thread_index);
+ if (s->app_wrk_index == APP_INVALID_INDEX)
+ ct_set_invalid_app_wrk (ct, 0 /* is_client */);
}
+ if (!(ct_seg->flags & CT_SEGMENT_F_CLIENT_DETACHED) ||
+ !(ct_seg->flags & CT_SEGMENT_F_SERVER_DETACHED))
+ goto done;
+
/*
* Remove segment context because both client and server detached
*/
- if (!cnt && (ct_seg->flags & CT_SEGMENT_F_CLIENT_DETACHED) &&
- (ct_seg->flags & CT_SEGMENT_F_SERVER_DETACHED))
- {
- pool_put_index (seg_ctx->segments, ct->ct_seg_index);
+ pool_put_index (seg_ctx->segments, ct->ct_seg_index);
- /*
- * No more segment indices left, remove the segments context
- */
- if (!pool_elts (seg_ctx->segments))
- {
- u64 table_handle = seg_ctx->client_wrk << 16 | seg_ctx->server_wrk;
- table_handle = (u64) seg_ctx->sm_index << 32 | table_handle;
- hash_unset (cm->app_segs_ctxs_table, table_handle);
- pool_free (seg_ctx->segments);
- pool_put_index (cm->app_seg_ctxs, ct->seg_ctx_index);
- }
- del_segment = 1;
+ /*
+ * No more segment indices left, remove the segments context
+ */
+ if (!pool_elts (seg_ctx->segments))
+ {
+ u64 table_handle = seg_ctx->client_wrk << 16 | seg_ctx->server_wrk;
+ table_handle = (u64) seg_ctx->sm_index << 32 | table_handle;
+ hash_unset (cm->app_segs_ctxs_table, table_handle);
+ pool_free (seg_ctx->segments);
+ pool_put_index (cm->app_seg_ctxs, ct->seg_ctx_index);
}
- clib_rwlock_writer_unlock (&cm->app_segs_lock);
-
/*
- * Session counter went to zero, notify the app that detached
+ * Segment to be removed so notify both apps
*/
- if (cnt)
- return;
- if (ct->flags & CT_CONN_F_CLIENT)
- {
- app_wrk = app_worker_get_if_valid (ct->client_wrk);
- /* Determine if client app still needs notification, i.e., if it is
- * still attached. If client detached and this is the last ct session
- * on this segment, then its connects segment manager should also be
- * detached, so do not send notification */
- if (app_wrk)
- {
- segment_manager_t *csm;
- csm = app_worker_get_connect_segment_manager (app_wrk);
- if (!segment_manager_app_detached (csm))
- app_worker_del_segment_notify (app_wrk, ct->segment_handle);
- }
- }
- else if (!segment_manager_app_detached (sm))
+ app_wrk = app_worker_get_if_valid (ct->client_wrk);
+ /* Determine if client app still needs notification, i.e., if it is
+ * still attached. If client detached and this is the last ct session
+ * on this segment, then its connects segment manager should also be
+ * detached, so do not send notification */
+ if (app_wrk)
{
- app_wrk = app_worker_get (ct->server_wrk);
- app_worker_del_segment_notify (app_wrk, ct->segment_handle);
+ segment_manager_t *csm;
+ csm = app_worker_get_connect_segment_manager (app_wrk);
+ if (!segment_manager_app_detached (csm))
+ app_worker_del_segment_notify (
+ app_wrk, ct_client_seg_handle (ct->segment_handle, ct->client_wrk));
}
- if (!del_segment)
- return;
-
+ /* Notify server app and free segment */
segment_manager_lock_and_del_segment (sm, seg_index);
/* Cleanup segment manager if needed. If server detaches there's a chance
* the client's sessions will hold up segment removal */
if (segment_manager_app_detached (sm) && !segment_manager_has_fifos (sm))
segment_manager_free_safe (sm);
+
+done:
+
+ clib_rwlock_writer_unlock (&cm->app_segs_lock);
+}
+
+static void
+ct_session_force_disconnect_server (ct_connection_t *sct)
+{
+ sct->peer_index = ~0;
+ session_transport_closing_notify (&sct->connection);
}
int
@@ -294,9 +363,7 @@ ct_session_connect_notify (session_t *ss, session_error_t err)
/* Client closed while waiting for reply from server */
if (PREDICT_FALSE (!cct))
{
- session_transport_closing_notify (&sct->connection);
- session_transport_delete_notify (&sct->connection);
- ct_connection_free (sct);
+ ct_session_force_disconnect_server (sct);
return 0;
}
@@ -307,16 +374,19 @@ ct_session_connect_notify (session_t *ss, session_error_t err)
goto connect_error;
/*
- * Alloc client session
+ * Alloc client session, server session assumed to be established
*/
+ ASSERT (ss->session_state >= SESSION_STATE_READY);
+
cs = session_alloc (thread_index);
ss = session_get (ss_index, thread_index);
cs->session_type = ss->session_type;
cs->listener_handle = SESSION_INVALID_HANDLE;
- cs->session_state = SESSION_STATE_CONNECTING;
+ session_set_state (cs, SESSION_STATE_CONNECTING);
cs->app_wrk_index = client_wrk->wrk_index;
cs->connection_index = cct->c_c_index;
+ cs->opaque = opaque;
cct->c_s_index = cs->session_index;
/* This will allocate fifos for the session. They won't be used for
@@ -325,23 +395,23 @@ ct_session_connect_notify (session_t *ss, session_error_t err)
if ((err = app_worker_init_connected (client_wrk, cs)))
{
session_free (cs);
- session_close (ss);
+ ct_session_force_disconnect_server (sct);
err = SESSION_E_ALLOC;
goto connect_error;
}
- cs->session_state = SESSION_STATE_CONNECTING;
+ session_set_state (cs, SESSION_STATE_CONNECTING);
if (app_worker_connect_notify (client_wrk, cs, 0, opaque))
{
segment_manager_dealloc_fifos (cs->rx_fifo, cs->tx_fifo);
session_free (cs);
- session_close (ss);
+ ct_session_force_disconnect_server (sct);
goto cleanup_client;
}
cs = session_get (cct->c_s_index, cct->c_thread_index);
- cs->session_state = SESSION_STATE_READY;
+ session_set_state (cs, SESSION_STATE_READY);
return 0;
@@ -373,9 +443,6 @@ ct_lookup_free_segment (ct_main_t *cm, segment_manager_t *sm,
pool_foreach (ct_seg, seg_ctx->segments)
{
/* Client or server has detached so segment cannot be used */
- if ((ct_seg->flags & CT_SEGMENT_F_SERVER_DETACHED) ||
- (ct_seg->flags & CT_SEGMENT_F_CLIENT_DETACHED))
- continue;
fs = segment_manager_get_segment (sm, ct_seg->segment_index);
free_bytes = fifo_segment_available_bytes (fs);
max_fifos = fifo_segment_size (fs) / seg_ctx->fifo_pair_bytes;
@@ -395,11 +462,11 @@ ct_alloc_segment (ct_main_t *cm, app_worker_t *server_wrk, u64 table_handle,
segment_manager_t *sm, u32 client_wrk_index)
{
u32 seg_ctx_index = ~0, sm_index, pair_bytes;
+ u64 seg_size, seg_handle, client_seg_handle;
segment_manager_props_t *props;
const u32 margin = 16 << 10;
ct_segments_ctx_t *seg_ctx;
app_worker_t *client_wrk;
- u64 seg_size, seg_handle;
application_t *server;
ct_segment_t *ct_seg;
uword *spp;
@@ -461,7 +528,11 @@ ct_alloc_segment (ct_main_t *cm, app_worker_t *server_wrk, u64 table_handle,
goto error;
client_wrk = app_worker_get (client_wrk_index);
- if (app_worker_add_segment_notify (client_wrk, seg_handle))
+ /* Make sure client workers do not have overlapping segment handles.
+ * Ideally, we should attach fs to the client worker's segment manager
+ * and create a new handle, but that's not currently possible. */
+ client_seg_handle = ct_client_seg_handle (seg_handle, client_wrk_index);
+ if (app_worker_add_segment_notify (client_wrk, client_seg_handle))
{
app_worker_del_segment_notify (server_wrk, seg_handle);
goto error;
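The per-client-worker handle used above keeps two client workers that share one server segment from registering the same handle. A minimal sketch of what ct_client_seg_handle could look like, assuming the worker index is folded into otherwise unused high bits of the handle (hypothetical; the actual helper is defined elsewhere in this patch):

static inline u64
ct_client_seg_handle (u64 server_sh, u32 client_wrk_index)
{
  /* Fold the client worker index into the high bits so each client
   * worker sees a distinct handle for the same server segment */
  return (((u64) client_wrk_index << 56) | server_sh);
}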
@@ -515,6 +586,8 @@ ct_init_accepted_session (app_worker_t *server_wrk, ct_connection_t *ct,
ct->seg_ctx_index = ct_seg->seg_ctx_index;
ct->ct_seg_index = ct_seg->ct_seg_index;
fs_index = ct_seg->segment_index;
+ ct_seg->flags &=
+ ~(CT_SEGMENT_F_SERVER_DETACHED | CT_SEGMENT_F_CLIENT_DETACHED);
__atomic_add_fetch (&ct_seg->server_n_sessions, 1, __ATOMIC_RELAXED);
__atomic_add_fetch (&ct_seg->client_n_sessions, 1, __ATOMIC_RELAXED);
}
@@ -573,10 +646,6 @@ ct_init_accepted_session (app_worker_t *server_wrk, ct_connection_t *ct,
ls->tx_fifo->shr->master_session_index = ls->session_index;
ls->rx_fifo->master_thread_index = ls->thread_index;
ls->tx_fifo->master_thread_index = ls->thread_index;
- ls->rx_fifo->segment_manager = sm_index;
- ls->tx_fifo->segment_manager = sm_index;
- ls->rx_fifo->segment_index = fs_index;
- ls->tx_fifo->segment_index = fs_index;
seg_handle = segment_manager_segment_handle (sm, fs);
segment_manager_segment_reader_unlock (sm);
@@ -587,23 +656,21 @@ ct_init_accepted_session (app_worker_t *server_wrk, ct_connection_t *ct,
}
static void
-ct_accept_rpc_wrk_handler (void *accept_args)
+ct_accept_one (u32 thread_index, u32 ho_index)
{
- u32 cct_index, ho_index, thread_index, ll_index;
ct_connection_t *sct, *cct, *ho;
transport_connection_t *ll_ct;
app_worker_t *server_wrk;
+ u32 cct_index, ll_index;
session_t *ss, *ll;
/*
* Alloc client ct and initialize from ho
*/
- thread_index = vlib_get_thread_index ();
cct = ct_connection_alloc (thread_index);
cct_index = cct->c_c_index;
- ho_index = pointer_to_uword (accept_args);
- ho = ct_connection_get (ho_index, 0);
+ ho = ct_half_open_get (ho_index);
/* Unlikely but half-open session and transport could have been freed */
if (PREDICT_FALSE (!ho))
@@ -659,7 +726,7 @@ ct_accept_rpc_wrk_handler (void *accept_args)
sct->c_is_ip4);
ss->connection_index = sct->c_c_index;
ss->listener_handle = listen_session_get_handle (ll);
- ss->session_state = SESSION_STATE_CREATED;
+ session_set_state (ss, SESSION_STATE_CREATED);
server_wrk = application_listener_select_worker (ll);
ss->app_wrk_index = server_wrk->wrk_index;
@@ -675,15 +742,17 @@ ct_accept_rpc_wrk_handler (void *accept_args)
return;
}
+ cct->server_wrk = sct->server_wrk;
cct->seg_ctx_index = sct->seg_ctx_index;
cct->ct_seg_index = sct->ct_seg_index;
cct->client_rx_fifo = ss->tx_fifo;
cct->client_tx_fifo = ss->rx_fifo;
cct->client_rx_fifo->refcnt++;
cct->client_tx_fifo->refcnt++;
- cct->segment_handle = sct->segment_handle;
+ cct->segment_handle =
+ ct_client_seg_handle (sct->segment_handle, cct->client_wrk);
- ss->session_state = SESSION_STATE_ACCEPTING;
+ session_set_state (ss, SESSION_STATE_ACCEPTING);
if (app_worker_accept_notify (server_wrk, ss))
{
ct_session_connect_notify (ss, SESSION_E_REFUSED);
@@ -693,13 +762,93 @@ ct_accept_rpc_wrk_handler (void *accept_args)
}
}
-static int
-ct_connect (app_worker_t * client_wrk, session_t * ll,
- session_endpoint_cfg_t * sep)
+static void
+ct_accept_rpc_wrk_handler (void *rpc_args)
{
- u32 thread_index, ho_index;
+ u32 thread_index, n_connects, i, n_pending;
+ const u32 max_connects = 32;
+ ct_worker_t *wrk;
+ u8 need_rpc = 0;
+
+ thread_index = pointer_to_uword (rpc_args);
+ wrk = ct_worker_get (thread_index);
+
+ /* Connects could be handled without the worker barrier, so grab the lock */
+ clib_spinlock_lock (&wrk->pending_connects_lock);
+
+ n_pending = clib_fifo_elts (wrk->pending_connects);
+ n_connects = clib_min (n_pending, max_connects);
+ vec_validate (wrk->new_connects, n_connects);
+
+ for (i = 0; i < n_connects; i++)
+ clib_fifo_sub1 (wrk->pending_connects, wrk->new_connects[i]);
+
+ if (n_pending == n_connects)
+ wrk->have_connects = 0;
+ else
+ need_rpc = 1;
+
+ clib_spinlock_unlock (&wrk->pending_connects_lock);
+
+ for (i = 0; i < n_connects; i++)
+ ct_accept_one (thread_index, wrk->new_connects[i]);
+
+ if (need_rpc)
+ session_send_rpc_evt_to_thread_force (
+ thread_index, ct_accept_rpc_wrk_handler,
+ uword_to_pointer (thread_index, void *));
+}
+
+static void
+ct_fwrk_flush_connects (void *rpc_args)
+{
+ u32 thread_index, fwrk_index, n_workers;
ct_main_t *cm = &ct_main;
- ct_connection_t *ho;
+ ct_worker_t *wrk;
+ u8 need_rpc = 0;
+
+ fwrk_index = cm->fwrk_thread;
+ n_workers = vec_len (cm->fwrk_pending_connects);
+
+ for (thread_index = fwrk_index; thread_index < n_workers; thread_index++)
+ {
+ if (!vec_len (cm->fwrk_pending_connects[thread_index]))
+ continue;
+
+ wrk = ct_worker_get (thread_index);
+
+ /* Connects can be done without worker barrier, grab dst worker lock */
+ if (thread_index != fwrk_index)
+ clib_spinlock_lock (&wrk->pending_connects_lock);
+
+ clib_fifo_add (wrk->pending_connects,
+ cm->fwrk_pending_connects[thread_index],
+ vec_len (cm->fwrk_pending_connects[thread_index]));
+ if (!wrk->have_connects)
+ {
+ wrk->have_connects = 1;
+ need_rpc = 1;
+ }
+
+ if (thread_index != fwrk_index)
+ clib_spinlock_unlock (&wrk->pending_connects_lock);
+
+ vec_reset_length (cm->fwrk_pending_connects[thread_index]);
+
+ if (need_rpc)
+ session_send_rpc_evt_to_thread_force (
+ thread_index, ct_accept_rpc_wrk_handler,
+ uword_to_pointer (thread_index, void *));
+ }
+
+ cm->fwrk_have_flush = 0;
+}
+
+static void
+ct_program_connect_to_wrk (u32 ho_index)
+{
+ ct_main_t *cm = &ct_main;
+ u32 thread_index;
/* Simple round-robin policy for spreading sessions over workers. We skip
* thread index 0, i.e., offset the index by 1, when we have workers as it
@@ -708,6 +857,25 @@ ct_connect (app_worker_t * client_wrk, session_t * ll,
cm->n_sessions += 1;
thread_index = cm->n_workers ? (cm->n_sessions % cm->n_workers) + 1 : 0;
+ /* Postpone flushing of connect request to dst worker until after session
+ * layer fully initializes the half-open session. */
+ vec_add1 (cm->fwrk_pending_connects[thread_index], ho_index);
+ if (!cm->fwrk_have_flush)
+ {
+ session_send_rpc_evt_to_thread_force (
+ cm->fwrk_thread, ct_fwrk_flush_connects,
+ uword_to_pointer (thread_index, void *));
+ cm->fwrk_have_flush = 1;
+ }
+}
+
+static int
+ct_connect (app_worker_t *client_wrk, session_t *ll,
+ session_endpoint_cfg_t *sep)
+{
+ ct_connection_t *ho;
+ u32 ho_index;
+
/*
* Alloc and init client half-open transport
*/
@@ -725,22 +893,19 @@ ct_connect (app_worker_t * client_wrk, session_t * ll,
clib_memcpy (&ho->c_rmt_ip, &sep->ip, sizeof (sep->ip));
ho->flags |= CT_CONN_F_CLIENT;
ho->c_s_index = ~0;
- ho->actual_tp = sep->transport_proto;
+ ho->actual_tp = sep->original_tp;
/*
- * Accept connection on thread selected above. Connected reply comes
+ * Program connect on a worker, connected reply comes
* after server accepts the connection.
*/
-
- session_send_rpc_evt_to_thread_force (thread_index,
- ct_accept_rpc_wrk_handler,
- uword_to_pointer (ho_index, void *));
+ ct_program_connect_to_wrk (ho_index);
return ho_index;
}
static u32
-ct_start_listen (u32 app_listener_index, transport_endpoint_t * tep)
+ct_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep)
{
session_endpoint_cfg_t *sep;
ct_connection_t *ct;
@@ -772,9 +937,9 @@ ct_listener_get (u32 ct_index)
}
static transport_connection_t *
-ct_half_open_get (u32 ct_index)
+ct_session_half_open_get (u32 ct_index)
{
- return (transport_connection_t *) ct_connection_get (ct_index, 0);
+ return (transport_connection_t *) ct_half_open_get (ct_index);
}
static void
@@ -796,7 +961,10 @@ ct_session_cleanup (u32 conn_index, u32 thread_index)
static void
ct_cleanup_ho (u32 ho_index)
{
- ct_connection_free (ct_connection_get (ho_index, 0));
+ ct_connection_t *ho;
+
+ ho = ct_half_open_get (ho_index);
+ ct_connection_free (ho);
}
static int
@@ -827,7 +995,7 @@ ct_session_connect (transport_endpoint_cfg_t * tep)
goto global_scope;
ll = listen_session_get_from_handle (lh);
- al = app_listener_get_w_session (ll);
+ al = app_listener_get (ll->al_index);
/*
* Break loop if rule in local table points to connecting app. This
@@ -856,58 +1024,189 @@ global_scope:
ll = session_lookup_listener_wildcard (table_index, sep);
/* Avoid connecting app to own listener */
- if (ll && ll->app_index != app->app_index)
- return ct_connect (app_wrk, ll, sep_ext);
+ if (ll)
+ {
+ al = app_listener_get (ll->al_index);
+ if (al->app_index != app->app_index)
+ return ct_connect (app_wrk, ll, sep_ext);
+ }
/* Failed to connect but no error */
return SESSION_E_LOCAL_CONNECT;
}
+static inline int
+ct_close_is_reset (ct_connection_t *ct, session_t *s)
+{
+ if (ct->flags & CT_CONN_F_RESET)
+ return 1;
+ if (ct->flags & CT_CONN_F_CLIENT)
+ return (svm_fifo_max_dequeue (ct->client_rx_fifo) > 0);
+ else
+ return (svm_fifo_max_dequeue (s->rx_fifo) > 0);
+}
+
static void
-ct_session_close (u32 ct_index, u32 thread_index)
+ct_session_cleanup_server_session (session_t *s)
{
- ct_connection_t *ct, *peer_ct;
+ ct_connection_t *ct;
+
+ ct = (ct_connection_t *) session_get_transport (s);
+ ct_session_dealloc_fifos (ct, s->rx_fifo, s->tx_fifo);
+ session_free (s);
+ ct_connection_free (ct);
+}
+
+static void
+ct_session_postponed_cleanup (ct_connection_t *ct)
+{
+ ct_connection_t *peer_ct;
app_worker_t *app_wrk;
session_t *s;
- ct = ct_connection_get (ct_index, thread_index);
s = session_get (ct->c_s_index, ct->c_thread_index);
- peer_ct = ct_connection_get (ct->peer_index, thread_index);
+ app_wrk = app_worker_get_if_valid (s->app_wrk_index);
+
+ peer_ct = ct_connection_get (ct->peer_index, ct->c_thread_index);
if (peer_ct)
{
- peer_ct->peer_index = ~0;
- /* Make sure session was allocated */
- if (peer_ct->flags & CT_CONN_F_HALF_OPEN)
- {
- ct_session_connect_notify (s, SESSION_E_REFUSED);
- }
- else if (peer_ct->c_s_index != ~0)
- session_transport_closing_notify (&peer_ct->connection);
+ if (ct_close_is_reset (ct, s))
+ session_transport_reset_notify (&peer_ct->connection);
else
- ct_connection_free (peer_ct);
+ session_transport_closing_notify (&peer_ct->connection);
}
+ session_transport_closed_notify (&ct->connection);
+
+ /* It would be cleaner to call session_transport_delete_notify
+ * but then we can't control session cleanup below */
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
+ if (app_wrk)
+ app_worker_cleanup_notify (app_wrk, s, SESSION_CLEANUP_TRANSPORT);
if (ct->flags & CT_CONN_F_CLIENT)
{
/* Normal free for client session as the fifos are allocated through
* the connects segment manager in a segment that's not shared with
* the server */
- session_free_w_fifos (s);
ct_session_dealloc_fifos (ct, ct->client_rx_fifo, ct->client_tx_fifo);
+ session_program_cleanup (s);
+ ct_connection_free (ct);
}
else
{
/* Manual session and fifo segment cleanup to avoid implicit
* segment manager cleanups and notifications */
- app_wrk = app_worker_get_if_valid (s->app_wrk_index);
if (app_wrk)
- app_worker_cleanup_notify (app_wrk, s, SESSION_CLEANUP_SESSION);
+ {
+ /* Remove custom cleanup notify infra when/if switching to normal
+ * session cleanup. Note that ct is freed in the cb function */
+ app_worker_cleanup_notify_custom (app_wrk, s,
+ SESSION_CLEANUP_SESSION,
+ ct_session_cleanup_server_session);
+ }
+ else
+ {
+ ct_connection_free (ct);
+ }
+ }
+}
+
+static void
+ct_handle_cleanups (void *args)
+{
+ uword thread_index = pointer_to_uword (args);
+ const u32 max_cleanups = 100;
+ ct_cleanup_req_t *req;
+ ct_connection_t *ct;
+ u32 n_to_handle = 0;
+ ct_worker_t *wrk;
+ session_t *s;
+
+ wrk = ct_worker_get (thread_index);
+ wrk->have_cleanups = 0;
+ n_to_handle = clib_fifo_elts (wrk->pending_cleanups);
+ n_to_handle = clib_min (n_to_handle, max_cleanups);
+
+ while (n_to_handle)
+ {
+ clib_fifo_sub2 (wrk->pending_cleanups, req);
+ ct = ct_connection_get (req->ct_index, thread_index);
+ s = session_get (ct->c_s_index, ct->c_thread_index);
+ if (svm_fifo_has_event (s->tx_fifo) || (s->flags & SESSION_F_RX_EVT))
+ clib_fifo_add1 (wrk->pending_cleanups, *req);
+ else
+ ct_session_postponed_cleanup (ct);
+ n_to_handle -= 1;
+ }
- ct_session_dealloc_fifos (ct, s->rx_fifo, s->tx_fifo);
- session_free (s);
+ if (clib_fifo_elts (wrk->pending_cleanups))
+ {
+ wrk->have_cleanups = 1;
+ session_send_rpc_evt_to_thread_force (
+ thread_index, ct_handle_cleanups,
+ uword_to_pointer (thread_index, void *));
}
+}
- ct_connection_free (ct);
+static void
+ct_program_cleanup (ct_connection_t *ct)
+{
+ ct_cleanup_req_t *req;
+ uword thread_index;
+ ct_worker_t *wrk;
+
+ thread_index = ct->c_thread_index;
+ wrk = ct_worker_get (ct->c_thread_index);
+
+ clib_fifo_add2 (wrk->pending_cleanups, req);
+ req->ct_index = ct->c_c_index;
+
+ if (wrk->have_cleanups)
+ return;
+
+ wrk->have_cleanups = 1;
+ session_send_rpc_evt_to_thread_force (
+ thread_index, ct_handle_cleanups, uword_to_pointer (thread_index, void *));
+}
+
+static void
+ct_session_close (u32 ct_index, u32 thread_index)
+{
+ ct_connection_t *ct, *peer_ct;
+ session_t *s;
+
+ ct = ct_connection_get (ct_index, thread_index);
+ s = session_get (ct->c_s_index, ct->c_thread_index);
+ peer_ct = ct_connection_get (ct->peer_index, thread_index);
+ if (peer_ct)
+ {
+ peer_ct->peer_index = ~0;
+ /* Make sure session was allocated */
+ if (peer_ct->flags & CT_CONN_F_HALF_OPEN)
+ {
+ ct_session_connect_notify (s, SESSION_E_REFUSED);
+ ct->peer_index = ~0;
+ }
+ else if (peer_ct->c_s_index == ~0)
+ {
+ /* should not happen */
+ clib_warning ("ct peer without session");
+ ct_connection_free (peer_ct);
+ }
+ }
+
+ /* Do not send closed notify, so that pending tx events are still
+ * delivered; just program the cleanup */
+ ct_program_cleanup (ct);
+}
+
+static void
+ct_session_reset (u32 ct_index, u32 thread_index)
+{
+ ct_connection_t *ct;
+ ct = ct_connection_get (ct_index, thread_index);
+ ct->flags |= CT_CONN_F_RESET;
+ ct_session_close (ct_index, thread_index);
}
static transport_connection_t *
@@ -966,12 +1265,17 @@ static int
ct_app_rx_evt (transport_connection_t * tc)
{
ct_connection_t *ct = (ct_connection_t *) tc, *peer_ct;
- session_t *ps;
+ session_t *ps, *s;
+ s = session_get (ct->c_s_index, ct->c_thread_index);
+ if (session_has_transport (s) || s->session_state < SESSION_STATE_READY)
+ return -1;
peer_ct = ct_connection_get (ct->peer_index, tc->thread_index);
- if (!peer_ct)
+ if (!peer_ct || (peer_ct->flags & CT_CONN_F_HALF_OPEN))
return -1;
ps = session_get (peer_ct->c_s_index, peer_ct->c_thread_index);
+ if (ps->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
+ return -1;
return session_dequeue_notify (ps);
}
@@ -993,7 +1297,7 @@ format_ct_half_open (u8 *s, va_list *args)
{
u32 ho_index = va_arg (*args, u32);
u32 verbose = va_arg (*args, u32);
- ct_connection_t *ct = ct_connection_get (ho_index, 0);
+ ct_connection_t *ct = ct_half_open_get (ho_index);
s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_ct_connection_id, ct);
if (verbose)
s = format (s, "%-" SESSION_CLI_STATE_LEN "s", "HALF-OPEN");
@@ -1042,27 +1346,33 @@ format_ct_session (u8 * s, va_list * args)
clib_error_t *
ct_enable_disable (vlib_main_t * vm, u8 is_en)
{
+ vlib_thread_main_t *vtm = &vlib_thread_main;
ct_main_t *cm = &ct_main;
+ ct_worker_t *wrk;
cm->n_workers = vlib_num_workers ();
- vec_validate (cm->connections, cm->n_workers);
+ cm->fwrk_thread = transport_cl_thread ();
+ vec_validate (cm->wrk, vtm->n_vlib_mains);
+ vec_foreach (wrk, cm->wrk)
+ clib_spinlock_init (&wrk->pending_connects_lock);
clib_spinlock_init (&cm->ho_reuseable_lock);
clib_rwlock_init (&cm->app_segs_lock);
+ vec_validate (cm->fwrk_pending_connects, cm->n_workers);
return 0;
}
-/* *INDENT-OFF* */
static const transport_proto_vft_t cut_thru_proto = {
.enable = ct_enable_disable,
.start_listen = ct_start_listen,
.stop_listen = ct_stop_listen,
.get_connection = ct_session_get,
.get_listener = ct_listener_get,
- .get_half_open = ct_half_open_get,
+ .get_half_open = ct_session_half_open_get,
.cleanup = ct_session_cleanup,
.cleanup_ho = ct_cleanup_ho,
.connect = ct_session_connect,
.close = ct_session_close,
+ .reset = ct_session_reset,
.custom_tx = ct_custom_tx,
.app_rx_evt = ct_app_rx_evt,
.format_listener = format_ct_listener,
@@ -1075,7 +1385,14 @@ static const transport_proto_vft_t cut_thru_proto = {
.service_type = TRANSPORT_SERVICE_VC,
},
};
-/* *INDENT-ON* */
+
+static inline int
+ct_session_can_tx (session_t *s)
+{
+ return (s->session_state == SESSION_STATE_READY ||
+ s->session_state == SESSION_STATE_CLOSING ||
+ s->session_state == SESSION_STATE_APP_CLOSED);
+}
int
ct_session_tx (session_t * s)
@@ -1083,6 +1400,8 @@ ct_session_tx (session_t * s)
ct_connection_t *ct, *peer_ct;
session_t *peer_s;
+ if (!ct_session_can_tx (s))
+ return 0;
ct = (ct_connection_t *) session_get_transport (s);
peer_ct = ct_connection_get (ct->peer_index, ct->c_thread_index);
if (!peer_ct)
@@ -1090,6 +1409,7 @@ ct_session_tx (session_t * s)
peer_s = session_get (peer_ct->c_s_index, peer_ct->c_thread_index);
if (peer_s->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
return 0;
+ peer_s->flags |= SESSION_F_RX_EVT;
return session_enqueue_notify (peer_s);
}
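The pending-connects and pending-cleanups machinery above shares one producer/consumer shape: enqueue into a per-worker clib_fifo under a spinlock, program an RPC to the owning thread only when the queue goes from empty to non-empty, and have the handler drain a bounded batch and re-arm itself if work remains. A stripped-down sketch of the pattern, where my_worker_t, my_worker_get and my_handle_one are illustrative stand-ins for the ct-specific state and handlers:

typedef struct
{
  clib_spinlock_t lock;
  u32 *pending;		/* clib_fifo of queued work item indices */
  u8 scheduled;		/* drain rpc already programmed */
} my_worker_t;

static void
my_wrk_drain (void *args)
{
  uword thread_index = pointer_to_uword (args);
  my_worker_t *wrk = my_worker_get (thread_index); /* hypothetical lookup */
  u32 i, n_pending, n_batch, batch[32];

  clib_spinlock_lock (&wrk->lock);
  n_pending = clib_fifo_elts (wrk->pending);
  n_batch = clib_min (n_pending, ARRAY_LEN (batch));
  for (i = 0; i < n_batch; i++)
    clib_fifo_sub1 (wrk->pending, batch[i]);
  /* keep 'scheduled' set while work remains so producers skip the rpc */
  wrk->scheduled = n_pending > n_batch;
  clib_spinlock_unlock (&wrk->lock);

  for (i = 0; i < n_batch; i++)
    my_handle_one (thread_index, batch[i]); /* hypothetical handler */

  if (wrk->scheduled)
    session_send_rpc_evt_to_thread_force (
      thread_index, my_wrk_drain, uword_to_pointer (thread_index, void *));
}

static void
my_enqueue (my_worker_t *wrk, u32 thread_index, u32 item)
{
  u8 need_rpc = 0;

  clib_spinlock_lock (&wrk->lock);
  clib_fifo_add1 (wrk->pending, item);
  if (!wrk->scheduled)
    wrk->scheduled = need_rpc = 1;
  clib_spinlock_unlock (&wrk->lock);

  /* Only the enqueue that made the queue non-empty programs the rpc */
  if (need_rpc)
    session_send_rpc_evt_to_thread_force (
      thread_index, my_wrk_drain, uword_to_pointer (thread_index, void *));
}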
diff --git a/src/vnet/session/application_local.h b/src/vnet/session/application_local.h
index 86edf243b22..fd2804c7baf 100644
--- a/src/vnet/session/application_local.h
+++ b/src/vnet/session/application_local.h
@@ -22,7 +22,8 @@
#define foreach_ct_flags \
_ (CLIENT, "client") \
- _ (HALF_OPEN, "half-open")
+ _ (HALF_OPEN, "half-open") \
+ _ (RESET, "reset")
enum
{
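For context, foreach_ct_flags is an x-macro that is typically expanded twice, once into bit positions and once into the CT_CONN_F_* masks used throughout application_local.c. A sketch of that common expansion, with the exact enum names assumed rather than taken from this hunk:

enum
{
#define _(sym, str) CT_CONN_BIT_F_##sym,
  foreach_ct_flags
#undef _
};

typedef enum
{
#define _(sym, str) CT_CONN_F_##sym = 1 << CT_CONN_BIT_F_##sym,
  foreach_ct_flags
#undef _
} ct_connection_flags_t;

With the RESET entry added above, CT_CONN_F_RESET becomes the third flag bit.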
diff --git a/src/vnet/session/application_namespace.c b/src/vnet/session/application_namespace.c
index cd2636cff32..f547dcfc031 100644
--- a/src/vnet/session/application_namespace.c
+++ b/src/vnet/session/application_namespace.c
@@ -81,21 +81,20 @@ app_namespace_alloc (const u8 *ns_id)
return app_ns;
}
-int
-vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a)
+session_error_t
+vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t *a)
{
app_namespace_t *app_ns;
session_table_t *st;
u32 ns_index;
- int rv;
+ session_error_t rv;
if (a->is_add)
{
if (a->sw_if_index != APP_NAMESPACE_INVALID_INDEX
&& !vnet_get_sw_interface_or_null (vnet_get_main (),
a->sw_if_index))
- return VNET_API_ERROR_INVALID_SW_IF_INDEX;
-
+ return SESSION_E_INVALID;
if (a->sw_if_index != APP_NAMESPACE_INVALID_INDEX)
{
@@ -108,7 +107,7 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a)
}
if (a->sw_if_index == APP_NAMESPACE_INVALID_INDEX
&& a->ip4_fib_id == APP_NAMESPACE_INVALID_INDEX)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
app_ns = app_namespace_get_from_id (a->ns_id);
if (!app_ns)
@@ -119,11 +118,6 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a)
st->is_local = 1;
st->appns_index = app_namespace_index (app_ns);
app_ns->local_table_index = session_table_index (st);
- if (a->netns)
- {
- app_ns->netns = vec_dup (a->netns);
- vec_terminate_c_string (app_ns->netns);
- }
if (a->sock_name)
{
app_ns->sock_name = vec_dup (a->sock_name);
@@ -153,11 +147,11 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a)
{
ns_index = app_namespace_index_from_id (a->ns_id);
if (ns_index == APP_NAMESPACE_INVALID_INDEX)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
app_ns = app_namespace_get (ns_index);
if (!app_ns)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
application_namespace_cleanup (app_ns);
@@ -167,8 +161,6 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a)
st = session_table_get (app_ns->local_table_index);
session_table_free (st, FIB_PROTOCOL_MAX);
- if (app_ns->netns)
- vec_free (app_ns->netns);
if (app_ns->sock_name)
vec_free (app_ns->sock_name);
@@ -255,7 +247,6 @@ app_namespaces_init (void)
/* clang-format off */
vnet_app_namespace_add_del_args_t a = {
.ns_id = ns_id,
- .netns = 0,
.sock_name = 0,
.secret = 0,
.sw_if_index = APP_NAMESPACE_INVALID_INDEX,
@@ -272,7 +263,7 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
u8 is_add = 0, *ns_id = 0, secret_set = 0, sw_if_index_set = 0;
- u8 *netns = 0, *sock_name = 0;
+ u8 *sock_name = 0;
unformat_input_t _line_input, *line_input = &_line_input;
u32 sw_if_index, fib_id = APP_NAMESPACE_INVALID_INDEX;
vnet_main_t *vnm = vnet_get_main ();
@@ -302,8 +293,6 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input,
sw_if_index_set = 1;
else if (unformat (line_input, "fib_id", &fib_id))
;
- else if (unformat (line_input, "netns %_%v%_", &netns))
- ;
else if (unformat (line_input, "sock-name %_%v%_", &sock_name))
;
else
@@ -329,7 +318,6 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input,
/* clang-format off */
vnet_app_namespace_add_del_args_t args = {
.ns_id = ns_id,
- .netns = netns,
.secret = secret,
.sw_if_index = sw_if_index,
.sock_name = sock_name,
@@ -344,21 +332,18 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input,
done:
vec_free (ns_id);
- vec_free (netns);
vec_free (sock_name);
unformat_free (line_input);
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (app_ns_command, static) = {
.path = "app ns",
.short_help = "app ns [add|del] id <namespace-id> secret <secret> "
- "sw_if_index <sw_if_index> if <interface> [netns <ns>]",
+ "sw_if_index <sw_if_index> if <interface>",
.function = app_ns_fn,
};
-/* *INDENT-ON* */
u8 *
format_app_namespace (u8 * s, va_list * args)
@@ -371,8 +356,6 @@ format_app_namespace (u8 * s, va_list * args)
if (app_ns->sw_if_index != (u32) ~0)
s = format (s, "\nInterface: %U", format_vnet_sw_if_index_name, vnm,
app_ns->sw_if_index);
- if (app_ns->netns)
- s = format (s, "\nNetns: %s", app_ns->netns);
if (app_ns->sock_name)
s = format (s, "\nSocket: %s", app_ns->sock_name);
@@ -401,7 +384,6 @@ app_namespace_show_api (vlib_main_t * vm, app_namespace_t * app_ns)
vlib_cli_output (vm, "%12s%12s%5s", "app index", "wrk index", "fd");
- /* *INDENT-OFF* */
pool_foreach (cs, app_ns->app_sockets) {
handle = (app_ns_api_handle_t *) &cs->private_data;
cf = clib_file_get (&file_main, handle->aah_file_index);
@@ -414,7 +396,6 @@ app_namespace_show_api (vlib_main_t * vm, app_namespace_t * app_ns)
vlib_cli_output (vm, "%12d%12d%5u", app_wrk->app_index,
app_wrk->wrk_map_index, cf->file_descriptor);
}
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -482,8 +463,7 @@ show_app_ns_fn (vlib_main_t * vm, unformat_input_t * main_input,
}
do_ns_list:
- table_add_header_col (t, 6, "Index", "Secret", "Interface", "Id", "Netns",
- "Socket");
+ table_add_header_col (t, 5, "Index", "Secret", "Interface", "Id", "Socket");
int i = 0;
pool_foreach (app_ns, app_namespace_pool)
{
@@ -493,7 +473,6 @@ do_ns_list:
table_format_cell (t, i, j++, "%U", format_vnet_sw_if_index_name, vnm,
app_ns->sw_if_index);
table_format_cell (t, i, j++, "%s", app_ns->ns_id);
- table_format_cell (t, i, j++, "%s", app_ns->netns);
table_format_cell (t, i++, j++, "%s", app_ns->sock_name);
}
@@ -510,13 +489,11 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_app_ns_command, static) = {
.path = "show app ns",
.short_help = "show app ns [id <id> [api-clients]]",
.function = show_app_ns_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
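To illustrate the reworked API, a hedged sketch of adding a namespace with the trimmed argument struct (no more netns field) and the new session_error_t return; the secret value and error handling are illustrative:

vnet_app_namespace_add_del_args_t args = {
  .ns_id = ns_id,			/* namespace id vector */
  .sock_name = 0,
  .secret = 1234,			/* illustrative */
  .sw_if_index = sw_if_index,
  .ip4_fib_id = APP_NAMESPACE_INVALID_INDEX,
  .is_add = 1,
};
session_error_t rv;

if ((rv = vnet_app_namespace_add_del (&args)))
  /* failures now surface as session errors, e.g. SESSION_E_INVALID */
  return clib_error_return (0, "app ns add failed: %d", rv);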
diff --git a/src/vnet/session/application_namespace.h b/src/vnet/session/application_namespace.h
index 1750d41fff8..261325cbe0e 100644
--- a/src/vnet/session/application_namespace.h
+++ b/src/vnet/session/application_namespace.h
@@ -51,11 +51,6 @@ typedef struct _app_namespace
u8 *ns_id;
/**
- * Linux netns if one was provided
- */
- u8 *netns;
-
- /**
* Name of socket applications can use to attach to session layer
*/
u8 *sock_name;
@@ -69,7 +64,6 @@ typedef struct _app_namespace
typedef struct _vnet_app_namespace_add_del_args
{
u8 *ns_id;
- u8 *netns;
u8 *sock_name;
u64 secret;
u32 sw_if_index;
@@ -88,7 +82,8 @@ const u8 *app_namespace_id (app_namespace_t * app_ns);
const u8 *app_namespace_id_from_index (u32 index);
u32 app_namespace_index_from_id (const u8 *ns_id);
void app_namespaces_init (void);
-int vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a);
+session_error_t
+vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t *a);
u32 app_namespace_get_fib_index (app_namespace_t * app_ns, u8 fib_proto);
session_table_t *app_namespace_get_local_table (app_namespace_t * app_ns);
diff --git a/src/vnet/session/application_worker.c b/src/vnet/session/application_worker.c
index be8a9e86bd5..befdb7c7002 100644
--- a/src/vnet/session/application_worker.c
+++ b/src/vnet/session/application_worker.c
@@ -26,6 +26,7 @@ app_worker_t *
app_worker_alloc (application_t * app)
{
app_worker_t *app_wrk;
+
pool_get (app_workers, app_wrk);
clib_memset (app_wrk, 0, sizeof (*app_wrk));
app_wrk->wrk_index = app_wrk - app_workers;
@@ -33,6 +34,8 @@ app_worker_alloc (application_t * app)
app_wrk->wrk_map_index = ~0;
app_wrk->connects_seg_manager = APP_INVALID_SEGMENT_MANAGER_INDEX;
clib_spinlock_init (&app_wrk->detached_seg_managers_lock);
+ vec_validate (app_wrk->wrk_evts, vlib_num_workers ());
+ vec_validate (app_wrk->wrk_mq_congested, vlib_num_workers ());
APP_DBG ("New app %v worker %u", app->name, app_wrk->wrk_index);
return app_wrk;
}
@@ -55,26 +58,34 @@ void
app_worker_free (app_worker_t * app_wrk)
{
application_t *app = application_get (app_wrk->app_index);
+ session_handle_t handle, *handles = 0, *sh;
vnet_unlisten_args_t _a, *a = &_a;
- u64 handle, *handles = 0, *sm_indices = 0;
segment_manager_t *sm;
- session_handle_t *sh;
+ u64 *sm_indices = 0;
session_t *ls;
u32 sm_index;
int i;
/*
+ * Cleanup vpp wrk events
+ */
+ app_worker_del_all_events (app_wrk);
+ for (i = 0; i < vec_len (app_wrk->wrk_evts); i++)
+ clib_fifo_free (app_wrk->wrk_evts[i]);
+
+ vec_free (app_wrk->wrk_evts);
+ vec_free (app_wrk->wrk_mq_congested);
+
+ /*
* Listener cleanup
*/
- /* *INDENT-OFF* */
hash_foreach (handle, sm_index, app_wrk->listeners_table, ({
ls = listen_session_get_from_handle (handle);
vec_add1 (handles, app_listen_session_handle (ls));
vec_add1 (sm_indices, sm_index);
sm = segment_manager_get (sm_index);
}));
- /* *INDENT-ON* */
for (i = 0; i < vec_len (handles); i++)
{
@@ -91,7 +102,7 @@ app_worker_free (app_worker_t * app_wrk)
segment_manager_init_free (sm);
}
}
- vec_reset_length (handles);
+ vec_free (handles);
vec_free (sm_indices);
hash_free (app_wrk->listeners_table);
@@ -175,31 +186,85 @@ app_worker_alloc_session_fifos (segment_manager_t * sm, session_t * s)
}
int
+app_worker_alloc_wrk_cl_session (app_worker_t *app_wrk, session_t *ls)
+{
+ svm_fifo_t *rx_fifo = 0, *tx_fifo = 0;
+ segment_manager_t *sm;
+ session_handle_t lsh;
+ app_listener_t *al;
+ session_t *s;
+
+ al = app_listener_get (ls->al_index);
+ sm = app_worker_get_listen_segment_manager (app_wrk, ls);
+ lsh = session_handle (ls);
+
+ s = session_alloc (0 /* listener on main worker */);
+ session_set_state (s, SESSION_STATE_LISTENING);
+ s->flags |= SESSION_F_IS_CLESS;
+ s->app_wrk_index = app_wrk->wrk_index;
+ ls = session_get_from_handle (lsh);
+ s->session_type = ls->session_type;
+ s->connection_index = ls->connection_index;
+
+ segment_manager_alloc_session_fifos (sm, s->thread_index, &rx_fifo,
+ &tx_fifo);
+
+ rx_fifo->shr->master_session_index = s->session_index;
+ rx_fifo->master_thread_index = s->thread_index;
+
+ tx_fifo->shr->master_session_index = s->session_index;
+ tx_fifo->master_thread_index = s->thread_index;
+
+ s->rx_fifo = rx_fifo;
+ s->tx_fifo = tx_fifo;
+
+ vec_validate (al->cl_listeners, app_wrk->wrk_map_index);
+ al->cl_listeners[app_wrk->wrk_map_index] = s->session_index;
+
+ return 0;
+}
+
+void
+app_worker_free_wrk_cl_session (app_worker_t *app_wrk, session_t *ls)
+{
+ app_listener_t *al;
+ session_t *s;
+
+ al = app_listener_get (ls->al_index);
+
+ s = app_listener_get_wrk_cl_session (al, app_wrk->wrk_map_index);
+ segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo);
+ session_free (s);
+
+ al->cl_listeners[app_wrk->wrk_map_index] = SESSION_INVALID_INDEX;
+}
+
+int
app_worker_init_listener (app_worker_t * app_wrk, session_t * ls)
{
segment_manager_t *sm;
/* Allocate segment manager. All sessions derived out of a listen session
- * have fifos allocated by the same segment manager. */
+ * have fifos allocated by the same segment manager.
+ * TODO(fcoras): limit memory consumption by cless listeners */
if (!(sm = app_worker_alloc_segment_manager (app_wrk)))
return SESSION_E_ALLOC;
+ /* Once the first segment is mapped, don't remove it until unlisten */
+ sm->first_is_protected = 1;
+
/* Keep track of the segment manager for the listener or this worker */
hash_set (app_wrk->listeners_table, listen_session_get_handle (ls),
segment_manager_index (sm));
- if (transport_connection_is_cless (session_get_transport (ls)))
- {
- if (ls->rx_fifo)
- return SESSION_E_NOSUPPORT;
- return app_worker_alloc_session_fifos (sm, ls);
- }
+ if (ls->flags & SESSION_F_IS_CLESS)
+ return app_worker_alloc_wrk_cl_session (app_wrk, ls);
+
return 0;
}
-int
-app_worker_start_listen (app_worker_t * app_wrk,
- app_listener_t * app_listener)
+session_error_t
+app_worker_start_listen (app_worker_t *app_wrk, app_listener_t *app_listener)
{
session_t *ls;
int rv;
@@ -263,17 +328,14 @@ app_worker_stop_listen_session (app_worker_t * app_wrk, session_t * ls)
if (PREDICT_FALSE (!sm_indexp))
return;
- /* Dealloc fifos, if any (dgram listeners) */
- if (ls->rx_fifo)
- {
- segment_manager_dealloc_fifos (ls->rx_fifo, ls->tx_fifo);
- ls->tx_fifo = ls->rx_fifo = 0;
- }
+ if (ls->flags & SESSION_F_IS_CLESS)
+ app_worker_free_wrk_cl_session (app_wrk, ls);
/* Try to cleanup segment manager */
sm = segment_manager_get (*sm_indexp);
if (sm)
{
+ sm->first_is_protected = 0;
segment_manager_app_detach (sm);
if (!segment_manager_has_fifos (sm))
{
@@ -334,8 +396,10 @@ app_worker_init_accepted (session_t * s)
listener = listen_session_get_from_handle (s->listener_handle);
app_wrk = application_listener_select_worker (listener);
- s->app_wrk_index = app_wrk->wrk_index;
+ if (PREDICT_FALSE (app_worker_mq_is_congested (app_wrk)))
+ return -1;
+ s->app_wrk_index = app_wrk->wrk_index;
app = application_get (app_wrk->app_index);
if (app->cb_fns.fifo_tuning_callback)
s->flags |= SESSION_F_CUSTOM_FIFO_TUNING;
@@ -348,10 +412,35 @@ app_worker_init_accepted (session_t * s)
}
int
+app_worker_listened_notify (app_worker_t *app_wrk, session_handle_t alsh,
+ u32 opaque, session_error_t err)
+{
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_BOUND,
+ .as_u64[0] = alsh,
+ .as_u64[1] = (u64) opaque << 32 | (u32) err };
+
+ app_worker_add_event_custom (app_wrk, 0 /* thread index */, &evt);
+
+ return 0;
+}
+
+int
+app_worker_unlisten_reply (app_worker_t *app_wrk, session_handle_t sh,
+ u32 opaque, session_error_t err)
+{
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_UNLISTEN_REPLY,
+ .as_u64[0] = sh,
+ .as_u64[1] = (u64) opaque << 32 | (u32) err };
+
+ app_worker_add_event_custom (app_wrk, 0 /* thread index */, &evt);
+ return 0;
+}
+
+int
app_worker_accept_notify (app_worker_t * app_wrk, session_t * s)
{
- application_t *app = application_get (app_wrk->app_index);
- return app->cb_fns.session_accept_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_ACCEPTED);
+ return 0;
}
int
@@ -365,7 +454,7 @@ app_worker_init_connected (app_worker_t * app_wrk, session_t * s)
/* Allocate fifos for session, unless the app is a builtin proxy */
if (application_is_builtin_proxy (app))
- return 0;
+ return app->cb_fns.proxy_alloc_session_fifos (s);
sm = app_worker_get_connect_segment_manager (app_wrk);
return app_worker_alloc_session_fifos (sm, s);
@@ -375,9 +464,13 @@ int
app_worker_connect_notify (app_worker_t * app_wrk, session_t * s,
session_error_t err, u32 opaque)
{
- application_t *app = application_get (app_wrk->app_index);
- return app->cb_fns.session_connected_callback (app_wrk->wrk_index, opaque,
- s, err);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_CONNECTED,
+ .as_u64[0] = s ? s->session_index : ~0,
+ .as_u64[1] = (u64) opaque << 32 | (u32) err };
+ u32 thread_index = s ? s->thread_index : vlib_get_thread_index ();
+
+ app_worker_add_event_custom (app_wrk, thread_index, &evt);
+ return 0;
}
int
@@ -385,7 +478,7 @@ app_worker_add_half_open (app_worker_t *app_wrk, session_handle_t sh)
{
session_handle_t *shp;
- ASSERT (vlib_get_thread_index () == 0);
+ ASSERT (session_vlib_thread_is_cl_thread ());
pool_get (app_wrk->half_open_table, shp);
*shp = sh;
@@ -395,36 +488,28 @@ app_worker_add_half_open (app_worker_t *app_wrk, session_handle_t sh)
int
app_worker_del_half_open (app_worker_t *app_wrk, session_t *s)
{
- application_t *app = application_get (app_wrk->app_index);
- ASSERT (vlib_get_thread_index () <= 1);
- pool_put_index (app_wrk->half_open_table, s->ho_index);
- if (app->cb_fns.half_open_cleanup_callback)
- app->cb_fns.half_open_cleanup_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_HALF_CLEANUP);
return 0;
}
int
app_worker_close_notify (app_worker_t * app_wrk, session_t * s)
{
- application_t *app = application_get (app_wrk->app_index);
- app->cb_fns.session_disconnect_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_DISCONNECTED);
return 0;
}
int
app_worker_transport_closed_notify (app_worker_t * app_wrk, session_t * s)
{
- application_t *app = application_get (app_wrk->app_index);
- if (app->cb_fns.session_transport_closed_callback)
- app->cb_fns.session_transport_closed_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_TRANSPORT_CLOSED);
return 0;
}
int
app_worker_reset_notify (app_worker_t * app_wrk, session_t * s)
{
- application_t *app = application_get (app_wrk->app_index);
- app->cb_fns.session_reset_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_RESET);
return 0;
}
@@ -432,29 +517,33 @@ int
app_worker_cleanup_notify (app_worker_t * app_wrk, session_t * s,
session_cleanup_ntf_t ntf)
{
- application_t *app = application_get (app_wrk->app_index);
- if (app->cb_fns.session_cleanup_callback)
- app->cb_fns.session_cleanup_callback (s, ntf);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_CLEANUP,
+ .as_u64[0] = (u64) ntf << 32 | s->session_index,
+ .as_u64[1] = pointer_to_uword (session_cleanup) };
+
+ app_worker_add_event_custom (app_wrk, s->thread_index, &evt);
+
return 0;
}
int
-app_worker_builtin_rx (app_worker_t * app_wrk, session_t * s)
+app_worker_cleanup_notify_custom (app_worker_t *app_wrk, session_t *s,
+ session_cleanup_ntf_t ntf,
+ void (*cleanup_cb) (session_t *s))
{
- application_t *app = application_get (app_wrk->app_index);
- app->cb_fns.builtin_app_rx_callback (s);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_CLEANUP,
+ .as_u64[0] = (u64) ntf << 32 | s->session_index,
+ .as_u64[1] = pointer_to_uword (cleanup_cb) };
+
+ app_worker_add_event_custom (app_wrk, s->thread_index, &evt);
+
return 0;
}
int
-app_worker_builtin_tx (app_worker_t * app_wrk, session_t * s)
+app_worker_rx_notify (app_worker_t *app_wrk, session_t *s)
{
- application_t *app = application_get (app_wrk->app_index);
-
- if (!app->cb_fns.builtin_app_tx_callback)
- return 0;
-
- app->cb_fns.builtin_app_tx_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_IO_EVT_RX);
return 0;
}
@@ -462,8 +551,11 @@ int
app_worker_migrate_notify (app_worker_t * app_wrk, session_t * s,
session_handle_t new_sh)
{
- application_t *app = application_get (app_wrk->app_index);
- app->cb_fns.session_migrate_callback (s, new_sh);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_MIGRATED,
+ .as_u64[0] = s->session_index,
+ .as_u64[1] = new_sh };
+
+ app_worker_add_event_custom (app_wrk, s->thread_index, &evt);
return 0;
}
@@ -472,6 +564,7 @@ app_worker_own_session (app_worker_t * app_wrk, session_t * s)
{
segment_manager_t *sm;
svm_fifo_t *rxf, *txf;
+ int rv;
if (s->session_state == SESSION_STATE_LISTENING)
return application_change_listener_owner (s, app_wrk);
@@ -488,8 +581,8 @@ app_worker_own_session (app_worker_t * app_wrk, session_t * s)
s->tx_fifo = 0;
sm = app_worker_get_connect_segment_manager (app_wrk);
- if (app_worker_alloc_session_fifos (sm, s))
- return -1;
+ if ((rv = app_worker_alloc_session_fifos (sm, s)))
+ return rv;
if (!svm_fifo_is_empty_cons (rxf))
svm_fifo_clone (s->rx_fifo, rxf);
@@ -506,6 +599,9 @@ int
app_worker_connect_session (app_worker_t *app_wrk, session_endpoint_cfg_t *sep,
session_handle_t *rsh)
{
+ if (PREDICT_FALSE (app_worker_mq_is_congested (app_wrk)))
+ return SESSION_E_REFUSED;
+
sep->app_wrk_index = app_wrk->wrk_index;
return session_open (sep, rsh);
@@ -549,14 +645,12 @@ app_worker_first_listener (app_worker_t * app_wrk, u8 fib_proto,
sst = session_type_from_proto_and_ip (transport_proto,
fib_proto == FIB_PROTOCOL_IP4);
- /* *INDENT-OFF* */
hash_foreach (handle, sm_index, app_wrk->listeners_table, ({
listener = listen_session_get_from_handle (handle);
if (listener->session_type == sst
&& !(listener->flags & SESSION_F_PROXY))
return listener;
}));
- /* *INDENT-ON* */
return 0;
}
@@ -573,13 +667,11 @@ app_worker_proxy_listener (app_worker_t * app_wrk, u8 fib_proto,
sst = session_type_from_proto_and_ip (transport_proto,
fib_proto == FIB_PROTOCOL_IP4);
- /* *INDENT-OFF* */
hash_foreach (handle, sm_index, app_wrk->listeners_table, ({
listener = listen_session_get_from_handle (handle);
if (listener->session_type == sst && (listener->flags & SESSION_F_PROXY))
return listener;
}));
- /* *INDENT-ON* */
return 0;
}
@@ -590,130 +682,178 @@ app_worker_proxy_listener (app_worker_t * app_wrk, u8 fib_proto,
int
app_worker_add_segment_notify (app_worker_t * app_wrk, u64 segment_handle)
{
- application_t *app = application_get (app_wrk->app_index);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_APP_ADD_SEGMENT,
+ .as_u64[1] = segment_handle };
- return app->cb_fns.add_segment_callback (app_wrk->wrk_index,
- segment_handle);
+ app_worker_add_event_custom (app_wrk, vlib_get_thread_index (), &evt);
+
+ return 0;
}
int
app_worker_del_segment_notify (app_worker_t * app_wrk, u64 segment_handle)
{
- application_t *app = application_get (app_wrk->app_index);
- return app->cb_fns.del_segment_callback (app_wrk->wrk_index,
- segment_handle);
-}
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_APP_DEL_SEGMENT,
+ .as_u64[1] = segment_handle };
-static inline u8
-app_worker_application_is_builtin (app_worker_t * app_wrk)
-{
- return app_wrk->app_is_builtin;
+ app_worker_add_event_custom (app_wrk, vlib_get_thread_index (), &evt);
+
+ return 0;
}
-static inline int
-app_send_io_evt_rx (app_worker_t * app_wrk, session_t * s)
+static int
+app_wrk_send_fd (app_worker_t *app_wrk, int fd)
{
- session_event_t *evt;
- svm_msg_q_msg_t msg;
- svm_msg_q_t *mq;
+ if (!appns_sapi_enabled ())
+ {
+ vl_api_registration_t *reg;
+ clib_error_t *error;
- if (app_worker_application_is_builtin (app_wrk))
- return app_worker_builtin_rx (app_wrk, s);
+ reg =
+ vl_mem_api_client_index_to_registration (app_wrk->api_client_index);
+ if (!reg)
+ {
+ clib_warning ("no api registration for client: %u",
+ app_wrk->api_client_index);
+ return -1;
+ }
- if (svm_fifo_has_event (s->rx_fifo))
- return 0;
+ if (vl_api_registration_file_index (reg) == VL_API_INVALID_FI)
+ return -1;
- mq = app_wrk->event_queue;
- svm_msg_q_lock (mq);
+ error = vl_api_send_fd_msg (reg, &fd, 1);
+ if (error)
+ {
+ clib_error_report (error);
+ return -1;
+ }
- if (PREDICT_FALSE (svm_msg_q_is_full (mq)))
- {
- clib_warning ("evt q full");
- svm_msg_q_unlock (mq);
- return -1;
+ return 0;
}
- if (PREDICT_FALSE (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)))
+ app_sapi_msg_t smsg = { 0 };
+ app_namespace_t *app_ns;
+ clib_error_t *error;
+ application_t *app;
+ clib_socket_t *cs;
+ u32 cs_index;
+
+ app = application_get (app_wrk->app_index);
+ app_ns = app_namespace_get (app->ns_index);
+ cs_index = appns_sapi_handle_sock_index (app_wrk->api_client_index);
+ cs = appns_sapi_get_socket (app_ns, cs_index);
+ if (PREDICT_FALSE (!cs))
+ return -1;
+
+ /* There's no payload for the message only the type */
+ smsg.type = APP_SAPI_MSG_TYPE_SEND_FDS;
+ error = clib_socket_sendmsg (cs, &smsg, sizeof (smsg), &fd, 1);
+ if (error)
{
- clib_warning ("evt q rings full");
- svm_msg_q_unlock (mq);
+ clib_error_report (error);
return -1;
}
- msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
- evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
- evt->session_index = s->rx_fifo->shr->client_session_index;
- evt->event_type = SESSION_IO_EVT_RX;
-
- (void) svm_fifo_set_event (s->rx_fifo);
- svm_msg_q_add_and_unlock (mq, &msg);
-
return 0;
}
-static inline int
-app_send_io_evt_tx (app_worker_t * app_wrk, session_t * s)
+void
+app_worker_add_event (app_worker_t *app_wrk, session_t *s,
+ session_evt_type_t evt_type)
{
- svm_msg_q_t *mq;
session_event_t *evt;
- svm_msg_q_msg_t msg;
- if (app_worker_application_is_builtin (app_wrk))
- return app_worker_builtin_tx (app_wrk, s);
+ ASSERT (s->thread_index == vlib_get_thread_index ());
+ clib_fifo_add2 (app_wrk->wrk_evts[s->thread_index], evt);
+ evt->session_index = s->session_index;
+ evt->event_type = evt_type;
+ evt->postponed = 0;
- mq = app_wrk->event_queue;
- svm_msg_q_lock (mq);
-
- if (PREDICT_FALSE (svm_msg_q_is_full (mq)))
+ /* First event for this app_wrk. Schedule it for handling in session input */
+ if (clib_fifo_elts (app_wrk->wrk_evts[s->thread_index]) == 1)
{
- clib_warning ("evt q full");
- svm_msg_q_unlock (mq);
- return -1;
+ session_worker_t *wrk = session_main_get_worker (s->thread_index);
+ session_wrk_program_app_wrk_evts (wrk, app_wrk->wrk_index);
}
+}
+
+void
+app_worker_add_event_custom (app_worker_t *app_wrk, u32 thread_index,
+ session_event_t *evt)
+{
+ clib_fifo_add1 (app_wrk->wrk_evts[thread_index], *evt);
- if (PREDICT_FALSE (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)))
+ /* First event for this app_wrk. Schedule it for handling in session input */
+ if (clib_fifo_elts (app_wrk->wrk_evts[thread_index]) == 1)
{
- clib_warning ("evt q rings full");
- svm_msg_q_unlock (mq);
- return -1;
+ session_worker_t *wrk = session_main_get_worker (thread_index);
+ session_wrk_program_app_wrk_evts (wrk, app_wrk->wrk_index);
}
+}
- msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
- evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
- evt->event_type = SESSION_IO_EVT_TX;
- evt->session_index = s->tx_fifo->shr->client_session_index;
+always_inline void
+app_wrk_send_ctrl_evt_inline (app_worker_t *app_wrk, u8 evt_type, void *msg,
+ u32 msg_len, int fd)
+{
+ svm_msg_q_msg_t _mq_msg, *mq_msg = &_mq_msg;
+ svm_msg_q_t *mq = app_wrk->event_queue;
+ session_event_t *evt;
- svm_msg_q_add_and_unlock (mq, &msg);
- return 0;
+ ASSERT (!svm_msg_q_or_ring_is_full (mq, SESSION_MQ_CTRL_EVT_RING));
+ *mq_msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_CTRL_EVT_RING);
+
+ evt = svm_msg_q_msg_data (mq, mq_msg);
+ clib_memset (evt, 0, sizeof (*evt));
+ evt->event_type = evt_type;
+ clib_memcpy_fast (evt->data, msg, msg_len);
+
+ if (fd != -1)
+ app_wrk_send_fd (app_wrk, fd);
+
+ svm_msg_q_add_raw (mq, mq_msg);
+}
+
+void
+app_wrk_send_ctrl_evt_fd (app_worker_t *app_wrk, u8 evt_type, void *msg,
+ u32 msg_len, int fd)
+{
+ app_wrk_send_ctrl_evt_inline (app_wrk, evt_type, msg, msg_len, fd);
}
-/* *INDENT-OFF* */
-typedef int (app_send_evt_handler_fn) (app_worker_t *app,
- session_t *s);
-static app_send_evt_handler_fn * const app_send_evt_handler_fns[2] = {
- app_send_io_evt_rx,
- app_send_io_evt_tx,
-};
-/* *INDENT-ON* */
+void
+app_wrk_send_ctrl_evt (app_worker_t *app_wrk, u8 evt_type, void *msg,
+ u32 msg_len)
+{
+ app_wrk_send_ctrl_evt_inline (app_wrk, evt_type, msg, msg_len, -1);
+}
-/**
- * Send event to application
- *
- * Logic from queue perspective is blocking. However, if queue is full,
- * we return.
- */
-int
-app_worker_lock_and_send_event (app_worker_t * app, session_t * s,
- u8 evt_type)
+u8
+app_worker_mq_wrk_is_congested (app_worker_t *app_wrk, u32 thread_index)
+{
+ return app_wrk->wrk_mq_congested[thread_index] > 0;
+}
+
+void
+app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, u32 thread_index)
+{
+ clib_atomic_fetch_add_relax (&app_wrk->mq_congested, 1);
+ ASSERT (thread_index == vlib_get_thread_index ());
+ app_wrk->wrk_mq_congested[thread_index] = 1;
+}
+
+void
+app_worker_unset_wrk_mq_congested (app_worker_t *app_wrk, u32 thread_index)
{
- return app_send_evt_handler_fns[evt_type] (app, s);
+ clib_atomic_fetch_sub_relax (&app_wrk->mq_congested, 1);
+ ASSERT (thread_index == vlib_get_thread_index ());
+ app_wrk->wrk_mq_congested[thread_index] = 0;
}
u8 *
format_app_worker_listener (u8 * s, va_list * args)
{
app_worker_t *app_wrk = va_arg (*args, app_worker_t *);
- u64 handle = va_arg (*args, u64);
+ session_handle_t handle = va_arg (*args, u64);
u32 sm_index = va_arg (*args, u32);
int verbose = va_arg (*args, int);
session_t *listener;
@@ -760,10 +900,12 @@ format_app_worker (u8 * s, va_list * args)
app_worker_t *app_wrk = va_arg (*args, app_worker_t *);
u32 indent = 1;
- s = format (s, "%U wrk-index %u app-index %u map-index %u "
- "api-client-index %d\n", format_white_space, indent,
- app_wrk->wrk_index, app_wrk->app_index, app_wrk->wrk_map_index,
- app_wrk->api_client_index);
+ s = format (s,
+ "%U wrk-index %u app-index %u map-index %u "
+ "api-client-index %d mq-cong %u\n",
+ format_white_space, indent, app_wrk->wrk_index,
+ app_wrk->app_index, app_wrk->wrk_map_index,
+ app_wrk->api_client_index, app_wrk->mq_congested);
return s;
}
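The control events introduced in this file carry their payload packed into the two as_u64 words of session_event_t instead of going through per-callback mq messages. A sketch of how a consumer could unpack the CONNECTED event built in app_worker_connect_notify above (the handler name is illustrative; the real dispatch lives in the session layer):

static void
my_handle_connected_evt (session_event_t *evt, u32 thread_index)
{
  /* Layout from app_worker_connect_notify:
   *   as_u64[0] = session index, or ~0 on failure
   *   as_u64[1] = (u64) opaque << 32 | (u32) err */
  u32 session_index = (u32) evt->as_u64[0];
  u32 opaque = evt->as_u64[1] >> 32;
  session_error_t err = (session_error_t) (i32) (u32) evt->as_u64[1];
  session_t *s = 0;

  if (session_index != (u32) ~0)
    s = session_get (session_index, thread_index);
  /* hand (s, err, opaque) to the app's session_connected callback */
}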
diff --git a/src/vnet/session/mma_template.h b/src/vnet/session/mma_template.h
index dc3545a4ffe..2c0230c2869 100644
--- a/src/vnet/session/mma_template.h
+++ b/src/vnet/session/mma_template.h
@@ -41,11 +41,9 @@ typedef struct
{
u32 action_index;
u32 *next_indices;
- /* *INDENT-OFF* */
RTT (mma_mask_or_match) mask;
RTT (mma_mask_or_match) match;
RTT (mma_mask_or_match) max_match;
- /* *INDENT-ON* */
} RTT (mma_rule);
typedef int (*RTT (rule_cmp_fn)) (RTT (mma_rule) * rule1,
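The RTT() macro above is the usual C token-pasting template trick: the header is included several times with a different type tag, stamping out a specialized rule type per tag. A generic sketch of the mechanism (the macro names here are illustrative, not the file's actual definitions):

#define _cat(a, b) a##_##b
#define _expand_cat(a, b) _cat (a, b)
#define RTT(name) _expand_cat (name, MMA_RT_TAG)

/* With MMA_RT_TAG defined as 40_4 before including the template:
 *   RTT (mma_rule) -> mma_rule_40_4
 * so each inclusion yields a distinct concrete type and function set. */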
diff --git a/src/vnet/session/segment_manager.c b/src/vnet/session/segment_manager.c
index e04f626eab2..80bebdca9b5 100644
--- a/src/vnet/session/segment_manager.c
+++ b/src/vnet/session/segment_manager.c
@@ -89,28 +89,30 @@ segment_manager_segment_index (segment_manager_t * sm, fifo_segment_t * seg)
*/
static inline int
segment_manager_add_segment_inline (segment_manager_t *sm, uword segment_size,
- u8 notify_app, u8 flags)
+ u8 notify_app, u8 flags, u8 need_lock)
{
segment_manager_main_t *smm = &sm_main;
segment_manager_props_t *props;
+ app_worker_t *app_wrk;
fifo_segment_t *fs;
u32 fs_index = ~0;
u8 *seg_name;
int rv;
props = segment_manager_properties_get (sm);
+ app_wrk = app_worker_get (sm->app_wrk_index);
/* Not configured for addition of new segments and not first */
if (!props->add_segment && !segment_size)
{
- clib_warning ("cannot allocate new segment");
- return VNET_API_ERROR_INVALID_VALUE;
+ SESSION_DBG ("cannot allocate new segment");
+ return SESSION_E_INVALID;
}
/*
* Allocate fifo segment and grab lock if needed
*/
- if (vlib_num_workers ())
+ if (need_lock)
clib_rwlock_writer_lock (&sm->segments_rwlock);
pool_get_zero (sm->segments, fs);
@@ -119,18 +121,24 @@ segment_manager_add_segment_inline (segment_manager_t *sm, uword segment_size,
* Allocate ssvm segment
*/
segment_size = segment_size ? segment_size : props->add_segment_size;
- segment_size = round_pow2 (segment_size, clib_mem_get_page_size ());
-
- if (props->segment_type != SSVM_SEGMENT_PRIVATE)
+ /* add overhead to ensure the resulting segment size is at least
+ * that requested */
+ segment_size +=
+ sizeof (fifo_segment_header_t) +
+ vlib_thread_main.n_vlib_mains * sizeof (fifo_segment_slice_t) +
+ FIFO_SEGMENT_ALLOC_OVERHEAD;
+
+ if (props->huge_page)
{
- seg_name = format (0, "%d-%d%c", getpid (), smm->seg_name_counter++, 0);
+ uword hugepage_size = clib_mem_get_default_hugepage_size ();
+ segment_size = round_pow2 (segment_size, hugepage_size);
+ fs->ssvm.huge_page = 1;
}
else
- {
- app_worker_t *app_wrk = app_worker_get (sm->app_wrk_index);
- application_t *app = application_get (app_wrk->app_index);
- seg_name = format (0, "%v segment%c", app->name, 0);
- }
+ segment_size = round_pow2 (segment_size, clib_mem_get_page_size ());
+
+ seg_name = format (0, "seg-%u-%u-%u%c", app_wrk->app_index,
+ app_wrk->wrk_index, smm->seg_name_counter++, 0);
fs->ssvm.ssvm_size = segment_size;
fs->ssvm.name = seg_name;
@@ -154,6 +162,8 @@ segment_manager_add_segment_inline (segment_manager_t *sm, uword segment_size,
* Save segment index before dropping lock, if any held
*/
fs_index = fs - sm->segments;
+ fs->fs_index = fs_index;
+ fs->sm_index = segment_manager_index (sm);
/*
* Set watermarks in segment
@@ -172,11 +182,14 @@ segment_manager_add_segment_inline (segment_manager_t *sm, uword segment_size,
app_wrk = app_worker_get (sm->app_wrk_index);
rv = app_worker_add_segment_notify (app_wrk, fs_handle);
if (rv)
- return rv;
+ {
+ fs_index = rv;
+ goto done;
+ }
}
done:
- if (vlib_num_workers ())
+ if (need_lock)
clib_rwlock_writer_unlock (&sm->segments_rwlock);
return fs_index;
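As a quick sanity check on the sizing above: the overhead terms are added before rounding, so the usable space is never less than requested. An illustrative computation, assuming 4 vlib mains and 4 KB pages:

/* request 1 MB */
uword size = 1 << 20;
size += sizeof (fifo_segment_header_t)	     /* segment header */
	+ 4 * sizeof (fifo_segment_slice_t)  /* one slice per vlib main */
	+ FIFO_SEGMENT_ALLOC_OVERHEAD;	     /* allocator slack */
size = round_pow2 (size, 4 << 10);
/* result: a few pages above 1 MB, at least the requested size */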
@@ -186,14 +199,16 @@ int
segment_manager_add_segment (segment_manager_t *sm, uword segment_size,
u8 notify_app)
{
- return segment_manager_add_segment_inline (sm, segment_size, notify_app, 0);
+ return segment_manager_add_segment_inline (sm, segment_size, notify_app,
+ 0 /* flags */, 0 /* need_lock */);
}
int
segment_manager_add_segment2 (segment_manager_t *sm, uword segment_size,
u8 flags)
{
- return segment_manager_add_segment_inline (sm, segment_size, 0, flags);
+ return segment_manager_add_segment_inline (sm, segment_size, 0, flags,
+ vlib_num_workers ());
}
/**
@@ -235,7 +250,8 @@ segment_manager_get_segment_if_valid (segment_manager_t * sm,
* Removes segment after acquiring writer lock
*/
static inline void
-sm_lock_and_del_segment_inline (segment_manager_t * sm, u32 fs_index)
+sm_lock_and_del_segment_inline (segment_manager_t *sm, u32 fs_index,
+ u8 check_if_empty)
{
fifo_segment_t *fs;
u8 is_prealloc;
@@ -246,6 +262,9 @@ sm_lock_and_del_segment_inline (segment_manager_t * sm, u32 fs_index)
if (!fs)
goto done;
+ if (check_if_empty && fifo_segment_has_fifos (fs))
+ goto done;
+
is_prealloc = fifo_segment_flags (fs) & FIFO_SEGMENT_F_IS_PREALLOCATED;
if (is_prealloc && !segment_manager_app_detached (sm))
goto done;
@@ -259,7 +278,7 @@ done:
void
segment_manager_lock_and_del_segment (segment_manager_t * sm, u32 fs_index)
{
- sm_lock_and_del_segment_inline (sm, fs_index);
+ sm_lock_and_del_segment_inline (sm, fs_index, 0 /* check_if_empty */);
}
/**
@@ -326,12 +345,6 @@ segment_manager_segment_reader_unlock (segment_manager_t * sm)
clib_rwlock_reader_unlock (&sm->segments_rwlock);
}
-void
-segment_manager_segment_writer_unlock (segment_manager_t * sm)
-{
- clib_rwlock_writer_unlock (&sm->segments_rwlock);
-}
-
segment_manager_t *
segment_manager_alloc (void)
{
@@ -405,7 +418,7 @@ segment_manager_init_first (segment_manager_t * sm)
fs_index = segment_manager_add_segment (sm, max_seg_size, 0);
if (fs_index < 0)
{
- clib_warning ("Failed to preallocate segment %d", i);
+ SESSION_DBG ("Failed to preallocate segment %d", i);
return fs_index;
}
@@ -427,7 +440,7 @@ segment_manager_init_first (segment_manager_t * sm)
fs_index = segment_manager_add_segment (sm, first_seg_size, 0);
if (fs_index < 0)
{
- clib_warning ("Failed to allocate segment");
+ SESSION_DBG ("Failed to allocate segment");
return fs_index;
}
@@ -445,7 +458,7 @@ segment_manager_init_first (segment_manager_t * sm)
for (; i < fs->n_slices; i++)
{
if (fifo_segment_prealloc_fifo_hdrs (fs, i, hdrs_per_slice))
- return VNET_API_ERROR_SVM_SEGMENT_CREATE_FAIL;
+ return SESSION_E_SEG_CREATE;
}
}
@@ -486,11 +499,9 @@ segment_manager_free (segment_manager_t * sm)
* the manager is explicitly deleted/detached by the app. */
clib_rwlock_writer_lock (&sm->segments_rwlock);
- /* *INDENT-OFF* */
pool_foreach (fifo_segment, sm->segments) {
segment_manager_del_segment (sm, fifo_segment);
}
- /* *INDENT-ON* */
pool_free (sm->segments);
clib_rwlock_writer_unlock (&sm->segments_rwlock);
@@ -569,7 +580,6 @@ segment_manager_has_fifos (segment_manager_t * sm)
fifo_segment_t *seg;
u8 first = 1;
- /* *INDENT-OFF* */
segment_manager_foreach_segment_w_lock (seg, sm, ({
if (CLIB_DEBUG && !first && !fifo_segment_has_fifos (seg)
&& !(fifo_segment_flags (seg) & FIFO_SEGMENT_F_IS_PREALLOCATED))
@@ -584,7 +594,6 @@ segment_manager_has_fifos (segment_manager_t * sm)
return 1;
}
}));
- /* *INDENT-ON* */
return 0;
}
@@ -604,7 +613,6 @@ segment_manager_del_sessions (segment_manager_t * sm)
ASSERT (pool_elts (sm->segments) != 0);
/* Across all fifo segments used by the server */
- /* *INDENT-OFF* */
segment_manager_foreach_segment_w_lock (fs, sm, ({
for (slice_index = 0; slice_index < fs->n_slices; slice_index++)
{
@@ -629,7 +637,6 @@ segment_manager_del_sessions (segment_manager_t * sm)
* sessions if the segment can be removed.
*/
}));
- /* *INDENT-ON* */
vec_foreach (handle, handles)
{
@@ -695,19 +702,16 @@ segment_manager_del_sessions_filter (segment_manager_t *sm,
}
int
-segment_manager_try_alloc_fifos (fifo_segment_t * fifo_segment,
- u32 thread_index,
+segment_manager_try_alloc_fifos (fifo_segment_t *fs, u32 thread_index,
u32 rx_fifo_size, u32 tx_fifo_size,
- svm_fifo_t ** rx_fifo, svm_fifo_t ** tx_fifo)
+ svm_fifo_t **rx_fifo, svm_fifo_t **tx_fifo)
{
rx_fifo_size = clib_max (rx_fifo_size, sm_main.default_fifo_size);
- *rx_fifo = fifo_segment_alloc_fifo_w_slice (fifo_segment, thread_index,
- rx_fifo_size,
+ *rx_fifo = fifo_segment_alloc_fifo_w_slice (fs, thread_index, rx_fifo_size,
FIFO_SEGMENT_RX_FIFO);
tx_fifo_size = clib_max (tx_fifo_size, sm_main.default_fifo_size);
- *tx_fifo = fifo_segment_alloc_fifo_w_slice (fifo_segment, thread_index,
- tx_fifo_size,
+ *tx_fifo = fifo_segment_alloc_fifo_w_slice (fs, thread_index, tx_fifo_size,
FIFO_SEGMENT_TX_FIFO);
if (*rx_fifo == 0)
@@ -715,45 +719,37 @@ segment_manager_try_alloc_fifos (fifo_segment_t * fifo_segment,
/* This would be very odd, but handle it... */
if (*tx_fifo != 0)
{
- fifo_segment_free_fifo (fifo_segment, *tx_fifo);
+ fifo_segment_free_fifo (fs, *tx_fifo);
*tx_fifo = 0;
}
- return -1;
+ return SESSION_E_SEG_NO_SPACE;
}
if (*tx_fifo == 0)
{
if (*rx_fifo != 0)
{
- fifo_segment_free_fifo (fifo_segment, *rx_fifo);
+ fifo_segment_free_fifo (fs, *rx_fifo);
*rx_fifo = 0;
}
- return -1;
+ return SESSION_E_SEG_NO_SPACE;
}
return 0;
}
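segment_manager_try_alloc_fifos treats the rx/tx pair as all-or-nothing: if only one allocation succeeds, the survivor is freed and SESSION_E_SEG_NO_SPACE is returned, so callers never observe a half-provisioned session. A simplified sketch of the pattern (names illustrative, not from this patch):

    rx = alloc_fifo (rx_size);
    tx = alloc_fifo (tx_size);
    if (!rx || !tx)
      {
        /* roll back the partial result before reporting failure */
        if (rx)
          free_fifo (rx);
        if (tx)
          free_fifo (tx);
        return SESSION_E_SEG_NO_SPACE;
      }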
-int
-segment_manager_alloc_session_fifos (segment_manager_t * sm,
- u32 thread_index,
- svm_fifo_t ** rx_fifo,
- svm_fifo_t ** tx_fifo)
+static inline int
+sm_lookup_segment_and_alloc_fifos (segment_manager_t *sm,
+ segment_manager_props_t *props,
+ u32 thread_index, svm_fifo_t **rx_fifo,
+ svm_fifo_t **tx_fifo)
{
- int alloc_fail = 1, rv = 0, new_fs_index;
- uword free_bytes, max_free_bytes = 0;
- segment_manager_props_t *props;
- fifo_segment_t *fs = 0, *cur;
- u32 sm_index, fs_index;
-
- props = segment_manager_properties_get (sm);
-
- /*
- * Find the first free segment to allocate the fifos in
- */
+ uword free_bytes, max_free_bytes;
+ fifo_segment_t *cur, *fs = 0;
- segment_manager_segment_reader_lock (sm);
+ max_free_bytes = props->rx_fifo_size + props->tx_fifo_size - 1;
- pool_foreach (cur, sm->segments) {
+ pool_foreach (cur, sm->segments)
+ {
if (fifo_segment_flags (cur) & FIFO_SEGMENT_F_CUSTOM_USE)
continue;
free_bytes = fifo_segment_available_bytes (cur);
@@ -762,63 +758,93 @@ segment_manager_alloc_session_fifos (segment_manager_t * sm,
max_free_bytes = free_bytes;
fs = cur;
}
- }
-
- if (fs)
- {
- alloc_fail = segment_manager_try_alloc_fifos (fs, thread_index,
- props->rx_fifo_size,
- props->tx_fifo_size,
- rx_fifo, tx_fifo);
- /* On success, keep lock until fifos are initialized */
- if (!alloc_fail)
- goto alloc_success;
}
- segment_manager_segment_reader_unlock (sm);
+ if (PREDICT_FALSE (!fs))
+ return SESSION_E_SEG_NO_SPACE;
- /*
- * Allocation failed, see if we can add a new segment
- */
- if (props->add_segment)
+ return segment_manager_try_alloc_fifos (
+ fs, thread_index, props->rx_fifo_size, props->tx_fifo_size, rx_fifo,
+ tx_fifo);
+}
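Seeding max_free_bytes with rx_fifo_size + tx_fifo_size - 1 folds the minimum-space check into the max search: only segments with at least rx + tx free bytes can beat the seed, and among those the roomiest wins. For example, with 4 kB fifos on each side the seed is 8191, so a segment reporting 8192 free bytes qualifies while one reporting 8191 does not. The available-bytes figure is only a hint, though; the subsequent allocation can still fail (e.g. due to per-slice fragmentation), which the slow path below handles.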
+
+static int
+sm_lock_and_alloc_segment_and_fifos (segment_manager_t *sm,
+ segment_manager_props_t *props,
+ u32 thread_index, svm_fifo_t **rx_fifo,
+ svm_fifo_t **tx_fifo)
+{
+ int new_fs_index, rv;
+ fifo_segment_t *fs;
+
+ if (!props->add_segment)
+ return SESSION_E_SEG_NO_SPACE;
+
+ clib_rwlock_writer_lock (&sm->segments_rwlock);
+
+ /* Make sure there really is no free space. Another worker might have
+ * freed some fifos or allocated a segment in the meantime */
+ rv = sm_lookup_segment_and_alloc_fifos (sm, props, thread_index, rx_fifo,
+ tx_fifo);
+ if (!rv)
+ goto done;
+
+ new_fs_index =
+ segment_manager_add_segment (sm, 0 /* segment_size */, 1 /* notify_app */);
+ if (new_fs_index < 0)
{
- if ((new_fs_index = segment_manager_add_segment (sm, 0, 1)) < 0)
- {
- clib_warning ("Failed to add new segment");
- return SESSION_E_SEG_CREATE;
- }
- fs = segment_manager_get_segment_w_lock (sm, new_fs_index);
- alloc_fail = segment_manager_try_alloc_fifos (fs, thread_index,
- props->rx_fifo_size,
- props->tx_fifo_size,
- rx_fifo, tx_fifo);
- if (alloc_fail)
- {
- clib_warning ("Added a segment, still can't allocate a fifo");
- segment_manager_segment_reader_unlock (sm);
- return SESSION_E_SEG_NO_SPACE2;
- }
+ rv = SESSION_E_SEG_CREATE;
+ goto done;
}
- else
+ fs = segment_manager_get_segment (sm, new_fs_index);
+ rv = segment_manager_try_alloc_fifos (fs, thread_index, props->rx_fifo_size,
+ props->tx_fifo_size, rx_fifo, tx_fifo);
+ if (rv)
{
- SESSION_DBG ("Can't add new seg and no space to allocate fifos!");
- return SESSION_E_SEG_NO_SPACE;
+ SESSION_DBG ("Added a segment, still can't allocate a fifo");
+ rv = SESSION_E_SEG_NO_SPACE2;
+ goto done;
}
-alloc_success:
- ASSERT (rx_fifo && tx_fifo);
+done:
+
+ clib_rwlock_writer_unlock (&sm->segments_rwlock);
+
+ return rv;
+}
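This is a double-checked pattern: the lookup is repeated after taking the writer lock because another worker may have freed fifos or grown the segment pool between the caller's reader unlock and the writer lock here. Note too that segment_manager_add_segment is the lock-free variant (per the header doc below, "Add segment without lock"), which is safe at this point precisely because the writer lock is already held. Outline, assuming sm_lookup_segment_and_alloc_fifos is valid under either lock:

    clib_rwlock_writer_lock (&sm->segments_rwlock);
    rv = sm_lookup_segment_and_alloc_fifos (...);  /* re-check under writer */
    if (rv)
      rv = /* grow by one segment, then allocate from it */;
    clib_rwlock_writer_unlock (&sm->segments_rwlock);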
+
+int
+segment_manager_alloc_session_fifos (segment_manager_t * sm,
+ u32 thread_index,
+ svm_fifo_t ** rx_fifo,
+ svm_fifo_t ** tx_fifo)
+{
+ segment_manager_props_t *props;
+ int rv;
+
+ props = segment_manager_properties_get (sm);
- sm_index = segment_manager_index (sm);
- fs_index = segment_manager_segment_index (sm, fs);
- (*tx_fifo)->segment_manager = sm_index;
- (*rx_fifo)->segment_manager = sm_index;
- (*tx_fifo)->segment_index = fs_index;
- (*rx_fifo)->segment_index = fs_index;
+ /*
+ * Fast path: find the segment with the most free space and try to
+ * allocate the fifos. Done with the reader lock held.
+ */
+
+ segment_manager_segment_reader_lock (sm);
+
+ rv = sm_lookup_segment_and_alloc_fifos (sm, props, thread_index, rx_fifo,
+ tx_fifo);
- /* Drop the lock after app is notified */
segment_manager_segment_reader_unlock (sm);
- return rv;
+ /*
+ * Slow path: if no segment had free space or the allocation failed,
+ * grab the writer lock and try to allocate a new segment
+ */
+ if (PREDICT_FALSE (rv < 0))
+ return sm_lock_and_alloc_segment_and_fifos (sm, props, thread_index,
+ rx_fifo, tx_fifo);
+
+ return 0;
}
void
@@ -827,10 +853,15 @@ segment_manager_dealloc_fifos (svm_fifo_t * rx_fifo, svm_fifo_t * tx_fifo)
segment_manager_t *sm;
fifo_segment_t *fs;
u32 segment_index;
+ u8 try_delete = 0;
if (!rx_fifo || !tx_fifo)
return;
+ /* Thread that allocated the fifos must be the one to clean them up */
+ ASSERT (rx_fifo->master_thread_index == vlib_get_thread_index () ||
+ rx_fifo->refcnt > 1 || vlib_thread_is_main_w_barrier ());
+
/* It's possible to have no segment manager if the session was removed
* as result of a detach. */
if (!(sm = segment_manager_get_if_valid (rx_fifo->segment_manager)))
@@ -842,26 +873,30 @@ segment_manager_dealloc_fifos (svm_fifo_t * rx_fifo, svm_fifo_t * tx_fifo)
fifo_segment_free_fifo (fs, tx_fifo);
/*
- * Try to remove svm segment if it has no fifos. This can be done only if
+ * Try to remove fifo segment if it has no fifos. This can be done only if
* the segment is not the first in the segment manager or if it is first
* and it is not protected. Moreover, if the segment is first and the app
* has detached from the segment manager, remove the segment manager.
*/
if (!fifo_segment_has_fifos (fs))
{
- segment_manager_segment_reader_unlock (sm);
+ /* If first, remove only if not protected */
+ try_delete = segment_index != 0 || !sm->first_is_protected;
+ }
+
+ segment_manager_segment_reader_unlock (sm);
- /* Remove segment if it holds no fifos or first but not protected */
- if (segment_index != 0 || !sm->first_is_protected)
- sm_lock_and_del_segment_inline (sm, segment_index);
+ if (PREDICT_FALSE (try_delete))
+ {
+ /* Only remove if empty after writer lock acquired */
+ sm_lock_and_del_segment_inline (sm, segment_index,
+ 1 /* check_if_empty */);
/* Remove segment manager if no sessions and detached from app */
if (segment_manager_app_detached (sm)
&& !segment_manager_has_fifos (sm))
segment_manager_free_safe (sm);
}
- else
- segment_manager_segment_reader_unlock (sm);
}
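The reader lock is dropped before any delete attempt because deletion needs the writer lock and the rwlock is not upgraded in place; emptiness observed under the reader lock is therefore only a hint, and the check_if_empty pass re-validates it once the writer lock is held:

    /* observed empty under reader lock; re-checked under writer lock */
    sm_lock_and_del_segment_inline (sm, segment_index, 1 /* check_if_empty */);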
void
@@ -920,12 +955,10 @@ segment_manager_alloc_queue (fifo_segment_t * segment,
fifo_evt_size = sizeof (session_event_t);
notif_q_size = clib_max (16, props->evt_q_size >> 4);
- /* *INDENT-OFF* */
svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = {
{props->evt_q_size, fifo_evt_size, 0},
{notif_q_size, session_evt_size, 0}
};
- /* *INDENT-ON* */
cfg->consumer_pid = 0;
cfg->n_rings = 2;
cfg->q_nitems = props->evt_q_size;
@@ -984,79 +1017,111 @@ segment_manager_main_init (void)
sm->default_low_watermark = 50;
}
+static u8 *
+format_segment_manager (u8 *s, va_list *args)
+{
+ segment_manager_t *sm = va_arg (*args, segment_manager_t *);
+ int verbose = va_arg (*args, int);
+ app_worker_t *app_wrk;
+ uword max_fifo_size;
+ fifo_segment_t *seg;
+ application_t *app;
+ u8 custom_logic;
+
+ app_wrk = app_worker_get_if_valid (sm->app_wrk_index);
+ app = app_wrk ? application_get (app_wrk->app_index) : 0;
+ custom_logic = (app && (app->cb_fns.fifo_tuning_callback)) ? 1 : 0;
+ max_fifo_size = sm->max_fifo_size;
+
+ s = format (s,
+ "[%u] %v app-wrk: %u segs: %u max-fifo-sz: %U "
+ "wmarks: %u %u %s flags: 0x%x",
+ segment_manager_index (sm), app ? app->name : 0,
+ sm->app_wrk_index, pool_elts (sm->segments), format_memory_size,
+ max_fifo_size, sm->high_watermark, sm->low_watermark,
+ custom_logic ? "custom-tuning" : "no-tuning", sm->flags);
+
+ if (!verbose || !pool_elts (sm->segments))
+ return s;
+
+ s = format (s, "\n\n");
+
+ segment_manager_foreach_segment_w_lock (
+ seg, sm, ({ s = format (s, " *%U", format_fifo_segment, seg, verbose); }));
+
+ return s;
+}
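format_segment_manager follows the standard vppinfra format-callback shape (u8 *fn (u8 *s, va_list *args)), so it composes with %U wherever a format string is built; a brief usage sketch:

    u8 *s = 0;
    s = format (s, "%U", format_segment_manager, sm, 1 /* verbose */);
    vlib_cli_output (vm, "%v", s);
    vec_free (s);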
+
static clib_error_t *
segment_manager_show_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
+ unformat_input_t _line_input, *line_input = &_line_input;
segment_manager_main_t *smm = &sm_main;
u8 show_segments = 0, verbose = 0;
- uword max_fifo_size;
segment_manager_t *sm;
- fifo_segment_t *seg;
- app_worker_t *app_wrk;
- application_t *app;
- u8 custom_logic;
+ u32 sm_index = ~0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ {
+ vlib_cli_output (vm, "%d segment managers allocated",
+ pool_elts (smm->segment_managers));
+ return 0;
+ }
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (input, "segments"))
+ if (unformat (line_input, "segments"))
show_segments = 1;
- else if (unformat (input, "verbose"))
+ else if (unformat (line_input, "verbose"))
verbose = 1;
+ else if (unformat (line_input, "index %u", &sm_index))
+ ;
else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ {
+ vlib_cli_output (vm, "unknown input [%U]", format_unformat_error,
+ line_input);
+ goto done;
+ }
}
- vlib_cli_output (vm, "%d segment managers allocated",
- pool_elts (smm->segment_managers));
- if (verbose && pool_elts (smm->segment_managers))
+
+ if (!pool_elts (smm->segment_managers))
+ goto done;
+
+ if (sm_index != ~0)
{
- vlib_cli_output (vm, "%-6s%=10s%=10s%=13s%=11s%=11s%=12s",
- "Index", "AppIndex", "Segments", "MaxFifoSize",
- "HighWater", "LowWater", "FifoTuning");
+ sm = segment_manager_get_if_valid (sm_index);
+ if (!sm)
+ {
+ vlib_cli_output (vm, "segment manager %u not allocated", sm_index);
+ goto done;
+ }
+ vlib_cli_output (vm, "%U", format_segment_manager, sm, 1 /* verbose */);
+ goto done;
+ }
- /* *INDENT-OFF* */
+ if (verbose || show_segments)
+ {
pool_foreach (sm, smm->segment_managers) {
- app_wrk = app_worker_get_if_valid (sm->app_wrk_index);
- app = app_wrk ? application_get (app_wrk->app_index) : 0;
- custom_logic = (app && (app->cb_fns.fifo_tuning_callback)) ? 1 : 0;
- max_fifo_size = sm->max_fifo_size;
-
- vlib_cli_output (vm, "%-6d%=10d%=10d%=13U%=11d%=11d%=12s",
- segment_manager_index (sm),
- sm->app_wrk_index, pool_elts (sm->segments),
- format_memory_size, max_fifo_size,
- sm->high_watermark, sm->low_watermark,
- custom_logic ? "custom" : "none");
+ vlib_cli_output (vm, "%U", format_segment_manager, sm,
+ show_segments);
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "\n");
}
- if (show_segments)
- {
- vlib_cli_output (vm, "%U", format_fifo_segment, 0, verbose);
- /* *INDENT-OFF* */
- pool_foreach (sm, smm->segment_managers) {
- segment_manager_foreach_segment_w_lock (seg, sm, ({
- vlib_cli_output (vm, "%U", format_fifo_segment, seg, verbose);
- }));
- }
- /* *INDENT-ON* */
+done:
+
+ unformat_free (line_input);
- }
return 0;
}
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (segment_manager_show_command, static) =
-{
+VLIB_CLI_COMMAND (segment_manager_show_command, static) = {
.path = "show segment-manager",
- .short_help = "show segment-manager [segments][verbose]",
+ .short_help = "show segment-manager [segments][verbose][index <nn>]",
.function = segment_manager_show_fn,
};
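With the new index filter the command can target a single manager. An illustrative session (values invented; line layout per format_segment_manager above):

    vpp# show segment-manager index 0
    [0] test-app app-wrk: 1 segs: 1 max-fifo-sz: 4m wmarks: 80 50 no-tuning flags: 0x0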
-/* *INDENT-ON* */
void
segment_manager_format_sessions (segment_manager_t * sm, int verbose)
@@ -1085,7 +1150,6 @@ segment_manager_format_sessions (segment_manager_t * sm, int verbose)
clib_rwlock_reader_lock (&sm->segments_rwlock);
- /* *INDENT-OFF* */
pool_foreach (fs, sm->segments) {
for (slice_index = 0; slice_index < fs->n_slices; slice_index++)
{
@@ -1117,7 +1181,6 @@ segment_manager_format_sessions (segment_manager_t * sm, int verbose)
vec_free (s);
}
}
- /* *INDENT-ON* */
clib_rwlock_reader_unlock (&sm->segments_rwlock);
}
diff --git a/src/vnet/session/segment_manager.h b/src/vnet/session/segment_manager.h
index 5a3d772ff02..1e99c4605a6 100644
--- a/src/vnet/session/segment_manager.h
+++ b/src/vnet/session/segment_manager.h
@@ -40,6 +40,7 @@ typedef struct _segment_manager_props
u8 high_watermark; /**< memory usage high watermark % */
u8 low_watermark; /**< memory usage low watermark % */
u8 pct_first_alloc; /**< pct of fifo size to alloc */
+ u8 huge_page; /**< use hugepage */
} segment_manager_props_t;
typedef enum seg_manager_flag_
@@ -102,8 +103,23 @@ segment_manager_t *segment_manager_get (u32 index);
segment_manager_t *segment_manager_get_if_valid (u32 index);
u32 segment_manager_index (segment_manager_t * sm);
+/**
+ * Add segment without lock
+ *
+ * @param sm Segment manager
+ * @param segment_size Size of segment to be added
+ * @param notify_app Flag set if app notification requested
+ */
int segment_manager_add_segment (segment_manager_t *sm, uword segment_size,
u8 notify_app);
+
+/**
+ * Add segment with lock
+ *
+ * @param sm Segment manager
+ * @param segment_size Size of segment to be added
+ * @param flags Flags to be set on segment
+ */
int segment_manager_add_segment2 (segment_manager_t *sm, uword segment_size,
u8 flags);
void segment_manager_del_segment (segment_manager_t * sm,
@@ -122,7 +138,6 @@ u64 segment_manager_make_segment_handle (u32 segment_manager_index,
u64 segment_manager_segment_handle (segment_manager_t * sm,
fifo_segment_t * segment);
void segment_manager_segment_reader_unlock (segment_manager_t * sm);
-void segment_manager_segment_writer_unlock (segment_manager_t * sm);
int segment_manager_alloc_session_fifos (segment_manager_t * sm,
u32 thread_index,
@@ -175,7 +190,9 @@ static inline void
segment_manager_parse_segment_handle (u64 segment_handle, u32 * sm_index,
u32 * segment_index)
{
- *sm_index = segment_handle >> 32;
+ /* Upper 8 bits zeroed out as they may be used for cut-through segments.
+ * See @ref ct_alloc_segment */
+ *sm_index = (segment_handle >> 32) & 0xFFFFFF;
*segment_index = segment_handle & 0xFFFFFFFF;
}
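Assuming segment_manager_make_segment_handle packs the manager index into the upper 32 bits and the segment index into the lower 32 (consistent with the parse above), a round-trip sketch:

    u64 sh = segment_manager_make_segment_handle (sm_index, seg_index);
    u32 smi, segi;
    segment_manager_parse_segment_handle (sh, &smi, &segi);
    /* top 8 bits of the upper word are reserved for cut-through
     * segments, hence the 0xFFFFFF mask */
    ASSERT (smi == (sm_index & 0xFFFFFF) && segi == seg_index);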
diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api
index d2a942fb68b..6affae4112d 100644
--- a/src/vnet/session/session.api
+++ b/src/vnet/session/session.api
@@ -117,38 +117,6 @@ autoreply define app_del_cert_key_pair {
u32 index;
};
-/** \brief Application add TLS certificate
- ### WILL BE DEPRECATED POST 20.01 ###
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param cert_len - certificate length
- @param cert - certificate as a string
-*/
-autoreply define application_tls_cert_add {
- option deprecated="to be removed post 21.06";
- u32 client_index;
- u32 context;
- u32 app_index;
- u16 cert_len;
- u8 cert[cert_len];
-};
-
-/** \brief Application add TLS key
- ### WILL BE DEPRECATED POST 20.01 ###
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param key_len - certificate length
- @param key - PEM encoded key as a string
-*/
-autoreply define application_tls_key_add {
- option deprecated="to be removed post 21.06";
- u32 client_index;
- u32 context;
- u32 app_index;
- u16 key_len;
- u8 key[key_len];
-};
-
/** \brief add/del application worker
@param client_index - opaque cookie to identify the sender
client to vpp direction only
@@ -251,9 +219,49 @@ define app_namespace_add_del {
@param ip6_fib_id - id of ip6 fib that "supports" the namespace. Ignored
if sw_if_index set.
@param namespace_id - namespace id
+ @param sock_name - socket name (path, abstract socket name)
+*/
+define app_namespace_add_del_v4 {
+ option deprecated;
+ u32 client_index;
+ u32 context;
+ u64 secret;
+ bool is_add [default=true];
+ vl_api_interface_index_t sw_if_index [default=0xffffffff];
+ u32 ip4_fib_id;
+ u32 ip6_fib_id;
+ string namespace_id[64];
+ string sock_name[];
+};
+
+/** \brief Reply for app namespace add/del
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param appns_index - app namespace index
+*/
+define app_namespace_add_del_v4_reply
+{
+ u32 context;
+ i32 retval;
+ u32 appns_index;
+};
+
+/** \brief add/del application namespace
+ @param client_index - opaque cookie to identify the sender
+ client to vpp direction only
+ @param context - sender context, to match reply w/ request
+ @param secret - secret shared between app and vpp
+ @param sw_if_index - local interface that "supports" namespace. Set to
+ ~0 if no preference
+ @param ip4_fib_id - id of ip4 fib that "supports" the namespace. Ignored
+ if sw_if_index set.
+ @param ip6_fib_id - id of ip6 fib that "supports" the namespace. Ignored
+ if sw_if_index set.
+ @param namespace_id - namespace id
@param netns - linux net namespace
*/
define app_namespace_add_del_v2 {
+ option deprecated;
u32 client_index;
u32 context;
u64 secret;
@@ -280,6 +288,7 @@ define app_namespace_add_del_v2 {
@param sock_name - socket name (path, abstract socket name)
*/
define app_namespace_add_del_v3 {
+ option deprecated;
u32 client_index;
u32 context;
u64 secret;
@@ -312,6 +321,7 @@ define app_namespace_add_del_reply
*/
define app_namespace_add_del_v2_reply
{
+ option deprecated;
u32 context;
i32 retval;
u32 appns_index;
@@ -319,6 +329,7 @@ define app_namespace_add_del_v2_reply
define app_namespace_add_del_v3_reply
{
+ option deprecated;
u32 context;
i32 retval;
u32 appns_index;
diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c
index f33dbea9a1e..67e7ee39001 100644
--- a/src/vnet/session/session.c
+++ b/src/vnet/session/session.c
@@ -17,10 +17,13 @@
* @brief Session and session manager
*/
+#include <vnet/plugin/plugin.h>
#include <vnet/session/session.h>
#include <vnet/session/application.h>
#include <vnet/dpo/load_balance.h>
#include <vnet/fib/ip4_fib.h>
+#include <vlib/stats/stats.h>
+#include <vlib/dma/dma.h>
session_main_t session_main;
@@ -36,8 +39,7 @@ session_send_evt_to_thread (void *data, void *args, u32 thread_index,
mq = wrk->vpp_event_queue;
if (PREDICT_FALSE (svm_msg_q_lock (mq)))
return -1;
- if (PREDICT_FALSE (svm_msg_q_is_full (mq)
- || svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)))
+ if (PREDICT_FALSE (svm_msg_q_or_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)))
{
svm_msg_q_unlock (mq);
return -2;
@@ -58,7 +60,7 @@ session_send_evt_to_thread (void *data, void *args, u32 thread_index,
evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
evt->session_index = *(u32 *) data;
break;
- case SESSION_IO_EVT_BUILTIN_TX:
+ case SESSION_IO_EVT_TX_MAIN:
case SESSION_CTRL_EVT_CLOSE:
case SESSION_CTRL_EVT_RESET:
msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
@@ -95,6 +97,13 @@ session_send_io_evt_to_thread_custom (void *data, u32 thread_index,
}
int
+session_program_tx_io_evt (session_handle_tu_t sh, session_evt_type_t evt_type)
+{
+ return session_send_evt_to_thread ((void *) &sh.session_index, 0,
+ (u32) sh.thread_index, evt_type);
+}
+
+int
session_send_ctrl_evt_to_thread (session_t * s, session_evt_type_t evt_type)
{
/* only events supported are disconnect, shutdown and reset */
@@ -202,39 +211,25 @@ session_alloc (u32 thread_index)
{
session_worker_t *wrk = &session_main.wrk[thread_index];
session_t *s;
- u8 will_expand = 0;
- pool_get_aligned_will_expand (wrk->sessions, will_expand,
- CLIB_CACHE_LINE_BYTES);
- /* If we have peekers, let them finish */
- if (PREDICT_FALSE (will_expand && vlib_num_workers ()))
- {
- clib_rwlock_writer_lock (&wrk->peekers_rw_locks);
- pool_get_aligned (wrk->sessions, s, CLIB_CACHE_LINE_BYTES);
- clib_rwlock_writer_unlock (&wrk->peekers_rw_locks);
- }
- else
- {
- pool_get_aligned (wrk->sessions, s, CLIB_CACHE_LINE_BYTES);
- }
+
+ pool_get_aligned_safe (wrk->sessions, s, CLIB_CACHE_LINE_BYTES);
clib_memset (s, 0, sizeof (*s));
s->session_index = s - wrk->sessions;
s->thread_index = thread_index;
- s->app_index = APP_INVALID_INDEX;
+ s->al_index = APP_INVALID_INDEX;
+
return s;
}
void
session_free (session_t * s)
{
- if (CLIB_DEBUG)
- {
- u8 thread_index = s->thread_index;
- clib_memset (s, 0xFA, sizeof (*s));
- pool_put (session_main.wrk[thread_index].sessions, s);
- return;
- }
+ session_worker_t *wrk = &session_main.wrk[s->thread_index];
+
SESSION_EVT (SESSION_EVT_FREE, s);
- pool_put (session_main.wrk[s->thread_index].sessions, s);
+ if (CLIB_DEBUG)
+ clib_memset (s, 0xFA, sizeof (*s));
+ pool_put (wrk->sessions, s);
}
u8
@@ -252,35 +247,48 @@ session_is_valid (u32 si, u8 thread_index)
|| s->session_state <= SESSION_STATE_LISTENING)
return 1;
- if (s->session_state == SESSION_STATE_CONNECTING &&
+ if ((s->session_state == SESSION_STATE_CONNECTING ||
+ s->session_state == SESSION_STATE_TRANSPORT_CLOSED) &&
(s->flags & SESSION_F_HALF_OPEN))
return 1;
tc = session_get_transport (s);
- if (s->connection_index != tc->c_index
- || s->thread_index != tc->thread_index || tc->s_index != si)
+ if (s->connection_index != tc->c_index ||
+ s->thread_index != tc->thread_index || tc->s_index != si)
return 0;
return 1;
}
+void
+session_cleanup (session_t *s)
+{
+ segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo);
+ session_free (s);
+}
+
static void
session_cleanup_notify (session_t * s, session_cleanup_ntf_t ntf)
{
app_worker_t *app_wrk;
app_wrk = app_worker_get_if_valid (s->app_wrk_index);
- if (!app_wrk)
- return;
+ if (PREDICT_FALSE (!app_wrk))
+ {
+ if (ntf == SESSION_CLEANUP_TRANSPORT)
+ return;
+
+ session_cleanup (s);
+ return;
+ }
app_worker_cleanup_notify (app_wrk, s, ntf);
}
void
-session_free_w_fifos (session_t * s)
+session_program_cleanup (session_t *s)
{
+ ASSERT (s->session_state == SESSION_STATE_TRANSPORT_DELETED);
session_cleanup_notify (s, SESSION_CLEANUP_SESSION);
- segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo);
- session_free (s);
}
/**
@@ -297,7 +305,7 @@ session_delete (session_t * s)
if ((rv = session_lookup_del_session (s)))
clib_warning ("session %u hash delete rv %d", s->session_index, rv);
- session_free_w_fifos (s);
+ session_program_cleanup (s);
}
void
@@ -312,16 +320,27 @@ session_cleanup_half_open (session_handle_t ho_handle)
* session should be removed. */
if (ho->connection_index == ~0)
{
- ho->session_state = SESSION_STATE_CLOSED;
+ session_set_state (ho, SESSION_STATE_CLOSED);
return;
}
/* Migrated transports are no longer half-opens */
transport_cleanup (session_get_transport_proto (ho),
- ho->connection_index, ho->app_index /* overloaded */);
+ ho->connection_index, ho->al_index /* overloaded */);
+ }
+ else if (ho->session_state != SESSION_STATE_TRANSPORT_DELETED)
+ {
+ /* Cleanup half-open session lookup table if need be */
+ if (ho->session_state != SESSION_STATE_TRANSPORT_CLOSED)
+ {
+ transport_connection_t *tc;
+ tc = transport_get_half_open (session_get_transport_proto (ho),
+ ho->connection_index);
+ if (tc && !(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP))
+ session_lookup_del_half_open (tc);
+ }
+ transport_cleanup_half_open (session_get_transport_proto (ho),
+ ho->connection_index);
}
- else
- transport_cleanup_half_open (session_get_transport_proto (ho),
- ho->connection_index);
session_free (ho);
}
@@ -330,10 +349,12 @@ session_half_open_free (session_t *ho)
{
app_worker_t *app_wrk;
- ASSERT (vlib_get_thread_index () <= 1);
- app_wrk = app_worker_get (ho->app_wrk_index);
- app_worker_del_half_open (app_wrk, ho);
- session_free (ho);
+ ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
+ app_wrk = app_worker_get_if_valid (ho->app_wrk_index);
+ if (app_wrk)
+ app_worker_del_half_open (app_wrk, ho);
+ else
+ session_free (ho);
}
static void
@@ -346,16 +367,26 @@ session_half_open_free_rpc (void *args)
void
session_half_open_delete_notify (transport_connection_t *tc)
{
+ session_t *ho = ho_session_get (tc->s_index);
+
+ /* Cleanup half-open lookup table if need be */
+ if (ho->session_state != SESSION_STATE_TRANSPORT_CLOSED)
+ {
+ if (!(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP))
+ session_lookup_del_half_open (tc);
+ }
+ session_set_state (ho, SESSION_STATE_TRANSPORT_DELETED);
+
/* Notification from ctrl thread accepted without rpc */
- if (!tc->thread_index)
+ if (tc->thread_index == transport_cl_thread ())
{
- session_half_open_free (ho_session_get (tc->s_index));
+ session_half_open_free (ho);
}
else
{
void *args = uword_to_pointer ((uword) tc->s_index, void *);
- session_send_rpc_evt_to_thread_force (0, session_half_open_free_rpc,
- args);
+ session_send_rpc_evt_to_thread_force (transport_cl_thread (),
+ session_half_open_free_rpc, args);
}
}
@@ -364,6 +395,9 @@ session_half_open_migrate_notify (transport_connection_t *tc)
{
session_t *ho;
+ /* Support half-open migrations only for transports with no lookup */
+ ASSERT (tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP);
+
ho = ho_session_get (tc->s_index);
ho->flags |= SESSION_F_IS_MIGRATING;
ho->connection_index = ~0;
@@ -383,8 +417,8 @@ session_half_open_migrated_notify (transport_connection_t *tc)
return -1;
}
ho->connection_index = tc->c_index;
- /* Overload app index for half-open with new thread */
- ho->app_index = tc->thread_index;
+ /* Overload al_index for half-open with new thread */
+ ho->al_index = tc->thread_index;
return 0;
}
@@ -399,7 +433,7 @@ session_alloc_for_connection (transport_connection_t * tc)
s = session_alloc (thread_index);
s->session_type = session_type_from_proto_and_ip (tc->proto, tc->is_ip4);
- s->session_state = SESSION_STATE_CLOSED;
+ session_set_state (s, SESSION_STATE_CLOSED);
/* Attach transport to session and vice versa */
s->connection_index = tc->c_index;
@@ -546,10 +580,162 @@ session_fifo_tuning (session_t * s, svm_fifo_t * f,
}
}
+void
+session_wrk_program_app_wrk_evts (session_worker_t *wrk, u32 app_wrk_index)
+{
+ u8 need_interrupt;
+
+ ASSERT ((wrk - session_main.wrk) == vlib_get_thread_index ());
+ need_interrupt = clib_bitmap_is_zero (wrk->app_wrks_pending_ntf);
+ wrk->app_wrks_pending_ntf =
+ clib_bitmap_set (wrk->app_wrks_pending_ntf, app_wrk_index, 1);
+
+ if (need_interrupt)
+ vlib_node_set_interrupt_pending (wrk->vm, session_input_node.index);
+}
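The bitmap doubles as an edge detector: the interrupt is raised only on the empty-to-non-empty transition, so any number of pending app worker notifications in a dispatch window collapses into a single wakeup of session_input_node. The idiom in isolation (sketch):

    was_empty = clib_bitmap_is_zero (pending);
    pending = clib_bitmap_set (pending, index, 1);
    if (was_empty)
      vlib_node_set_interrupt_pending (vm, node_index);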
+
+always_inline void
+session_program_io_event (app_worker_t *app_wrk, session_t *s,
+ session_evt_type_t et, u8 is_cl)
+{
+ if (is_cl)
+ {
+ /* Special events for connectionless sessions */
+ et += SESSION_IO_EVT_BUILTIN_RX - SESSION_IO_EVT_RX;
+
+ ASSERT (s->thread_index == 0 || et == SESSION_IO_EVT_TX_MAIN);
+ session_event_t evt = {
+ .event_type = et,
+ .session_handle = session_handle (s),
+ };
+
+ app_worker_add_event_custom (app_wrk, vlib_get_thread_index (), &evt);
+ }
+ else
+ {
+ app_worker_add_event (app_wrk, s, et);
+ }
+}
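The et += adjustment assumes the io event enum places the connectionless variants at a fixed offset from SESSION_IO_EVT_RX, so one addition maps either event to its connectionless counterpart; the ASSERT that follows it is consistent with this layout, but the layout itself is an inference from this patch:

    /* assumed enum layout (illustrative):
     *   SESSION_IO_EVT_RX, SESSION_IO_EVT_TX, ...,
     *   SESSION_IO_EVT_BUILTIN_RX, SESSION_IO_EVT_TX_MAIN, ...
     * so et + (SESSION_IO_EVT_BUILTIN_RX - SESSION_IO_EVT_RX) maps
     * RX -> BUILTIN_RX and TX -> TX_MAIN */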
+
+static inline int
+session_notify_subscribers (u32 app_index, session_t *s, svm_fifo_t *f,
+ session_evt_type_t evt_type)
+{
+ app_worker_t *app_wrk;
+ application_t *app;
+ u8 is_cl;
+ int i;
+
+ app = application_get (app_index);
+ if (!app)
+ return -1;
+
+ is_cl = s->thread_index != vlib_get_thread_index ();
+ for (i = 0; i < f->shr->n_subscribers; i++)
+ {
+ app_wrk = application_get_worker (app, f->shr->subscribers[i]);
+ if (!app_wrk)
+ continue;
+ session_program_io_event (app_wrk, s, evt_type, is_cl ? 1 : 0);
+ }
+
+ return 0;
+}
+
+always_inline int
+session_enqueue_notify_inline (session_t *s, u8 is_cl)
+{
+ app_worker_t *app_wrk;
+
+ app_wrk = app_worker_get_if_valid (s->app_wrk_index);
+ if (PREDICT_FALSE (!app_wrk))
+ return -1;
+
+ session_program_io_event (app_wrk, s, SESSION_IO_EVT_RX, is_cl);
+
+ if (PREDICT_FALSE (svm_fifo_n_subscribers (s->rx_fifo)))
+ return session_notify_subscribers (app_wrk->app_index, s, s->rx_fifo,
+ SESSION_IO_EVT_RX);
+
+ return 0;
+}
+
+int
+session_enqueue_notify (session_t *s)
+{
+ return session_enqueue_notify_inline (s, 0 /* is_cl */);
+}
+
+int
+session_enqueue_notify_cl (session_t *s)
+{
+ return session_enqueue_notify_inline (s, 1 /* is_cl */);
+}
+
+int
+session_dequeue_notify (session_t *s)
+{
+ app_worker_t *app_wrk;
+ u8 is_cl;
+
+ /* Unset as soon as event is requested */
+ svm_fifo_clear_deq_ntf (s->tx_fifo);
+
+ app_wrk = app_worker_get_if_valid (s->app_wrk_index);
+ if (PREDICT_FALSE (!app_wrk))
+ return -1;
+
+ is_cl = s->session_state == SESSION_STATE_LISTENING ||
+ s->session_state == SESSION_STATE_OPENED;
+ session_program_io_event (app_wrk, s, SESSION_IO_EVT_TX, is_cl ? 1 : 0);
+
+ if (PREDICT_FALSE (svm_fifo_n_subscribers (s->tx_fifo)))
+ return session_notify_subscribers (app_wrk->app_index, s, s->tx_fifo,
+ SESSION_IO_EVT_TX);
+
+ return 0;
+}
+
+/**
+ * Flushes queue of sessions that are to be notified of new data
+ * enqueued events.
+ *
+ * @param transport_proto transport protocol for which the queue is flushed
+ * @param thread_index thread index for which the flush is to be performed
+ */
+void
+session_main_flush_enqueue_events (transport_proto_t transport_proto,
+ u32 thread_index)
+{
+ session_worker_t *wrk = session_main_get_worker (thread_index);
+ session_handle_t *handles;
+ session_t *s;
+ u32 i, is_cl;
+
+ handles = wrk->session_to_enqueue[transport_proto];
+
+ for (i = 0; i < vec_len (handles); i++)
+ {
+ s = session_get_from_handle (handles[i]);
+ session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED,
+ 0 /* TODO/not needed */);
+ is_cl =
+ s->thread_index != thread_index || (s->flags & SESSION_F_IS_CLESS);
+ if (!is_cl)
+ session_enqueue_notify_inline (s, 0);
+ else
+ session_enqueue_notify_inline (s, 1);
+ }
+
+ vec_reset_length (handles);
+ wrk->session_to_enqueue[transport_proto] = handles;
+}
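Enqueue paths mark a session once via SESSION_F_RX_EVT and append its handle; the owning transport then flushes the batch per protocol, typically at the end of its input node dispatch. A hedged usage sketch:

    /* end of a transport input node's dispatch (sketch) */
    session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP,
                                       vlib_get_thread_index ());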
+
/*
- * Enqueue data for delivery to session peer. Does not notify peer of enqueue
- * event but on request can queue notification events for later delivery by
- * calling stream_server_flush_enqueue_events().
+ * Enqueue data for delivery to app. If requested, it queues app notification
+ * event for later delivery.
*
* @param tc Transport connection which is to be enqueued data
* @param b Buffer to be enqueued
@@ -598,15 +784,14 @@ session_enqueue_stream_connection (transport_connection_t * tc,
if (queue_event)
{
- /* Queue RX event on this fifo. Eventually these will need to be flushed
- * by calling stream_server_flush_enqueue_events () */
- session_worker_t *wrk;
-
- wrk = session_main_get_worker (s->thread_index);
+ /* Queue RX event on this fifo. Eventually these will need to be
+ * flushed by calling @ref session_main_flush_enqueue_events () */
if (!(s->flags & SESSION_F_RX_EVT))
{
+ session_worker_t *wrk = session_main_get_worker (s->thread_index);
+ ASSERT (s->thread_index == vlib_get_thread_index ());
s->flags |= SESSION_F_RX_EVT;
- vec_add1 (wrk->session_to_enqueue[tc->proto], s->session_index);
+ vec_add1 (wrk->session_to_enqueue[tc->proto], session_handle (s));
}
session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0);
@@ -615,10 +800,11 @@ session_enqueue_stream_connection (transport_connection_t * tc,
return enqueued;
}
-int
-session_enqueue_dgram_connection (session_t * s,
- session_dgram_hdr_t * hdr,
- vlib_buffer_t * b, u8 proto, u8 queue_event)
+always_inline int
+session_enqueue_dgram_connection_inline (session_t *s,
+ session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event, u32 is_cl)
{
int rv;
@@ -627,12 +813,10 @@ session_enqueue_dgram_connection (session_t * s,
if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)))
{
- /* *INDENT-OFF* */
svm_fifo_seg_t segs[2] = {
{ (u8 *) hdr, sizeof (*hdr) },
{ vlib_buffer_get_current (b), b->current_length }
};
- /* *INDENT-ON* */
rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, 2,
0 /* allow_partial */ );
@@ -664,15 +848,16 @@ session_enqueue_dgram_connection (session_t * s,
if (queue_event && rv > 0)
{
- /* Queue RX event on this fifo. Eventually these will need to be flushed
- * by calling stream_server_flush_enqueue_events () */
- session_worker_t *wrk;
-
- wrk = session_main_get_worker (s->thread_index);
+ /* Queue RX event on this fifo. Eventually these will need to be
+ * flushed by calling @ref session_main_flush_enqueue_events () */
if (!(s->flags & SESSION_F_RX_EVT))
{
+ u32 thread_index =
+ is_cl ? vlib_get_thread_index () : s->thread_index;
+ session_worker_t *wrk = session_main_get_worker (thread_index);
+ ASSERT (s->thread_index == vlib_get_thread_index () || is_cl);
s->flags |= SESSION_F_RX_EVT;
- vec_add1 (wrk->session_to_enqueue[proto], s->session_index);
+ vec_add1 (wrk->session_to_enqueue[proto], session_handle (s));
}
session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0);
@@ -681,6 +866,34 @@ session_enqueue_dgram_connection (session_t * s,
}
int
+session_enqueue_dgram_connection (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto, u8 queue_event)
+{
+ return session_enqueue_dgram_connection_inline (s, hdr, b, proto,
+ queue_event, 0 /* is_cl */);
+}
+
+int
+session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto, u8 queue_event)
+{
+ return session_enqueue_dgram_connection_inline (s, hdr, b, proto,
+ queue_event, 1 /* is_cl */);
+}
+
+int
+session_enqueue_dgram_connection_cl (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event)
+{
+ session_t *awls;
+
+ awls = app_listener_select_wrk_cl_session (s, hdr);
+ return session_enqueue_dgram_connection_inline (awls, hdr, b, proto,
+ queue_event, 1 /* is_cl */);
+}
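The thin wrappers pin is_cl at compile time so the always_inline body is specialized rather than branching per packet; the _cl variant additionally remaps the listener to the app-worker-selected session via app_listener_select_wrk_cl_session before enqueueing. Call shapes:

    /* session owned by this thread */
    session_enqueue_dgram_connection (s, &hdr, b, proto, 1 /* queue_event */);
    /* connectionless listener: pick per-worker session, then enqueue */
    session_enqueue_dgram_connection_cl (ls, &hdr, b, proto, 1 /* queue_event */);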
+
+int
session_tx_fifo_peek_bytes (transport_connection_t * tc, u8 * buffer,
u32 offset, u32 max_bytes)
{
@@ -703,187 +916,6 @@ session_tx_fifo_dequeue_drop (transport_connection_t * tc, u32 max_bytes)
return rv;
}
-static inline int
-session_notify_subscribers (u32 app_index, session_t * s,
- svm_fifo_t * f, session_evt_type_t evt_type)
-{
- app_worker_t *app_wrk;
- application_t *app;
- int i;
-
- app = application_get (app_index);
- if (!app)
- return -1;
-
- for (i = 0; i < f->shr->n_subscribers; i++)
- {
- app_wrk = application_get_worker (app, f->shr->subscribers[i]);
- if (!app_wrk)
- continue;
- if (app_worker_lock_and_send_event (app_wrk, s, evt_type))
- return -1;
- }
-
- return 0;
-}
-
-/**
- * Notify session peer that new data has been enqueued.
- *
- * @param s Stream session for which the event is to be generated.
- * @param lock Flag to indicate if call should lock message queue.
- *
- * @return 0 on success or negative number if failed to send notification.
- */
-static inline int
-session_enqueue_notify_inline (session_t * s)
-{
- app_worker_t *app_wrk;
- u32 session_index;
- u8 n_subscribers;
-
- session_index = s->session_index;
- n_subscribers = svm_fifo_n_subscribers (s->rx_fifo);
-
- app_wrk = app_worker_get_if_valid (s->app_wrk_index);
- if (PREDICT_FALSE (!app_wrk))
- {
- SESSION_DBG ("invalid s->app_index = %d", s->app_wrk_index);
- return 0;
- }
-
- SESSION_EVT (SESSION_EVT_ENQ, s, svm_fifo_max_dequeue_prod (s->rx_fifo));
-
- s->flags &= ~SESSION_F_RX_EVT;
-
- /* Application didn't confirm accept yet */
- if (PREDICT_FALSE (s->session_state == SESSION_STATE_ACCEPTING))
- return 0;
-
- if (PREDICT_FALSE (app_worker_lock_and_send_event (app_wrk, s,
- SESSION_IO_EVT_RX)))
- return -1;
-
- if (PREDICT_FALSE (n_subscribers))
- {
- s = session_get (session_index, vlib_get_thread_index ());
- return session_notify_subscribers (app_wrk->app_index, s,
- s->rx_fifo, SESSION_IO_EVT_RX);
- }
-
- return 0;
-}
-
-int
-session_enqueue_notify (session_t * s)
-{
- return session_enqueue_notify_inline (s);
-}
-
-static void
-session_enqueue_notify_rpc (void *arg)
-{
- u32 session_index = pointer_to_uword (arg);
- session_t *s;
-
- s = session_get_if_valid (session_index, vlib_get_thread_index ());
- if (!s)
- return;
-
- session_enqueue_notify (s);
-}
-
-/**
- * Like session_enqueue_notify, but can be called from a thread that does not
- * own the session.
- */
-void
-session_enqueue_notify_thread (session_handle_t sh)
-{
- u32 thread_index = session_thread_from_handle (sh);
- u32 session_index = session_index_from_handle (sh);
-
- /*
- * Pass session index (u32) as opposed to handle (u64) in case pointers
- * are not 64-bit.
- */
- session_send_rpc_evt_to_thread (thread_index,
- session_enqueue_notify_rpc,
- uword_to_pointer (session_index, void *));
-}
-
-int
-session_dequeue_notify (session_t * s)
-{
- app_worker_t *app_wrk;
-
- svm_fifo_clear_deq_ntf (s->tx_fifo);
-
- app_wrk = app_worker_get_if_valid (s->app_wrk_index);
- if (PREDICT_FALSE (!app_wrk))
- return -1;
-
- if (PREDICT_FALSE (app_worker_lock_and_send_event (app_wrk, s,
- SESSION_IO_EVT_TX)))
- return -1;
-
- if (PREDICT_FALSE (s->tx_fifo->shr->n_subscribers))
- return session_notify_subscribers (app_wrk->app_index, s,
- s->tx_fifo, SESSION_IO_EVT_TX);
-
- return 0;
-}
-
-/**
- * Flushes queue of sessions that are to be notified of new data
- * enqueued events.
- *
- * @param thread_index Thread index for which the flush is to be performed.
- * @return 0 on success or a positive number indicating the number of
- * failures due to API queue being full.
- */
-int
-session_main_flush_enqueue_events (u8 transport_proto, u32 thread_index)
-{
- session_worker_t *wrk = session_main_get_worker (thread_index);
- session_t *s;
- int i, errors = 0;
- u32 *indices;
-
- indices = wrk->session_to_enqueue[transport_proto];
-
- for (i = 0; i < vec_len (indices); i++)
- {
- s = session_get_if_valid (indices[i], thread_index);
- if (PREDICT_FALSE (!s))
- {
- errors++;
- continue;
- }
-
- session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED,
- 0 /* TODO/not needed */ );
-
- if (PREDICT_FALSE (session_enqueue_notify_inline (s)))
- errors++;
- }
-
- vec_reset_length (indices);
- wrk->session_to_enqueue[transport_proto] = indices;
-
- return errors;
-}
-
-int
-session_main_flush_all_enqueue_events (u8 transport_proto)
-{
- vlib_thread_main_t *vtm = vlib_get_thread_main ();
- int i, errors = 0;
- for (i = 0; i < 1 + vtm->n_threads; i++)
- errors += session_main_flush_enqueue_events (transport_proto, i);
- return errors;
-}
-
int
session_stream_connect_notify (transport_connection_t * tc,
session_error_t err)
@@ -898,6 +930,7 @@ session_stream_connect_notify (transport_connection_t * tc,
session_lookup_del_half_open (tc);
ho = ho_session_get (tc->s_index);
+ session_set_state (ho, SESSION_STATE_TRANSPORT_CLOSED);
opaque = ho->opaque;
app_wrk = app_worker_get_if_valid (ho->app_wrk_index);
if (!app_wrk)
@@ -907,8 +940,9 @@ session_stream_connect_notify (transport_connection_t * tc,
return app_worker_connect_notify (app_wrk, s, err, opaque);
s = session_alloc_for_connection (tc);
- s->session_state = SESSION_STATE_CONNECTING;
+ session_set_state (s, SESSION_STATE_CONNECTING);
s->app_wrk_index = app_wrk->wrk_index;
+ s->opaque = opaque;
new_si = s->session_index;
new_ti = s->thread_index;
@@ -920,7 +954,7 @@ session_stream_connect_notify (transport_connection_t * tc,
}
s = session_get (new_si, new_ti);
- s->session_state = SESSION_STATE_READY;
+ session_set_state (s, SESSION_STATE_READY);
session_lookup_add_connection (tc, session_handle (s));
if (app_worker_connect_notify (app_wrk, s, SESSION_E_NONE, opaque))
@@ -937,17 +971,19 @@ session_stream_connect_notify (transport_connection_t * tc,
}
static void
-session_switch_pool_reply (void *arg)
+session_switch_pool_closed_rpc (void *arg)
{
- u32 session_index = pointer_to_uword (arg);
+ session_handle_t sh;
session_t *s;
- s = session_get_if_valid (session_index, vlib_get_thread_index ());
+ sh = pointer_to_uword (arg);
+ s = session_get_from_handle_if_valid (sh);
if (!s)
return;
- /* Notify app that it has data on the new session */
- session_enqueue_notify (s);
+ transport_cleanup (session_get_transport_proto (s), s->connection_index,
+ s->thread_index);
+ session_cleanup (s);
}
typedef struct _session_switch_pool_args
@@ -965,39 +1001,40 @@ static void
session_switch_pool (void *cb_args)
{
session_switch_pool_args_t *args = (session_switch_pool_args_t *) cb_args;
- session_handle_t new_sh;
+ session_handle_t sh, new_sh;
segment_manager_t *sm;
app_worker_t *app_wrk;
session_t *s;
- void *rargs;
ASSERT (args->thread_index == vlib_get_thread_index ());
s = session_get (args->session_index, args->thread_index);
- transport_cleanup (session_get_transport_proto (s), s->connection_index,
- s->thread_index);
+ app_wrk = app_worker_get_if_valid (s->app_wrk_index);
+ if (!app_wrk)
+ goto app_closed;
- new_sh = session_make_handle (args->new_session_index,
- args->new_thread_index);
+ /* Cleanup fifo segment slice state for fifos */
+ sm = app_worker_get_connect_segment_manager (app_wrk);
+ segment_manager_detach_fifo (sm, &s->rx_fifo);
+ segment_manager_detach_fifo (sm, &s->tx_fifo);
- app_wrk = app_worker_get_if_valid (s->app_wrk_index);
- if (app_wrk)
- {
- /* Cleanup fifo segment slice state for fifos */
- sm = app_worker_get_connect_segment_manager (app_wrk);
- segment_manager_detach_fifo (sm, &s->rx_fifo);
- segment_manager_detach_fifo (sm, &s->tx_fifo);
+ /* Check if session closed during migration */
+ if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
+ goto app_closed;
- /* Notify app, using old session, about the migration event */
- app_worker_migrate_notify (app_wrk, s, new_sh);
- }
+ new_sh =
+ session_make_handle (args->new_session_index, args->new_thread_index);
+ app_worker_migrate_notify (app_wrk, s, new_sh);
- /* Trigger app read and fifo updates on the new thread */
- rargs = uword_to_pointer (args->new_session_index, void *);
- session_send_rpc_evt_to_thread (args->new_thread_index,
- session_switch_pool_reply, rargs);
+ clib_mem_free (cb_args);
+ return;
- session_free (s);
+app_closed:
+ /* Session closed during migration. Clean everything up */
+ sh = session_handle (s);
+ session_send_rpc_evt_to_thread (args->new_thread_index,
+ session_switch_pool_closed_rpc,
+ uword_to_pointer (sh, void *));
clib_mem_free (cb_args);
}
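If the session was closed while migrating, cleanup is deferred through an RPC so it runs after the migration settles on the new thread; the session travels as a handle rather than a raw pointer, and session_switch_pool_closed_rpc then performs the transport_cleanup plus session_cleanup. Handoff sketch, names from this patch:

    session_send_rpc_evt_to_thread (args->new_thread_index,
                                    session_switch_pool_closed_rpc,
                                    uword_to_pointer (sh, void *));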
@@ -1018,7 +1055,7 @@ session_dgram_connect_notify (transport_connection_t * tc,
*/
new_s = session_clone_safe (tc->s_index, old_thread_index);
new_s->connection_index = tc->c_index;
- new_s->session_state = SESSION_STATE_READY;
+ session_set_state (new_s, SESSION_STATE_READY);
new_s->flags |= SESSION_F_IS_MIGRATING;
if (!(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP))
@@ -1067,7 +1104,16 @@ session_transport_closing_notify (transport_connection_t * tc)
s = session_get (tc->s_index, tc->thread_index);
if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
return;
- s->session_state = SESSION_STATE_TRANSPORT_CLOSING;
+
+ /* Wait for reply from app before sending notification as the
+ * accept might be rejected */
+ if (s->session_state == SESSION_STATE_ACCEPTING)
+ {
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING);
+ return;
+ }
+
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING);
app_wrk = app_worker_get (s->app_wrk_index);
app_worker_close_notify (app_wrk, s);
}
@@ -1108,7 +1154,7 @@ session_transport_delete_notify (transport_connection_t * tc)
* because transport will soon be closed and closed sessions
* are assumed to have been removed from the lookup table */
session_lookup_del_session (s);
- s->session_state = SESSION_STATE_TRANSPORT_DELETED;
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
session_cleanup_notify (s, SESSION_CLEANUP_TRANSPORT);
svm_fifo_dequeue_drop_all (s->tx_fifo);
break;
@@ -1119,7 +1165,7 @@ session_transport_delete_notify (transport_connection_t * tc)
* session is just removed because both transport and app have
* confirmed the close*/
session_lookup_del_session (s);
- s->session_state = SESSION_STATE_TRANSPORT_DELETED;
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
session_cleanup_notify (s, SESSION_CLEANUP_TRANSPORT);
svm_fifo_dequeue_drop_all (s->tx_fifo);
session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_CLOSE);
@@ -1128,6 +1174,7 @@ session_transport_delete_notify (transport_connection_t * tc)
break;
case SESSION_STATE_CLOSED:
session_cleanup_notify (s, SESSION_CLEANUP_TRANSPORT);
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
session_delete (s);
break;
default:
@@ -1155,6 +1202,9 @@ session_transport_closed_notify (transport_connection_t * tc)
if (!(s = session_get_if_valid (tc->s_index, tc->thread_index)))
return;
+ if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSED)
+ return;
+
/* Transport thinks that app requested close but it actually didn't.
* Can happen for tcp:
* 1)if fin and rst are received in close succession.
@@ -1163,17 +1213,15 @@ session_transport_closed_notify (transport_connection_t * tc)
{
session_transport_closing_notify (tc);
svm_fifo_dequeue_drop_all (s->tx_fifo);
- s->session_state = SESSION_STATE_TRANSPORT_CLOSED;
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSED);
}
/* If app close has not been received or has not yet resulted in
* a transport close, only mark the session transport as closed */
else if (s->session_state <= SESSION_STATE_CLOSING)
- {
- s->session_state = SESSION_STATE_TRANSPORT_CLOSED;
- }
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSED);
/* If app also closed, switch to closed */
else if (s->session_state == SESSION_STATE_APP_CLOSED)
- s->session_state = SESSION_STATE_CLOSED;
+ session_set_state (s, SESSION_STATE_CLOSED);
app_wrk = app_worker_get_if_valid (s->app_wrk_index);
if (app_wrk)
@@ -1193,7 +1241,12 @@ session_transport_reset_notify (transport_connection_t * tc)
svm_fifo_dequeue_drop_all (s->tx_fifo);
if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
return;
- s->session_state = SESSION_STATE_TRANSPORT_CLOSING;
+ if (s->session_state == SESSION_STATE_ACCEPTING)
+ {
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING);
+ return;
+ }
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING);
app_wrk = app_worker_get (s->app_wrk_index);
app_worker_reset_notify (app_wrk, s);
}
@@ -1210,12 +1263,12 @@ session_stream_accept_notify (transport_connection_t * tc)
return -1;
if (s->session_state != SESSION_STATE_CREATED)
return 0;
- s->session_state = SESSION_STATE_ACCEPTING;
+ session_set_state (s, SESSION_STATE_ACCEPTING);
if (app_worker_accept_notify (app_wrk, s))
{
/* On transport delete, no notifications should be sent. Unless, the
* accept is retried and successful. */
- s->session_state = SESSION_STATE_CREATED;
+ session_set_state (s, SESSION_STATE_CREATED);
return -1;
}
return 0;
@@ -1233,7 +1286,7 @@ session_stream_accept (transport_connection_t * tc, u32 listener_index,
s = session_alloc_for_connection (tc);
s->listener_handle = ((u64) thread_index << 32) | (u64) listener_index;
- s->session_state = SESSION_STATE_CREATED;
+ session_set_state (s, SESSION_STATE_CREATED);
if ((rv = app_worker_init_accepted (s)))
{
@@ -1277,6 +1330,7 @@ session_dgram_accept (transport_connection_t * tc, u32 listener_index,
}
session_lookup_add_connection (tc, session_handle (s));
+ session_set_state (s, SESSION_STATE_ACCEPTING);
app_wrk = app_worker_get (s->app_wrk_index);
if ((rv = app_worker_accept_notify (app_wrk, s)))
@@ -1314,7 +1368,10 @@ session_open_cl (session_endpoint_cfg_t *rmt, session_handle_t *rsh)
app_wrk = app_worker_get (rmt->app_wrk_index);
s = session_alloc_for_connection (tc);
s->app_wrk_index = app_wrk->wrk_index;
- s->session_state = SESSION_STATE_OPENED;
+ s->opaque = rmt->opaque;
+ session_set_state (s, SESSION_STATE_OPENED);
+ if (transport_connection_is_cless (tc))
+ s->flags |= SESSION_F_IS_CLESS;
if (app_worker_init_connected (app_wrk, s))
{
session_free (s);
@@ -1382,13 +1439,11 @@ session_open_app (session_endpoint_cfg_t *rmt, session_handle_t *rsh)
typedef int (*session_open_service_fn) (session_endpoint_cfg_t *,
session_handle_t *);
-/* *INDENT-OFF* */
static session_open_service_fn session_open_srv_fns[TRANSPORT_N_SERVICES] = {
session_open_vc,
session_open_cl,
session_open_app,
};
-/* *INDENT-ON* */
/**
* Ask transport to open connection to remote transport endpoint.
@@ -1422,12 +1477,12 @@ session_open (session_endpoint_cfg_t *rmt, session_handle_t *rsh)
int
session_listen (session_t * ls, session_endpoint_cfg_t * sep)
{
- transport_endpoint_t *tep;
+ transport_endpoint_cfg_t *tep;
int tc_index;
u32 s_index;
/* Transport bind/listen */
- tep = session_endpoint_to_transport (sep);
+ tep = session_endpoint_to_transport_cfg (sep);
s_index = ls->session_index;
tc_index = transport_start_listen (session_get_transport_proto (ls),
s_index, tep);
@@ -1439,6 +1494,9 @@ session_listen (session_t * ls, session_endpoint_cfg_t * sep)
* worker because local tables (for ct sessions) are not backed by a fib */
ls = listen_session_get (s_index);
ls->connection_index = tc_index;
+ ls->opaque = sep->opaque;
+ if (transport_connection_is_cless (session_get_transport (ls)))
+ ls->flags |= SESSION_F_IS_CLESS;
return 0;
}
@@ -1493,9 +1551,15 @@ session_half_close (session_t *s)
void
session_close (session_t * s)
{
- if (!s)
+ if (!s || (s->flags & SESSION_F_APP_CLOSED))
return;
+ /* Transports can close and delete their state independently of app closes
+ * and transport-initiated state transitions can hide app closes. Instead
+ * of extending the state machine to support separate tracking of app- and
+ * transport-initiated closes, use a flag. */
+ s->flags |= SESSION_F_APP_CLOSED;
+
if (s->session_state >= SESSION_STATE_CLOSING)
{
/* Session will only be removed once both app and transport
@@ -1506,7 +1570,12 @@ session_close (session_t * s)
return;
}
- s->session_state = SESSION_STATE_CLOSING;
+ /* App closed so stop propagating dequeue notifications.
+ * App might disconnect the session before it is connected, in which case
+ * the tx_fifo may not be set up yet, so clear only if it's initialized. */
+ if (s->tx_fifo)
+ svm_fifo_clear_deq_ntf (s->tx_fifo);
+ session_set_state (s, SESSION_STATE_CLOSING);
session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_CLOSE);
}
@@ -1518,12 +1587,46 @@ session_reset (session_t * s)
{
if (s->session_state >= SESSION_STATE_CLOSING)
return;
- /* Drop all outstanding tx data */
- svm_fifo_dequeue_drop_all (s->tx_fifo);
- s->session_state = SESSION_STATE_CLOSING;
+ /* Drop all outstanding tx data. App might disconnect the session before
+ * it is connected, in which case the tx_fifo may not be set up yet, so
+ * drop only if it's initialized. */
+ if (s->tx_fifo)
+ svm_fifo_dequeue_drop_all (s->tx_fifo);
+ session_set_state (s, SESSION_STATE_CLOSING);
session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_RESET);
}
+void
+session_detach_app (session_t *s)
+{
+ if (s->session_state < SESSION_STATE_TRANSPORT_CLOSING)
+ {
+ session_close (s);
+ }
+ else if (s->session_state < SESSION_STATE_TRANSPORT_DELETED)
+ {
+ transport_connection_t *tc;
+
+ /* Transport is closing but it's not yet deleted. Confirm the close and
+ * subsequently detach the transport from the session and enqueue a session
+ * cleanup notification. Transport closed and cleanup notifications are
+ * going to be dropped by session layer APIs */
+ transport_close (session_get_transport_proto (s), s->connection_index,
+ s->thread_index);
+ tc = session_get_transport (s);
+ tc->s_index = SESSION_INVALID_INDEX;
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
+ session_cleanup_notify (s, SESSION_CLEANUP_SESSION);
+ }
+ else
+ {
+ session_cleanup_notify (s, SESSION_CLEANUP_SESSION);
+ }
+
+ s->flags |= SESSION_F_APP_CLOSED;
+ s->app_wrk_index = APP_INVALID_INDEX;
+}
+
/**
* Notify transport the session can be half-disconnected.
*
@@ -1555,10 +1658,10 @@ session_transport_close (session_t * s)
if (s->session_state >= SESSION_STATE_APP_CLOSED)
{
if (s->session_state == SESSION_STATE_TRANSPORT_CLOSED)
- s->session_state = SESSION_STATE_CLOSED;
+ session_set_state (s, SESSION_STATE_CLOSED);
/* If transport is already deleted, just free the session */
else if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED)
- session_free_w_fifos (s);
+ session_program_cleanup (s);
return;
}
@@ -1568,7 +1671,7 @@ session_transport_close (session_t * s)
* delete notify. This will finally lead to the complete cleanup of the
* session.
*/
- s->session_state = SESSION_STATE_APP_CLOSED;
+ session_set_state (s, SESSION_STATE_APP_CLOSED);
transport_close (session_get_transport_proto (s), s->connection_index,
s->thread_index);
@@ -1583,13 +1686,13 @@ session_transport_reset (session_t * s)
if (s->session_state >= SESSION_STATE_APP_CLOSED)
{
if (s->session_state == SESSION_STATE_TRANSPORT_CLOSED)
- s->session_state = SESSION_STATE_CLOSED;
+ session_set_state (s, SESSION_STATE_CLOSED);
else if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED)
- session_free_w_fifos (s);
+ session_program_cleanup (s);
return;
}
- s->session_state = SESSION_STATE_APP_CLOSED;
+ session_set_state (s, SESSION_STATE_APP_CLOSED);
transport_reset (session_get_transport_proto (s), s->connection_index,
s->thread_index);
}
@@ -1616,64 +1719,63 @@ session_transport_cleanup (session_t * s)
}
/**
- * Allocate event queues in the shared-memory segment
+ * Allocate worker mqs in a shareable segment
*
- * That can only be a newly created memfd segment, that must be
- * mapped by all apps/stack users.
+ * That can only be a newly created memfd segment, which must be mapped
+ * by all apps/stack users unless private rx mqs are enabled.
*/
void
-session_vpp_event_queues_allocate (session_main_t * smm)
+session_vpp_wrk_mqs_alloc (session_main_t *smm)
{
- u32 evt_q_length = 2048, evt_size = sizeof (session_event_t);
- fifo_segment_t *eqs = &smm->evt_qs_segment;
- uword eqs_size = 64 << 20;
- pid_t vpp_pid = getpid ();
+ u32 mq_q_length = 2048, evt_size = sizeof (session_event_t);
+ fifo_segment_t *mqs_seg = &smm->wrk_mqs_segment;
+ svm_msg_q_cfg_t _cfg, *cfg = &_cfg;
+ uword mqs_seg_size;
int i;
- if (smm->configured_event_queue_length)
- evt_q_length = smm->configured_event_queue_length;
+ mq_q_length = clib_max (mq_q_length, smm->configured_wrk_mq_length);
- if (smm->evt_qs_segment_size)
- eqs_size = smm->evt_qs_segment_size;
+ svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = {
+ { mq_q_length, evt_size, 0 }, { mq_q_length >> 1, 256, 0 }
+ };
+ cfg->consumer_pid = 0;
+ cfg->n_rings = 2;
+ cfg->q_nitems = mq_q_length;
+ cfg->ring_cfgs = rc;
+
+ /*
+ * Compute mqs segment size based on rings config and leave space
+ * for passing extended configuration messages, i.e., data allocated
+ * outside of the rings. If provided with a config value, accept it
+ * if larger than minimum size.
+ */
+ mqs_seg_size = svm_msg_q_size_to_alloc (cfg) * vec_len (smm->wrk);
+ mqs_seg_size = mqs_seg_size + (1 << 20);
+ mqs_seg_size = clib_max (mqs_seg_size, smm->wrk_mqs_segment_size);
- eqs->ssvm.ssvm_size = eqs_size;
- eqs->ssvm.my_pid = vpp_pid;
- eqs->ssvm.name = format (0, "%s%c", "session: evt-qs-segment", 0);
- /* clib_mem_vm_map_shared consumes first page before requested_va */
- eqs->ssvm.requested_va = smm->session_baseva + clib_mem_get_page_size ();
+ mqs_seg->ssvm.ssvm_size = mqs_seg_size;
+ mqs_seg->ssvm.my_pid = getpid ();
+ mqs_seg->ssvm.name = format (0, "%s%c", "session: wrk-mqs-segment", 0);
- if (ssvm_server_init (&eqs->ssvm, SSVM_SEGMENT_MEMFD))
+ if (ssvm_server_init (&mqs_seg->ssvm, SSVM_SEGMENT_MEMFD))
{
clib_warning ("failed to initialize queue segment");
return;
}
- fifo_segment_init (eqs);
+ fifo_segment_init (mqs_seg);
/* Special fifo segment that's filled only with mqs */
- eqs->h->n_mqs = vec_len (smm->wrk);
+ mqs_seg->h->n_mqs = vec_len (smm->wrk);
for (i = 0; i < vec_len (smm->wrk); i++)
- {
- svm_msg_q_cfg_t _cfg, *cfg = &_cfg;
- svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = {
- {evt_q_length, evt_size, 0}
- ,
- {evt_q_length >> 1, 256, 0}
- };
- cfg->consumer_pid = 0;
- cfg->n_rings = 2;
- cfg->q_nitems = evt_q_length;
- cfg->ring_cfgs = rc;
-
- smm->wrk[i].vpp_event_queue = fifo_segment_msg_q_alloc (eqs, i, cfg);
- }
+ smm->wrk[i].vpp_event_queue = fifo_segment_msg_q_alloc (mqs_seg, i, cfg);
}
fifo_segment_t *
-session_main_get_evt_q_segment (void)
+session_main_get_wrk_mqs_segment (void)
{
- return &session_main.evt_qs_segment;
+ return &session_main.wrk_mqs_segment;
}
u64
@@ -1689,14 +1791,28 @@ session_segment_handle (session_t * s)
f->segment_index);
}
-/* *INDENT-OFF* */
+void
+session_get_original_dst (transport_endpoint_t *i2o_src,
+ transport_endpoint_t *i2o_dst,
+ transport_proto_t transport_proto, u32 *original_dst,
+ u16 *original_dst_port)
+{
+ session_main_t *smm = vnet_get_session_main ();
+ ip_protocol_t proto =
+ (transport_proto == TRANSPORT_PROTO_TCP ? IPPROTO_TCP : IPPROTO_UDP);
+ if (!smm->original_dst_lookup || !i2o_dst->is_ip4)
+ return;
+ smm->original_dst_lookup (&i2o_src->ip.ip4, i2o_src->port, &i2o_dst->ip.ip4,
+ i2o_dst->port, proto, original_dst,
+ original_dst_port);
+}
+
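A hedged usage sketch for the helper above: an accept-path caller recovering the pre-NAT destination of an ip4 tcp flow (variable names are illustrative):

u32 original_dst = 0;
u16 original_dst_port = 0;
/* lcl/rmt are the in2out src/dst endpoints of the accepted session */
session_get_original_dst (&lcl, &rmt, TRANSPORT_PROTO_TCP, &original_dst,
			  &original_dst_port);
/* outputs stay 0 if no lookup fn is registered or the flow is not ip4 */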
static session_fifo_rx_fn *session_tx_fns[TRANSPORT_TX_N_FNS] = {
session_tx_fifo_peek_and_snd,
session_tx_fifo_dequeue_and_snd,
session_tx_fifo_dequeue_internal,
session_tx_fifo_dequeue_and_snd
};
-/* *INDENT-ON* */
void
session_register_transport (transport_proto_t transport_proto,
@@ -1721,6 +1837,39 @@ session_register_transport (transport_proto_t transport_proto,
session_tx_fns[vft->transport_options.tx_type];
}
+void
+session_register_update_time_fn (session_update_time_fn fn, u8 is_add)
+{
+ session_main_t *smm = &session_main;
+ session_update_time_fn *fi;
+ u32 fi_pos = ~0;
+ u8 found = 0;
+
+ vec_foreach (fi, smm->update_time_fns)
+ {
+ if (*fi == fn)
+ {
+ fi_pos = fi - smm->update_time_fns;
+ found = 1;
+ break;
+ }
+ }
+
+ if (is_add)
+ {
+ if (found)
+ {
+ clib_warning ("update time fn %p already registered", fn);
+ return;
+ }
+ vec_add1 (smm->update_time_fns, fn);
+ }
+ else
+ {
+ /* ignore removal of a fn that was never registered */
+ if (!found)
+ return;
+ vec_del1 (smm->update_time_fns, fi_pos);
+ }
+}
+
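A minimal sketch of a transport registering the per-worker time-update hook introduced above (my_transport_update_time is hypothetical):

static void
my_transport_update_time (f64 now, u8 thread_index)
{
  /* advance per-thread timer wheels, rto timers, etc. */
}

/* at transport enable */
session_register_update_time_fn (my_transport_update_time, 1 /* is_add */);
/* at transport disable */
session_register_update_time_fn (my_transport_update_time, 0 /* is_add */);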
transport_proto_t
session_add_transport_proto (void)
{
@@ -1788,6 +1937,44 @@ session_queue_run_on_main_thread (vlib_main_t * vm)
vlib_node_set_interrupt_pending (vm, session_queue_node.index);
}
+static void
+session_stats_collector_fn (vlib_stats_collector_data_t *d)
+{
+ u32 i, n_workers, n_wrk_sessions, n_sessions = 0;
+ session_main_t *smm = &session_main;
+ session_worker_t *wrk;
+ counter_t **counters;
+ counter_t *cb;
+
+ n_workers = vec_len (smm->wrk);
+ vlib_stats_validate (d->entry_index, 0, n_workers - 1);
+ counters = d->entry->data;
+ cb = counters[0];
+
+ for (i = 0; i < vec_len (smm->wrk); i++)
+ {
+ wrk = session_main_get_worker (i);
+ n_wrk_sessions = pool_elts (wrk->sessions);
+ cb[i] = n_wrk_sessions;
+ n_sessions += n_wrk_sessions;
+ }
+
+ vlib_stats_set_gauge (d->private_data, n_sessions);
+}
+
+static void
+session_stats_collector_init (void)
+{
+ vlib_stats_collector_reg_t reg = {};
+
+ reg.entry_index =
+ vlib_stats_add_counter_vector ("/sys/session/sessions_per_worker");
+ reg.private_data = vlib_stats_add_gauge ("/sys/session/sessions_total");
+ reg.collect_fn = session_stats_collector_fn;
+ vlib_stats_register_collector_fn (&reg);
+ vlib_stats_validate (reg.entry_index, 0, vlib_get_n_threads ());
+}
+
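Once registered, both entries are readable from the stats shared-memory segment; a hedged example with the vpp_get_stats client (output shape illustrative):

$ vpp_get_stats dump /sys/session/sessions_total
/sys/session/sessions_total: 42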
static clib_error_t *
session_manager_main_enable (vlib_main_t * vm)
{
@@ -1808,6 +1995,7 @@ session_manager_main_enable (vlib_main_t * vm)
/* Allocate cache line aligned worker contexts */
vec_validate_aligned (smm->wrk, num_threads - 1, CLIB_CACHE_LINE_BYTES);
+ clib_spinlock_init (&session_main.pool_realloc_lock);
for (i = 0; i < num_threads; i++)
{
@@ -1816,21 +2004,20 @@ session_manager_main_enable (vlib_main_t * vm)
wrk->new_head = clib_llist_make_head (wrk->event_elts, evt_list);
wrk->old_head = clib_llist_make_head (wrk->event_elts, evt_list);
wrk->pending_connects = clib_llist_make_head (wrk->event_elts, evt_list);
+ wrk->evts_pending_main =
+ clib_llist_make_head (wrk->event_elts, evt_list);
wrk->vm = vlib_get_main_by_index (i);
wrk->last_vlib_time = vlib_time_now (vm);
wrk->last_vlib_us_time = wrk->last_vlib_time * CLIB_US_TIME_FREQ;
wrk->timerfd = -1;
vec_validate (wrk->session_to_enqueue, smm->last_transport_proto_type);
- if (num_threads > 1)
- clib_rwlock_init (&smm->wrk[i].peekers_rw_locks);
-
if (!smm->no_adaptive && smm->use_private_rx_mqs)
session_wrk_enable_adaptive_mode (wrk);
}
/* Allocate vpp event queues segment and queue */
- session_vpp_event_queues_allocate (smm);
+ session_vpp_wrk_mqs_alloc (smm);
/* Initialize segment manager properties */
segment_manager_main_init ();
@@ -1860,6 +2047,7 @@ session_manager_main_enable (vlib_main_t * vm)
session_lookup_init ();
app_namespaces_init ();
transport_init ();
+ session_stats_collector_init ();
smm->is_initialized = 1;
done:
@@ -1879,6 +2067,87 @@ session_manager_main_disable (vlib_main_t * vm)
transport_enable_disable (vm, 0 /* is_en */ );
}
+/* In this callback, the cookie hints at the index */
+void
+session_dma_completion_cb (vlib_main_t *vm, struct vlib_dma_batch *batch)
+{
+ session_worker_t *wrk;
+ wrk = session_main_get_worker (vm->thread_index);
+ session_dma_transfer *dma_transfer;
+
+ dma_transfer = &wrk->dma_trans[wrk->trans_head];
+ vec_add (wrk->pending_tx_buffers, dma_transfer->pending_tx_buffers,
+ vec_len (dma_transfer->pending_tx_buffers));
+ vec_add (wrk->pending_tx_nexts, dma_transfer->pending_tx_nexts,
+ vec_len (dma_transfer->pending_tx_nexts));
+ vec_reset_length (dma_transfer->pending_tx_buffers);
+ vec_reset_length (dma_transfer->pending_tx_nexts);
+ wrk->trans_head++;
+ if (wrk->trans_head == wrk->trans_size)
+ wrk->trans_head = 0;
+ return;
+}
+
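The completion callback above retires transfers in fifo order from a fixed-size ring indexed by trans_head/trans_tail; a hypothetical helper for the implied occupancy math:

static inline u16
session_dma_trans_in_flight (session_worker_t *wrk)
{
  /* tail is the producer (tx path), head the consumer (completion cb) */
  return (u16) ((wrk->trans_tail + wrk->trans_size - wrk->trans_head) %
		wrk->trans_size);
}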
+static void
+session_prepare_dma_args (vlib_dma_config_t *args)
+{
+ args->max_batches = 16;
+ args->max_transfers = DMA_TRANS_SIZE;
+ args->max_transfer_size = 65536;
+ args->features = 0;
+ args->sw_fallback = 1;
+ args->barrier_before_last = 1;
+ args->callback_fn = session_dma_completion_cb;
+}
+
+static void
+session_node_enable_dma (u8 is_en, int n_vlibs)
+{
+ vlib_dma_config_t args;
+ session_prepare_dma_args (&args);
+ session_worker_t *wrk;
+ vlib_main_t *vm;
+
+ int config_index = -1;
+
+ if (is_en)
+ {
+ vm = vlib_get_main_by_index (0);
+ config_index = vlib_dma_config_add (vm, &args);
+ }
+ else
+ {
+ vm = vlib_get_main_by_index (0);
+ wrk = session_main_get_worker (0);
+ if (wrk->config_index >= 0)
+ vlib_dma_config_del (vm, wrk->config_index);
+ }
+ int i;
+ for (i = 0; i < n_vlibs; i++)
+ {
+ vm = vlib_get_main_by_index (i);
+ wrk = session_main_get_worker (vm->thread_index);
+ wrk->config_index = config_index;
+ if (is_en)
+ {
+ if (config_index >= 0)
+ wrk->dma_enabled = true;
+ wrk->dma_trans = (session_dma_transfer *) clib_mem_alloc (
+ sizeof (session_dma_transfer) * DMA_TRANS_SIZE);
+ bzero (wrk->dma_trans,
+ sizeof (session_dma_transfer) * DMA_TRANS_SIZE);
+ }
+ else
+ {
+ if (wrk->dma_trans)
+ clib_mem_free (wrk->dma_trans);
+ }
+ wrk->trans_head = 0;
+ wrk->trans_tail = 0;
+ wrk->trans_size = DMA_TRANS_SIZE;
+ }
+}
+
void
session_node_enable_disable (u8 is_en)
{
@@ -1914,11 +2183,15 @@ session_node_enable_disable (u8 is_en)
if (!sm->poll_main)
continue;
}
+ vlib_node_set_state (vm, session_input_node.index, mstate);
vlib_node_set_state (vm, session_queue_node.index, state);
}
if (sm->use_private_rx_mqs)
application_enable_rx_mqs_nodes (is_en);
+
+ if (sm->dma_enabled)
+ session_node_enable_dma (is_en, n_vlibs);
}
clib_error_t *
@@ -1953,17 +2226,9 @@ session_main_init (vlib_main_t * vm)
smm->poll_main = 0;
smm->use_private_rx_mqs = 0;
smm->no_adaptive = 0;
- smm->session_baseva = HIGH_SEGMENT_BASEVA;
-
-#if (HIGH_SEGMENT_BASEVA > (4ULL << 30))
- smm->session_va_space_size = 128ULL << 30;
- smm->evt_qs_segment_size = 64 << 20;
-#else
- smm->session_va_space_size = 128 << 20;
- smm->evt_qs_segment_size = 1 << 20;
-#endif
-
- smm->last_transport_proto_type = TRANSPORT_PROTO_SRTP;
+ smm->last_transport_proto_type = TRANSPORT_PROTO_HTTP;
+ smm->port_allocator_min_src_port = 1024;
+ smm->port_allocator_max_src_port = 65535;
return 0;
}
@@ -1993,13 +2258,16 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input)
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (input, "event-queue-length %d", &nitems))
+ if (unformat (input, "wrk-mq-length %d", &nitems))
{
if (nitems >= 2048)
- smm->configured_event_queue_length = nitems;
+ smm->configured_wrk_mq_length = nitems;
else
clib_warning ("event queue length %d too small, ignored", nitems);
}
+ else if (unformat (input, "wrk-mqs-segment-size %U",
+ unformat_memory_size, &smm->wrk_mqs_segment_size))
+ ;
else if (unformat (input, "preallocated-sessions %d",
&smm->preallocated_sessions))
;
@@ -2058,16 +2326,12 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input)
else if (unformat (input, "local-endpoints-table-buckets %d",
&smm->local_endpoints_table_buckets))
;
- /* Deprecated but maintained for compatibility */
- else if (unformat (input, "evt_qs_memfd_seg"))
- ;
- else if (unformat (input, "evt_qs_seg_size %U", unformat_memory_size,
- &smm->evt_qs_segment_size))
- ;
+ else if (unformat (input, "min-src-port %d", &tmp))
+ smm->port_allocator_min_src_port = tmp;
+ else if (unformat (input, "max-src-port %d", &tmp))
+ smm->port_allocator_max_src_port = tmp;
else if (unformat (input, "enable"))
smm->session_enable_asap = 1;
- else if (unformat (input, "segment-baseva 0x%lx", &smm->session_baseva))
- ;
else if (unformat (input, "use-app-socket-api"))
(void) appns_sapi_enable_disable (1 /* is_enable */);
else if (unformat (input, "poll-main"))
@@ -2076,6 +2340,30 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input)
smm->use_private_rx_mqs = 1;
else if (unformat (input, "no-adaptive"))
smm->no_adaptive = 1;
+ else if (unformat (input, "use-dma"))
+ smm->dma_enabled = 1;
+ else if (unformat (input, "nat44-original-dst-enable"))
+ {
+ smm->original_dst_lookup = vlib_get_plugin_symbol (
+ "nat_plugin.so", "nat44_original_dst_lookup");
+ }
+ /*
+ * Deprecated but maintained for compatibility
+ */
+ else if (unformat (input, "evt_qs_memfd_seg"))
+ ;
+ else if (unformat (input, "segment-baseva 0x%lx", &tmp))
+ ;
+ else if (unformat (input, "evt_qs_seg_size %U", unformat_memory_size,
+ &smm->wrk_mqs_segment_size))
+ ;
+ else if (unformat (input, "event-queue-length %d", &nitems))
+ {
+ if (nitems >= 2048)
+ smm->configured_wrk_mq_length = nitems;
+ else
+ clib_warning ("event queue length %d too small, ignored", nitems);
+ }
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
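Taken together, a hedged startup.conf sketch exercising the renamed and new knobs parsed above (values are illustrative):

session {
  enable
  wrk-mq-length 4096
  wrk-mqs-segment-size 128m
  min-src-port 2048
  max-src-port 60999
  use-dma
  nat44-original-dst-enable
}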
diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h
index 2d01eb6a67a..a5604bf8725 100644
--- a/src/vnet/session/session.h
+++ b/src/vnet/session/session.h
@@ -21,23 +21,12 @@
#include <vnet/session/session_debug.h>
#include <svm/message_queue.h>
#include <svm/fifo_segment.h>
+#include <vlib/dma/dma.h>
-#define foreach_session_input_error \
-_(NO_SESSION, "No session drops") \
-_(NO_LISTENER, "No listener for dst port drops") \
-_(ENQUEUED, "Packets pushed into rx fifo") \
-_(NOT_READY, "Session not ready packets") \
-_(FIFO_FULL, "Packets dropped for lack of rx fifo space") \
-_(EVENT_FIFO_FULL, "Events not sent for lack of event fifo space") \
-_(API_QUEUE_FULL, "Sessions not created for lack of API queue space") \
-
-typedef enum
+typedef struct session_wrk_stats_
{
-#define _(sym,str) SESSION_ERROR_##sym,
- foreach_session_input_error
-#undef _
- SESSION_N_ERROR,
-} session_input_error_t;
+ u32 errors[SESSION_N_ERRORS];
+} session_wrk_stats_t;
typedef struct session_tx_context_
{
@@ -59,6 +48,7 @@ typedef struct session_tx_context_
/** Vector of tx buffer free lists */
u32 *tx_buffers;
+ vlib_buffer_t **transport_pending_bufs;
} session_tx_context_t;
typedef struct session_evt_elt
@@ -84,6 +74,13 @@ typedef enum session_wrk_flags_
SESSION_WRK_F_ADAPTIVE = 1 << 0,
} __clib_packed session_wrk_flag_t;
+#define DMA_TRANS_SIZE 1024
+typedef struct
+{
+ u32 *pending_tx_buffers;
+ u16 *pending_tx_nexts;
+} session_dma_transfer;
+
typedef struct session_worker_
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -103,8 +100,8 @@ typedef struct session_worker_
/** Convenience pointer to this worker's vlib_main */
vlib_main_t *vm;
- /** Per-proto vector of sessions to enqueue */
- u32 **session_to_enqueue;
+ /** Per-proto vector of session handles to enqueue */
+ session_handle_t **session_to_enqueue;
/** Timerfd used to periodically signal wrk session queue node */
int timerfd;
@@ -133,9 +130,6 @@ typedef struct session_worker_
/** Head of list of pending events */
clib_llist_index_t old_head;
- /** Peekers rw lock */
- clib_rwlock_t peekers_rw_locks;
-
/** Vector of buffers to be sent */
u32 *pending_tx_buffers;
@@ -151,8 +145,22 @@ typedef struct session_worker_
/** Flag that is set if main thread signaled to handle connects */
u32 n_pending_connects;
- /** Main thread loops in poll mode without a connect */
- u32 no_connect_loops;
+ /** List head for first worker evts pending handling on main */
+ clib_llist_index_t evts_pending_main;
+
+ /** Per-app-worker bitmap of pending notifications */
+ uword *app_wrks_pending_ntf;
+
+ int config_index;
+ u8 dma_enabled;
+ session_dma_transfer *dma_trans;
+ u16 trans_head;
+ u16 trans_tail;
+ u16 trans_size;
+ u16 batch_num;
+ vlib_dma_batch_t *batch;
+
+ session_wrk_stats_t stats;
#if SESSION_DEBUG
/** last event poll time by thread */
@@ -170,13 +178,22 @@ extern session_fifo_rx_fn session_tx_fifo_dequeue_internal;
u8 session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e);
+typedef void (*session_update_time_fn) (f64 time_now, u8 thread_index);
+typedef void (*nat44_original_dst_lookup_fn) (
+ ip4_address_t *i2o_src, u16 i2o_src_port, ip4_address_t *i2o_dst,
+ u16 i2o_dst_port, ip_protocol_t proto, u32 *original_dst,
+ u16 *original_dst_port);
+
typedef struct session_main_
{
/** Worker contexts */
session_worker_t *wrk;
+ /** Vector of transport update time functions */
+ session_update_time_fn *update_time_fns;
+
/** Event queues memfd segment */
- fifo_segment_t evt_qs_segment;
+ fifo_segment_t wrk_mqs_segment;
/** Unique segment name counter */
u32 unique_segment_name_counter;
@@ -189,11 +206,22 @@ typedef struct session_main_
* Trade memory for speed, for now */
u32 *session_type_to_next;
- /** Thread for cl and ho that rely on cl allocs */
+ /** Thread used for allocating active open connections, i.e., half-opens
+ * for transports like tcp, and sessions that will be migrated for cl
+ * transports like udp. If vpp has workers, this will be the first worker. */
u32 transport_cl_thread;
transport_proto_t last_transport_proto_type;
+ /** Number of workers at pool realloc barrier */
+ volatile u32 pool_realloc_at_barrier;
+
+ /** Number of workers doing reallocs */
+ volatile u32 pool_realloc_doing_work;
+
+ /** Lock to synchronize parallel forced reallocs */
+ clib_spinlock_t pool_realloc_lock;
+
/*
* Config parameters
*/
@@ -217,12 +245,13 @@ typedef struct session_main_
u8 no_adaptive;
/** vpp fifo event queue configured length */
- u32 configured_event_queue_length;
+ u32 configured_wrk_mq_length;
/** Session ssvm segment configs */
- uword session_baseva;
- uword session_va_space_size;
- uword evt_qs_segment_size;
+ uword wrk_mqs_segment_size;
+
+ /** Session dma enabled */
+ u8 dma_enabled;
/** Session table size parameters */
u32 configured_v4_session_table_buckets;
@@ -238,14 +267,22 @@ typedef struct session_main_
u32 local_endpoints_table_memory;
u32 local_endpoints_table_buckets;
+ /** Transport source port allocation range */
+ u16 port_allocator_min_src_port;
+ u16 port_allocator_max_src_port;
+
/** Preallocate session config parameter */
u32 preallocated_sessions;
u16 msg_id_base;
+
+ /** Query nat44-ed session to get original dst ip4 & dst port. */
+ nat44_original_dst_lookup_fn original_dst_lookup;
} session_main_t;
extern session_main_t session_main;
extern vlib_node_registration_t session_queue_node;
+extern vlib_node_registration_t session_input_node;
extern vlib_node_registration_t session_queue_process_node;
extern vlib_node_registration_t session_queue_pre_input_node;
@@ -301,7 +338,7 @@ session_evt_ctrl_data (session_worker_t * wrk, session_evt_elt_t * elt)
static inline void
session_evt_ctrl_data_free (session_worker_t * wrk, session_evt_elt_t * elt)
{
- ASSERT (elt->evt.event_type > SESSION_IO_EVT_BUILTIN_TX);
+ ASSERT (elt->evt.event_type >= SESSION_CTRL_EVT_RPC);
pool_put_index (wrk->ctrl_evts_data, elt->evt.ctrl_data_index);
}
@@ -329,7 +366,8 @@ int session_wrk_handle_mq (session_worker_t *wrk, svm_msg_q_t *mq);
session_t *session_alloc (u32 thread_index);
void session_free (session_t * s);
-void session_free_w_fifos (session_t * s);
+void session_cleanup (session_t *s);
+void session_program_cleanup (session_t *s);
void session_cleanup_half_open (session_handle_t ho_handle);
u8 session_is_valid (u32 si, u8 thread_index);
@@ -354,100 +392,53 @@ session_get_if_valid (u64 si, u32 thread_index)
}
always_inline session_t *
-session_get_from_handle (session_handle_t handle)
+session_get_from_handle (session_handle_tu_t handle)
{
session_main_t *smm = &session_main;
- u32 session_index, thread_index;
- session_parse_handle (handle, &session_index, &thread_index);
- return pool_elt_at_index (smm->wrk[thread_index].sessions, session_index);
+ return pool_elt_at_index (smm->wrk[handle.thread_index].sessions,
+ handle.session_index);
}
always_inline session_t *
-session_get_from_handle_if_valid (session_handle_t handle)
+session_get_from_handle_if_valid (session_handle_tu_t handle)
{
- u32 session_index, thread_index;
- session_parse_handle (handle, &session_index, &thread_index);
- return session_get_if_valid (session_index, thread_index);
+ return session_get_if_valid (handle.session_index, handle.thread_index);
}
-u64 session_segment_handle (session_t * s);
-
/**
- * Acquires a lock that blocks a session pool from expanding.
+ * Get session from handle and avoid pool validation if not on same thread
*
- * This is typically used for safely peeking into other threads'
- * pools in order to clone elements. Lock should be dropped as soon
- * as possible by calling @ref session_pool_remove_peeker.
- *
- * NOTE: Avoid using pool_elt_at_index while the lock is held because
- * it may lead to free elt bitmap expansion/contraction!
- */
-always_inline void
-session_pool_add_peeker (u32 thread_index)
-{
- session_worker_t *wrk = &session_main.wrk[thread_index];
- if (thread_index == vlib_get_thread_index ())
- return;
- clib_rwlock_reader_lock (&wrk->peekers_rw_locks);
-}
-
-always_inline void
-session_pool_remove_peeker (u32 thread_index)
-{
- session_worker_t *wrk = &session_main.wrk[thread_index];
- if (thread_index == vlib_get_thread_index ())
- return;
- clib_rwlock_reader_unlock (&wrk->peekers_rw_locks);
-}
-
-/**
- * Get session from handle and 'lock' pool resize if not in same thread
- *
- * Caller should drop the peek 'lock' as soon as possible.
+ * Peekers are fine because pool grows with barrier (see @ref session_alloc)
*/
always_inline session_t *
-session_get_from_handle_safe (u64 handle)
+session_get_from_handle_safe (session_handle_tu_t handle)
{
- u32 thread_index = session_thread_from_handle (handle);
- session_worker_t *wrk = &session_main.wrk[thread_index];
+ session_worker_t *wrk = &session_main.wrk[handle.thread_index];
- if (thread_index == vlib_get_thread_index ())
+ if (handle.thread_index == vlib_get_thread_index ())
{
- return pool_elt_at_index (wrk->sessions,
- session_index_from_handle (handle));
+ return pool_elt_at_index (wrk->sessions, handle.session_index);
}
else
{
- session_pool_add_peeker (thread_index);
- /* Don't use pool_elt_at index. See @ref session_pool_add_peeker */
- return wrk->sessions + session_index_from_handle (handle);
+ /* Don't use pool_elt_at_index to avoid pool bitmap reallocs */
+ return wrk->sessions + handle.session_index;
}
}
-always_inline u32
-session_get_index (session_t * s)
-{
- return (s - session_main.wrk[s->thread_index].sessions);
-}
-
always_inline session_t *
session_clone_safe (u32 session_index, u32 thread_index)
{
+ u32 current_thread_index = vlib_get_thread_index (), new_index;
session_t *old_s, *new_s;
- u32 current_thread_index = vlib_get_thread_index ();
- /* If during the memcpy pool is reallocated AND the memory allocator
- * decides to give the old chunk of memory to somebody in a hurry to
- * scribble something on it, we have a problem. So add this thread as
- * a session pool peeker.
- */
- session_pool_add_peeker (thread_index);
new_s = session_alloc (current_thread_index);
+ new_index = new_s->session_index;
+ /* Session pools are reallocated with barrier (see @ref session_alloc) */
old_s = session_main.wrk[thread_index].sessions + session_index;
clib_memcpy_fast (new_s, old_s, sizeof (*new_s));
- session_pool_remove_peeker (thread_index);
new_s->thread_index = current_thread_index;
- new_s->session_index = session_get_index (new_s);
+ new_s->session_index = new_index;
return new_s;
}
@@ -457,16 +448,19 @@ int session_stop_listen (session_t * s);
void session_half_close (session_t *s);
void session_close (session_t * s);
void session_reset (session_t * s);
+void session_detach_app (session_t *s);
void session_transport_half_close (session_t *s);
void session_transport_close (session_t * s);
void session_transport_reset (session_t * s);
void session_transport_cleanup (session_t * s);
-int session_send_io_evt_to_thread (svm_fifo_t * f,
- session_evt_type_t evt_type);
-int session_enqueue_notify (session_t * s);
+int session_enqueue_notify (session_t *s);
int session_dequeue_notify (session_t * s);
+int session_enqueue_notify_cl (session_t *s);
+int session_send_io_evt_to_thread (svm_fifo_t *f, session_evt_type_t evt_type);
int session_send_io_evt_to_thread_custom (void *data, u32 thread_index,
session_evt_type_t evt_type);
+int session_program_tx_io_evt (session_handle_tu_t sh,
+ session_evt_type_t evt_type);
void session_send_rpc_evt_to_thread (u32 thread_index, void *fp,
void *rpc_args);
void session_send_rpc_evt_to_thread_force (u32 thread_index, void *fp,
@@ -479,6 +473,7 @@ void session_get_endpoint (session_t * s, transport_endpoint_t * tep,
u8 is_lcl);
int session_transport_attribute (session_t *s, u8 is_get,
transport_endpt_attr_t *attr);
+u64 session_segment_handle (session_t *s);
u8 *format_session (u8 * s, va_list * args);
uword unformat_session (unformat_input_t * input, va_list * args);
@@ -496,6 +491,13 @@ int session_enqueue_dgram_connection (session_t * s,
session_dgram_hdr_t * hdr,
vlib_buffer_t * b, u8 proto,
u8 queue_event);
+int session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event);
+int session_enqueue_dgram_connection_cl (session_t *s,
+ session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event);
int session_stream_connect_notify (transport_connection_t * tc,
session_error_t err);
int session_dgram_connect_notify (transport_connection_t * tc,
@@ -513,6 +515,7 @@ int session_stream_accept (transport_connection_t * tc, u32 listener_index,
u32 thread_index, u8 notify);
int session_dgram_accept (transport_connection_t * tc, u32 listener_index,
u32 thread_index);
+
/**
* Initialize session layer for given transport proto and ip version
*
@@ -529,10 +532,18 @@ void session_register_transport (transport_proto_t transport_proto,
const transport_proto_vft_t * vft, u8 is_ip4,
u32 output_node);
transport_proto_t session_add_transport_proto (void);
+void session_register_update_time_fn (session_update_time_fn fn, u8 is_add);
int session_tx_fifo_peek_bytes (transport_connection_t * tc, u8 * buffer,
u32 offset, u32 max_bytes);
u32 session_tx_fifo_dequeue_drop (transport_connection_t * tc, u32 max_bytes);
+always_inline void
+session_set_state (session_t *s, session_state_t session_state)
+{
+ s->session_state = session_state;
+ SESSION_EVT (SESSION_EVT_STATE_CHANGE, s);
+}
+
always_inline u32
transport_max_rx_enqueue (transport_connection_t * tc)
{
@@ -575,6 +586,19 @@ transport_rx_fifo_has_ooo_data (transport_connection_t * tc)
return svm_fifo_has_ooo_data (s->rx_fifo);
}
+always_inline u32
+transport_tx_fifo_has_dgram (transport_connection_t *tc)
+{
+ session_t *s = session_get (tc->s_index, tc->thread_index);
+ u32 max_deq = svm_fifo_max_dequeue_cons (s->tx_fifo);
+ session_dgram_pre_hdr_t phdr;
+
+ if (max_deq <= sizeof (session_dgram_hdr_t))
+ return 0;
+ svm_fifo_peek (s->tx_fifo, 0, sizeof (phdr), (u8 *) &phdr);
+ return max_deq >= phdr.data_length + sizeof (session_dgram_hdr_t);
+}
+
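A hedged sketch of the intended caller pattern for the helper above: a connectionless transport only builds packets once a complete datagram record, header plus payload, is buffered (send_one_dgram is hypothetical):

while (transport_tx_fifo_has_dgram (tc))
  {
    /* safe: the full header and payload are dequeueable */
    send_one_dgram (tc);
  }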
always_inline void
transport_rx_fifo_req_deq_ntf (transport_connection_t *tc)
{
@@ -615,12 +639,19 @@ transport_cl_thread (void)
return session_main.transport_cl_thread;
}
+always_inline u32
+session_vlib_thread_is_cl_thread (void)
+{
+ return (vlib_get_thread_index () == transport_cl_thread () ||
+ vlib_thread_is_main_w_barrier ());
+}
+
/*
* Listen sessions
*/
-always_inline u64
-listen_session_get_handle (session_t * s)
+always_inline session_handle_t
+listen_session_get_handle (session_t *s)
{
ASSERT (s->session_state == SESSION_STATE_LISTENING ||
session_get_transport_proto (s) == TRANSPORT_PROTO_QUIC);
@@ -667,8 +698,8 @@ always_inline session_t *
ho_session_alloc (void)
{
session_t *s;
- ASSERT (vlib_get_thread_index () == 0);
- s = session_alloc (0);
+ ASSERT (session_vlib_thread_is_cl_thread ());
+ s = session_alloc (transport_cl_thread ());
s->session_state = SESSION_STATE_CONNECTING;
s->flags |= SESSION_F_HALF_OPEN;
return s;
@@ -677,7 +708,7 @@ ho_session_alloc (void)
always_inline session_t *
ho_session_get (u32 ho_index)
{
- return session_get (ho_index, 0 /* half-open thread */);
+ return session_get (ho_index, transport_cl_thread ());
}
always_inline void
@@ -702,7 +733,7 @@ vnet_get_session_main ()
always_inline session_worker_t *
session_main_get_worker (u32 thread_index)
{
- return &session_main.wrk[thread_index];
+ return vec_elt_at_index (session_main.wrk, thread_index);
}
static inline session_worker_t *
@@ -710,13 +741,13 @@ session_main_get_worker_if_valid (u32 thread_index)
{
if (thread_index > vec_len (session_main.wrk))
return 0;
- return &session_main.wrk[thread_index];
+ return session_main_get_worker (thread_index);
}
always_inline svm_msg_q_t *
session_main_get_vpp_event_queue (u32 thread_index)
{
- return session_main.wrk[thread_index].vpp_event_queue;
+ return session_main_get_worker (thread_index)->vpp_event_queue;
}
always_inline u8
@@ -725,14 +756,31 @@ session_main_is_enabled ()
return session_main.is_enabled == 1;
}
+always_inline void
+session_worker_stat_error_inc (session_worker_t *wrk, int error, int value)
+{
+ if ((-(error) >= 0 && -(error) < SESSION_N_ERRORS))
+ wrk->stats.errors[-error] += value;
+ else
+ SESSION_DBG ("unknown session counter");
+}
+
+always_inline void
+session_stat_error_inc (int error, int value)
+{
+ session_worker_t *wrk;
+ wrk = session_main_get_worker (vlib_get_thread_index ());
+ session_worker_stat_error_inc (wrk, error, value);
+}
+
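Usage sketch for the counters above: session errors are negative session_error_t codes, so callers pass them through unchanged (illustrative):

int rv = session_enqueue_notify (s);
if (rv)
  session_stat_error_inc (rv, 1);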
#define session_cli_return_if_not_enabled() \
do { \
if (!session_main.is_enabled) \
return clib_error_return (0, "session layer is not enabled"); \
} while (0)
-int session_main_flush_enqueue_events (u8 proto, u32 thread_index);
-int session_main_flush_all_enqueue_events (u8 transport_proto);
+void session_main_flush_enqueue_events (transport_proto_t transport_proto,
+ u32 thread_index);
void session_queue_run_on_main_thread (vlib_main_t * vm);
/**
@@ -761,12 +809,116 @@ session_wrk_update_time (session_worker_t *wrk, f64 now)
}
void session_wrk_enable_adaptive_mode (session_worker_t *wrk);
-fifo_segment_t *session_main_get_evt_q_segment (void);
+fifo_segment_t *session_main_get_wrk_mqs_segment (void);
void session_node_enable_disable (u8 is_en);
clib_error_t *vnet_session_enable_disable (vlib_main_t * vm, u8 is_en);
+void session_wrk_handle_evts_main_rpc (void *);
+void session_wrk_program_app_wrk_evts (session_worker_t *wrk,
+ u32 app_wrk_index);
session_t *session_alloc_for_connection (transport_connection_t * tc);
session_t *session_alloc_for_half_open (transport_connection_t *tc);
+void session_get_original_dst (transport_endpoint_t *i2o_src,
+ transport_endpoint_t *i2o_dst,
+ transport_proto_t transport_proto,
+ u32 *original_dst, u16 *original_dst_port);
+
+typedef void (pool_safe_realloc_rpc_fn) (void *rpc_args);
+
+typedef struct
+{
+ u8 ph[STRUCT_OFFSET_OF (pool_header_t, max_elts) + 4];
+ u32 flag;
+} pool_safe_realloc_header_t;
+
+STATIC_ASSERT_SIZEOF (pool_safe_realloc_header_t, sizeof (pool_header_t));
+
+#define POOL_REALLOC_SAFE_ELT_THRESH 32
+
+#define pool_realloc_flag(PH) \
+ ((pool_safe_realloc_header_t *) pool_header (PH))->flag
+
+typedef struct pool_realloc_rpc_args_
+{
+ void **pool;
+ uword elt_size;
+ uword align;
+} pool_realloc_rpc_args_t;
+
+always_inline void
+pool_program_safe_realloc_rpc (void *args)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 free_elts, max_elts, n_alloc;
+ pool_realloc_rpc_args_t *pra;
+
+ ASSERT (vlib_get_thread_index () == 0);
+ pra = (pool_realloc_rpc_args_t *) args;
+
+ vlib_worker_thread_barrier_sync (vm);
+
+ free_elts = _pool_free_elts (*pra->pool, pra->elt_size);
+ if (free_elts < POOL_REALLOC_SAFE_ELT_THRESH)
+ {
+ max_elts = _vec_max_len (*pra->pool, pra->elt_size);
+ n_alloc = clib_max (2 * max_elts, POOL_REALLOC_SAFE_ELT_THRESH);
+ _pool_alloc (pra->pool, n_alloc, pra->align, 0, pra->elt_size);
+ }
+ pool_realloc_flag (*pra->pool) = 0;
+ clib_mem_free (args);
+
+ vlib_worker_thread_barrier_release (vm);
+}
+
+always_inline void
+pool_program_safe_realloc (void **p, u32 elt_size, u32 align)
+{
+ pool_realloc_rpc_args_t *pra;
+
+ /* Reuse pad as a realloc flag */
+ if (pool_realloc_flag (*p))
+ return;
+
+ pra = clib_mem_alloc (sizeof (*pra));
+ pra->pool = p;
+ pra->elt_size = elt_size;
+ pra->align = align;
+ pool_realloc_flag (*p) = 1;
+
+ session_send_rpc_evt_to_thread (0 /* thread index */,
+ pool_program_safe_realloc_rpc, pra);
+}
+
+#define pool_needs_realloc(P) \
+ ((!P) || \
+ (vec_len (pool_header (P)->free_indices) < POOL_REALLOC_SAFE_ELT_THRESH && \
+ pool_free_elts (P) < POOL_REALLOC_SAFE_ELT_THRESH))
+
+#define pool_get_aligned_safe(P, E, align) \
+ do \
+ { \
+ if (PREDICT_FALSE (pool_needs_realloc (P))) \
+ { \
+ if (PREDICT_FALSE (!(P))) \
+ { \
+ pool_alloc_aligned (P, POOL_REALLOC_SAFE_ELT_THRESH, align); \
+ } \
+ else if (PREDICT_FALSE (!pool_free_elts (P))) \
+ { \
+ vlib_workers_sync (); \
+ pool_alloc_aligned (P, pool_max_len (P), align); \
+ vlib_workers_continue (); \
+ ALWAYS_ASSERT (pool_free_elts (P) > 0); \
+ } \
+ else \
+ { \
+ pool_program_safe_realloc ((void **) &(P), sizeof ((P)[0]), \
+ _vec_align (P, align)); \
+ } \
+ } \
+ pool_get_aligned (P, E, align); \
+ } \
+ while (0)
#endif /* __included_session_h__ */
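A hedged sketch of allocating from a worker session pool with the barrier-safe getter defined above (mirrors what session_alloc is expected to do):

session_t *s;
pool_get_aligned_safe (wrk->sessions, s, CLIB_CACHE_LINE_BYTES);
clib_memset (s, 0, sizeof (*s));
s->session_index = s - wrk->sessions;
s->thread_index = wrk - session_main.wrk;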
diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c
index 2121d2075e6..48eb932a2c9 100644
--- a/src/vnet/session/session_api.c
+++ b/src/vnet/session/session_api.c
@@ -82,40 +82,12 @@ session_send_fds (vl_api_registration_t * reg, int fds[], int n_fds)
}
static int
-mq_try_lock_and_alloc_msg (svm_msg_q_t * app_mq, svm_msg_q_msg_t * msg)
-{
- int rv;
- u8 try = 0;
- while (try < 100)
- {
- rv = svm_msg_q_lock_and_alloc_msg_w_ring (app_mq,
- SESSION_MQ_CTRL_EVT_RING,
- SVM_Q_NOWAIT, msg);
- if (!rv)
- return 0;
- /*
- * Break the loop if mq is full, usually this is because the
- * app has crashed or is hanging on somewhere.
- */
- if (rv != -1)
- break;
- try++;
- usleep (1);
- }
- clib_warning ("failed to alloc msg");
- return -1;
-}
-
-static int
mq_send_session_accepted_cb (session_t * s)
{
app_worker_t *app_wrk = app_worker_get (s->app_wrk_index);
- svm_msg_q_msg_t _msg, *msg = &_msg;
session_accepted_msg_t m = { 0 };
- svm_msg_q_t *app_mq;
fifo_segment_t *eq_seg;
session_t *listener;
- session_event_t *evt;
application_t *app;
app = application_get (app_wrk->app_index);
@@ -164,15 +136,14 @@ mq_send_session_accepted_cb (session_t * s)
m.mq_index = s->thread_index;
}
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return SESSION_E_MQ_MSG_ALLOC;
+ if (application_original_dst_is_enabled (app))
+ {
+ session_get_original_dst (&m.lcl, &m.rmt,
+ session_get_transport_proto (s),
+ &m.original_dst_ip4, &m.original_dst_port);
+ }
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_ACCEPTED;
- clib_memcpy_fast (evt->data, &m, sizeof (m));
- svm_msg_q_add_and_unlock (app_mq, msg);
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_ACCEPTED, &m, sizeof (m));
return 0;
}
@@ -181,21 +152,12 @@ static inline void
mq_send_session_close_evt (app_worker_t * app_wrk, session_handle_t sh,
session_evt_type_t evt_type)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_disconnected_msg_t *mp;
- svm_msg_q_t *app_mq;
- session_event_t *evt;
+ session_disconnected_msg_t m = { 0 };
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return;
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = evt_type;
- mp = (session_disconnected_msg_t *) evt->data;
- mp->handle = sh;
- mp->context = app_wrk->api_client_index;
- svm_msg_q_add_and_unlock (app_mq, msg);
+ m.handle = sh;
+ m.context = app_wrk->api_client_index;
+
+ app_wrk_send_ctrl_evt (app_wrk, evt_type, &m, sizeof (m));
}
static inline void
@@ -249,13 +211,9 @@ int
mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
session_t * s, session_error_t err)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
session_connected_msg_t m = { 0 };
- svm_msg_q_t *app_mq;
- transport_connection_t *tc;
fifo_segment_t *eq_seg;
app_worker_t *app_wrk;
- session_event_t *evt;
application_t *app;
app_wrk = app_worker_get (app_wrk_index);
@@ -271,14 +229,6 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
if (session_has_transport (s))
{
- tc = session_get_transport (s);
- if (!tc)
- {
- clib_warning ("failed to retrieve transport!");
- m.retval = SESSION_E_REFUSED;
- goto snd_msg;
- }
-
m.handle = session_handle (s);
m.vpp_event_queue_address =
fifo_segment_msg_q_offset (eq_seg, s->thread_index);
@@ -293,7 +243,6 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
else
{
ct_connection_t *cct;
- session_t *ss;
cct = (ct_connection_t *) session_get_transport (s);
m.handle = session_handle (s);
@@ -304,11 +253,10 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
m.server_rx_fifo = fifo_segment_fifo_offset (s->rx_fifo);
m.server_tx_fifo = fifo_segment_fifo_offset (s->tx_fifo);
m.segment_handle = session_segment_handle (s);
- ss = ct_session_get_peer (s);
- m.ct_rx_fifo = fifo_segment_fifo_offset (ss->tx_fifo);
- m.ct_tx_fifo = fifo_segment_fifo_offset (ss->rx_fifo);
- m.ct_segment_handle = session_segment_handle (ss);
m.mq_index = s->thread_index;
+ m.ct_rx_fifo = fifo_segment_fifo_offset (cct->client_rx_fifo);
+ m.ct_tx_fifo = fifo_segment_fifo_offset (cct->client_tx_fifo);
+ m.ct_segment_handle = cct->segment_handle;
}
/* Setup client session index in advance, in case data arrives
@@ -318,31 +266,19 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
snd_msg:
- app_mq = app_wrk->event_queue;
-
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return SESSION_E_MQ_MSG_ALLOC;
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_CONNECTED, &m, sizeof (m));
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_CONNECTED;
- clib_memcpy_fast (evt->data, &m, sizeof (m));
-
- svm_msg_q_add_and_unlock (app_mq, msg);
return 0;
}
-int
+static int
mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context,
session_handle_t handle, int rv)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
session_bound_msg_t m = { 0 };
- svm_msg_q_t *app_mq;
- transport_endpoint_t tep;
+ transport_connection_t *ltc;
fifo_segment_t *eq_seg;
app_worker_t *app_wrk;
- session_event_t *evt;
application_t *app;
app_listener_t *al;
session_t *ls = 0;
@@ -362,77 +298,60 @@ mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context,
else
ls = app_listener_get_local_session (al);
- session_get_endpoint (ls, &tep, 1 /* is_lcl */);
- m.lcl_port = tep.port;
- m.lcl_is_ip4 = tep.is_ip4;
- clib_memcpy_fast (m.lcl_ip, &tep.ip, sizeof (tep.ip));
+ ltc = session_get_transport (ls);
+ m.lcl_port = ltc->lcl_port;
+ m.lcl_is_ip4 = ltc->is_ip4;
+ clib_memcpy_fast (m.lcl_ip, &ltc->lcl_ip, sizeof (m.lcl_ip));
app = application_get (app_wrk->app_index);
eq_seg = application_get_rx_mqs_segment (app);
m.vpp_evt_q = fifo_segment_msg_q_offset (eq_seg, ls->thread_index);
m.mq_index = ls->thread_index;
- if (session_transport_service_type (ls) == TRANSPORT_SERVICE_CL &&
- ls->rx_fifo)
+ if (transport_connection_is_cless (ltc))
{
- m.rx_fifo = fifo_segment_fifo_offset (ls->rx_fifo);
- m.tx_fifo = fifo_segment_fifo_offset (ls->tx_fifo);
- m.segment_handle = session_segment_handle (ls);
+ session_t *wrk_ls;
+ m.mq_index = transport_cl_thread ();
+ m.vpp_evt_q = fifo_segment_msg_q_offset (eq_seg, m.mq_index);
+ wrk_ls = app_listener_get_wrk_cl_session (al, app_wrk->wrk_map_index);
+ m.rx_fifo = fifo_segment_fifo_offset (wrk_ls->rx_fifo);
+ m.tx_fifo = fifo_segment_fifo_offset (wrk_ls->tx_fifo);
+ m.segment_handle = session_segment_handle (wrk_ls);
}
snd_msg:
- app_mq = app_wrk->event_queue;
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_BOUND, &m, sizeof (m));
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return SESSION_E_MQ_MSG_ALLOC;
-
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_BOUND;
- clib_memcpy_fast (evt->data, &m, sizeof (m));
-
- svm_msg_q_add_and_unlock (app_mq, msg);
return 0;
}
-void
-mq_send_unlisten_reply (app_worker_t * app_wrk, session_handle_t sh,
- u32 context, int rv)
+static void
+mq_send_unlisten_cb (u32 app_wrk_index, session_handle_t sh, u32 context,
+ int rv)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_unlisten_reply_msg_t *ump;
- svm_msg_q_t *app_mq;
- session_event_t *evt;
+ session_unlisten_reply_msg_t m = { 0 };
+ app_worker_t *app_wrk;
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return;
+ app_wrk = app_worker_get (app_wrk_index);
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_UNLISTEN_REPLY;
- ump = (session_unlisten_reply_msg_t *) evt->data;
- ump->context = context;
- ump->handle = sh;
- ump->retval = rv;
- svm_msg_q_add_and_unlock (app_mq, msg);
+ m.context = context;
+ m.handle = sh;
+ m.retval = rv;
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_UNLISTEN_REPLY, &m,
+ sizeof (m));
}
static void
mq_send_session_migrate_cb (session_t * s, session_handle_t new_sh)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
session_migrated_msg_t m = { 0 };
fifo_segment_t *eq_seg;
app_worker_t *app_wrk;
- session_event_t *evt;
- svm_msg_q_t *app_mq;
application_t *app;
u32 thread_index;
thread_index = session_thread_from_handle (new_sh);
app_wrk = app_worker_get (s->app_wrk_index);
- app_mq = app_wrk->event_queue;
app = application_get (app_wrk->app_index);
eq_seg = application_get_rx_mqs_segment (app);
@@ -442,27 +361,15 @@ mq_send_session_migrate_cb (session_t * s, session_handle_t new_sh)
m.vpp_evt_q = fifo_segment_msg_q_offset (eq_seg, thread_index);
m.segment_handle = SESSION_INVALID_HANDLE;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return;
-
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_MIGRATED;
- clib_memcpy_fast (evt->data, &m, sizeof (m));
-
- svm_msg_q_add_and_unlock (app_mq, msg);
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_MIGRATED, &m, sizeof (m));
}
static int
mq_send_add_segment_cb (u32 app_wrk_index, u64 segment_handle)
{
- int fds[SESSION_N_FD_TYPE], n_fds = 0;
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_app_add_segment_msg_t *mp;
+ session_app_add_segment_msg_t m = { 0 };
vl_api_registration_t *reg;
app_worker_t *app_wrk;
- session_event_t *evt;
- svm_msg_q_t *app_mq;
fifo_segment_t *fs;
ssvm_private_t *sp;
u8 fd_flags = 0;
@@ -488,29 +395,16 @@ mq_send_add_segment_cb (u32 app_wrk_index, u64 segment_handle)
}
fd_flags |= SESSION_FD_F_MEMFD_SEGMENT;
- fds[n_fds] = sp->fd;
- n_fds += 1;
}
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return -1;
+ m.segment_size = sp->ssvm_size;
+ m.fd_flags = fd_flags;
+ m.segment_handle = segment_handle;
+ strncpy ((char *) m.segment_name, (char *) sp->name,
+ sizeof (m.segment_name) - 1);
- if (n_fds)
- session_send_fds (reg, fds, n_fds);
-
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_APP_ADD_SEGMENT;
- mp = (session_app_add_segment_msg_t *) evt->data;
- clib_memset (mp, 0, sizeof (*mp));
- mp->segment_size = sp->ssvm_size;
- mp->fd_flags = fd_flags;
- mp->segment_handle = segment_handle;
- strncpy ((char *) mp->segment_name, (char *) sp->name,
- sizeof (mp->segment_name) - 1);
-
- svm_msg_q_add_and_unlock (app_mq, msg);
+ app_wrk_send_ctrl_evt_fd (app_wrk, SESSION_CTRL_EVT_APP_ADD_SEGMENT, &m,
+ sizeof (m), sp->fd);
return 0;
}
@@ -518,12 +412,9 @@ mq_send_add_segment_cb (u32 app_wrk_index, u64 segment_handle)
static int
mq_send_del_segment_cb (u32 app_wrk_index, u64 segment_handle)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_app_del_segment_msg_t *mp;
+ session_app_del_segment_msg_t m = { 0 };
vl_api_registration_t *reg;
app_worker_t *app_wrk;
- session_event_t *evt;
- svm_msg_q_t *app_mq;
app_wrk = app_worker_get (app_wrk_index);
reg = vl_mem_api_client_index_to_registration (app_wrk->api_client_index);
@@ -533,17 +424,10 @@ mq_send_del_segment_cb (u32 app_wrk_index, u64 segment_handle)
return -1;
}
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return -1;
+ m.segment_handle = segment_handle;
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_APP_DEL_SEGMENT;
- mp = (session_app_del_segment_msg_t *) evt->data;
- clib_memset (mp, 0, sizeof (*mp));
- mp->segment_handle = segment_handle;
- svm_msg_q_add_and_unlock (app_mq, msg);
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_APP_DEL_SEGMENT, &m,
+ sizeof (m));
return 0;
}
@@ -551,10 +435,7 @@ mq_send_del_segment_cb (u32 app_wrk_index, u64 segment_handle)
static void
mq_send_session_cleanup_cb (session_t * s, session_cleanup_ntf_t ntf)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_cleanup_msg_t *mp;
- svm_msg_q_t *app_mq;
- session_event_t *evt;
+ session_cleanup_msg_t m = { 0 };
app_worker_t *app_wrk;
/* Propagate transport cleanup notifications only if app didn't close */
@@ -566,17 +447,56 @@ mq_send_session_cleanup_cb (session_t * s, session_cleanup_ntf_t ntf)
if (!app_wrk)
return;
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return;
+ m.handle = session_handle (s);
+ m.type = ntf;
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_CLEANUP;
- mp = (session_cleanup_msg_t *) evt->data;
- mp->handle = session_handle (s);
- mp->type = ntf;
- svm_msg_q_add_and_unlock (app_mq, msg);
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_CLEANUP, &m, sizeof (m));
+}
+
+static int
+mq_send_io_rx_event (session_t *s)
+{
+ session_event_t *mq_evt;
+ svm_msg_q_msg_t mq_msg;
+ app_worker_t *app_wrk;
+ svm_msg_q_t *mq;
+
+ if (svm_fifo_has_event (s->rx_fifo))
+ return 0;
+
+ app_wrk = app_worker_get (s->app_wrk_index);
+ mq = app_wrk->event_queue;
+
+ mq_msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
+ mq_evt = svm_msg_q_msg_data (mq, &mq_msg);
+
+ mq_evt->event_type = SESSION_IO_EVT_RX;
+ mq_evt->session_index = s->rx_fifo->shr->client_session_index;
+
+ (void) svm_fifo_set_event (s->rx_fifo);
+
+ svm_msg_q_add_raw (mq, &mq_msg);
+
+ return 0;
+}
+
+static int
+mq_send_io_tx_event (session_t *s)
+{
+ app_worker_t *app_wrk = app_worker_get (s->app_wrk_index);
+ svm_msg_q_t *mq = app_wrk->event_queue;
+ session_event_t *mq_evt;
+ svm_msg_q_msg_t mq_msg;
+
+ mq_msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
+ mq_evt = svm_msg_q_msg_data (mq, &mq_msg);
+
+ mq_evt->event_type = SESSION_IO_EVT_TX;
+ mq_evt->session_index = s->tx_fifo->shr->client_session_index;
+
+ svm_msg_q_add_raw (mq, &mq_msg);
+
+ return 0;
}
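These builtin callbacks mirror what external apps do over shared-memory mqs: rx events are coalesced via the fifo event flag set above, so a consumer must re-arm it before draining. A hedged app-side sketch, assuming svm_fifo_unset_event is the counterpart of svm_fifo_set_event:

/* app rx handler sketch: re-arm first so data enqueued mid-drain
 * still generates a fresh SESSION_IO_EVT_RX */
svm_fifo_unset_event (rx_fifo);
while (svm_fifo_max_dequeue_cons (rx_fifo) > 0)
  {
    int n_read = svm_fifo_dequeue (rx_fifo, sizeof (buf), buf);
    if (n_read <= 0)
      break;
  }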
static session_cb_vft_t session_mq_cb_vft = {
@@ -586,8 +506,12 @@ static session_cb_vft_t session_mq_cb_vft = {
.session_reset_callback = mq_send_session_reset_cb,
.session_migrate_callback = mq_send_session_migrate_cb,
.session_cleanup_callback = mq_send_session_cleanup_cb,
+ .session_listened_callback = mq_send_session_bound_cb,
+ .session_unlistened_callback = mq_send_unlisten_cb,
.add_segment_callback = mq_send_add_segment_cb,
.del_segment_callback = mq_send_del_segment_cb,
+ .builtin_app_rx_callback = mq_send_io_rx_event,
+ .builtin_app_tx_callback = mq_send_io_tx_event,
};
static void
@@ -653,7 +577,8 @@ vl_api_app_attach_t_handler (vl_api_app_attach_t * mp)
if ((rv = vnet_application_attach (a)))
{
- clib_warning ("attach returned: %d", rv);
+ clib_warning ("attach returned: %U", format_session_error, rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
vec_free (a->namespace_id);
goto done;
}
@@ -695,27 +620,28 @@ vl_api_app_attach_t_handler (vl_api_app_attach_t * mp)
}
done:
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_APP_ATTACH_REPLY, ({
- if (!rv)
- {
- ctrl_thread = n_workers ? 1 : 0;
- segp = (fifo_segment_t *) a->segment;
- rmp->app_index = clib_host_to_net_u32 (a->app_index);
- rmp->app_mq = fifo_segment_msg_q_offset (segp, 0);
- rmp->vpp_ctrl_mq = fifo_segment_msg_q_offset (rx_mqs_seg, ctrl_thread);
- rmp->vpp_ctrl_mq_thread = ctrl_thread;
- rmp->n_fds = n_fds;
- rmp->fd_flags = fd_flags;
- if (vec_len (segp->ssvm.name))
- {
- vl_api_vec_to_api_string (segp->ssvm.name, &rmp->segment_name);
- }
- rmp->segment_size = segp->ssvm.ssvm_size;
- rmp->segment_handle = clib_host_to_net_u64 (a->segment_handle);
- }
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO3 (
+ VL_API_APP_ATTACH_REPLY,
+ ((!rv) ? vec_len (((fifo_segment_t *) a->segment)->ssvm.name) : 0), ({
+ if (!rv)
+ {
+ ctrl_thread = n_workers ? 1 : 0;
+ segp = (fifo_segment_t *) a->segment;
+ rmp->app_index = clib_host_to_net_u32 (a->app_index);
+ rmp->app_mq = fifo_segment_msg_q_offset (segp, 0);
+ rmp->vpp_ctrl_mq =
+ fifo_segment_msg_q_offset (rx_mqs_seg, ctrl_thread);
+ rmp->vpp_ctrl_mq_thread = ctrl_thread;
+ rmp->n_fds = n_fds;
+ rmp->fd_flags = fd_flags;
+ if (vec_len (segp->ssvm.name))
+ {
+ vl_api_vec_to_api_string (segp->ssvm.name, &rmp->segment_name);
+ }
+ rmp->segment_size = segp->ssvm.ssvm_size;
+ rmp->segment_handle = clib_host_to_net_u64 (a->segment_handle);
+ }
+ }));
if (n_fds)
session_send_fds (reg, fds, n_fds);
@@ -757,7 +683,9 @@ vl_api_app_worker_add_del_t_handler (vl_api_app_worker_add_del_t * mp)
rv = vnet_app_worker_add_del (&args);
if (rv)
{
- clib_warning ("app worker add/del returned: %d", rv);
+ clib_warning ("app worker add/del returned: %U", format_session_error,
+ rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
goto done;
}
@@ -778,25 +706,27 @@ vl_api_app_worker_add_del_t_handler (vl_api_app_worker_add_del_t * mp)
n_fds += 1;
}
- /* *INDENT-OFF* */
done:
- REPLY_MACRO2 (VL_API_APP_WORKER_ADD_DEL_REPLY, ({
- rmp->is_add = mp->is_add;
- rmp->wrk_index = clib_host_to_net_u32 (args.wrk_map_index);
- rmp->segment_handle = clib_host_to_net_u64 (args.segment_handle);
- if (!rv && mp->is_add)
- {
- rmp->app_event_queue_address =
- fifo_segment_msg_q_offset ((fifo_segment_t *) args.segment, 0);
- rmp->n_fds = n_fds;
- rmp->fd_flags = fd_flags;
- if (vec_len (args.segment->name))
- {
- vl_api_vec_to_api_string (args.segment->name, &rmp->segment_name);
- }
- }
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO3 (
+ VL_API_APP_WORKER_ADD_DEL_REPLY,
+ ((!rv && mp->is_add) ? vec_len (args.segment->name) : 0), ({
+ rmp->is_add = mp->is_add;
+ rmp->wrk_index = mp->wrk_index;
+ if (!rv && mp->is_add)
+ {
+ rmp->wrk_index = clib_host_to_net_u32 (args.wrk_map_index);
+ rmp->segment_handle = clib_host_to_net_u64 (args.segment_handle);
+ rmp->app_event_queue_address =
+ fifo_segment_msg_q_offset ((fifo_segment_t *) args.segment, 0);
+ rmp->n_fds = n_fds;
+ rmp->fd_flags = fd_flags;
+ if (vec_len (args.segment->name))
+ {
+ vl_api_vec_to_api_string (args.segment->name,
+ &rmp->segment_name);
+ }
+ }
+ }));
if (n_fds)
session_send_fds (reg, fds, n_fds);
@@ -822,6 +752,12 @@ vl_api_application_detach_t_handler (vl_api_application_detach_t * mp)
a->app_index = app->app_index;
a->api_client_index = mp->client_index;
rv = vnet_application_detach (a);
+ if (rv)
+ {
+ clib_warning ("vnet_application_detach: %U", format_session_error,
+ rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
}
done:
@@ -845,7 +781,6 @@ vl_api_app_namespace_add_del_t_handler (vl_api_app_namespace_add_del_t * mp)
vnet_app_namespace_add_del_args_t args = {
.ns_id = ns_id,
- .netns = 0,
.sock_name = 0,
.secret = clib_net_to_host_u64 (mp->secret),
.sw_if_index = clib_net_to_host_u32 (mp->sw_if_index),
@@ -865,13 +800,11 @@ vl_api_app_namespace_add_del_t_handler (vl_api_app_namespace_add_del_t * mp)
}
vec_free (ns_id);
- /* *INDENT-OFF* */
done:
REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_REPLY, ({
if (!rv)
rmp->appns_index = clib_host_to_net_u32 (appns_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -879,7 +812,7 @@ vl_api_app_namespace_add_del_v2_t_handler (
vl_api_app_namespace_add_del_v2_t *mp)
{
vl_api_app_namespace_add_del_v2_reply_t *rmp;
- u8 *ns_id = 0, *netns = 0;
+ u8 *ns_id = 0;
u32 appns_index = 0;
int rv = 0;
@@ -890,13 +823,10 @@ vl_api_app_namespace_add_del_v2_t_handler (
}
mp->namespace_id[sizeof (mp->namespace_id) - 1] = 0;
- mp->netns[sizeof (mp->netns) - 1] = 0;
ns_id = format (0, "%s", &mp->namespace_id);
- netns = format (0, "%s", &mp->netns);
vnet_app_namespace_add_del_args_t args = {
.ns_id = ns_id,
- .netns = netns,
.sock_name = 0,
.secret = clib_net_to_host_u64 (mp->secret),
.sw_if_index = clib_net_to_host_u32 (mp->sw_if_index),
@@ -915,7 +845,6 @@ vl_api_app_namespace_add_del_v2_t_handler (
}
}
vec_free (ns_id);
- vec_free (netns);
done:
REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_V2_REPLY, ({
@@ -925,11 +854,55 @@ done:
}
static void
+vl_api_app_namespace_add_del_v4_t_handler (
+ vl_api_app_namespace_add_del_v4_t *mp)
+{
+ vl_api_app_namespace_add_del_v4_reply_t *rmp;
+ u8 *ns_id = 0, *sock_name = 0;
+ u32 appns_index = 0;
+ int rv = 0;
+ if (session_main_is_enabled () == 0)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto done;
+ }
+ mp->namespace_id[sizeof (mp->namespace_id) - 1] = 0;
+ ns_id = format (0, "%s", &mp->namespace_id);
+ sock_name = vl_api_from_api_to_new_vec (mp, &mp->sock_name);
+ vnet_app_namespace_add_del_args_t args = {
+ .ns_id = ns_id,
+ .sock_name = sock_name,
+ .secret = clib_net_to_host_u64 (mp->secret),
+ .sw_if_index = clib_net_to_host_u32 (mp->sw_if_index),
+ .ip4_fib_id = clib_net_to_host_u32 (mp->ip4_fib_id),
+ .ip6_fib_id = clib_net_to_host_u32 (mp->ip6_fib_id),
+ .is_add = mp->is_add,
+ };
+ rv = vnet_app_namespace_add_del (&args);
+ if (!rv && mp->is_add)
+ {
+ appns_index = app_namespace_index_from_id (ns_id);
+ if (appns_index == APP_NAMESPACE_INVALID_INDEX)
+ {
+ clib_warning ("app ns lookup failed id:%s", ns_id);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
+ }
+ vec_free (ns_id);
+ vec_free (sock_name);
+done:
+ REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_V4_REPLY, ({
+ if (!rv)
+ rmp->appns_index = clib_host_to_net_u32 (appns_index);
+ }));
+}
+
+static void
vl_api_app_namespace_add_del_v3_t_handler (
vl_api_app_namespace_add_del_v3_t *mp)
{
vl_api_app_namespace_add_del_v3_reply_t *rmp;
- u8 *ns_id = 0, *netns = 0, *sock_name = 0;
+ u8 *ns_id = 0, *sock_name = 0, *api_sock_name = 0;
u32 appns_index = 0;
int rv = 0;
if (session_main_is_enabled () == 0)
@@ -938,13 +911,22 @@ vl_api_app_namespace_add_del_v3_t_handler (
goto done;
}
mp->namespace_id[sizeof (mp->namespace_id) - 1] = 0;
- mp->netns[sizeof (mp->netns) - 1] = 0;
ns_id = format (0, "%s", &mp->namespace_id);
- netns = format (0, "%s", &mp->netns);
- sock_name = vl_api_from_api_to_new_vec (mp, &mp->sock_name);
+ api_sock_name = vl_api_from_api_to_new_vec (mp, &mp->sock_name);
+ mp->netns[sizeof (mp->netns) - 1] = 0;
+ if (strlen ((char *) mp->netns) != 0)
+ {
+ sock_name =
+ format (0, "abstract:%v,netns_name=%s", api_sock_name, &mp->netns);
+ }
+ else
+ {
+ sock_name = api_sock_name;
+ api_sock_name = 0; /* ownership moved to sock_name; vec_free is a no-op */
+ }
+
vnet_app_namespace_add_del_args_t args = {
.ns_id = ns_id,
- .netns = netns,
.sock_name = sock_name,
.secret = clib_net_to_host_u64 (mp->secret),
.sw_if_index = clib_net_to_host_u32 (mp->sw_if_index),
@@ -963,8 +945,8 @@ vl_api_app_namespace_add_del_v3_t_handler (
}
}
vec_free (ns_id);
- vec_free (netns);
vec_free (sock_name);
+ vec_free (api_sock_name);
done:
REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_V3_REPLY, ({
if (!rv)
@@ -999,7 +981,10 @@ vl_api_session_rule_add_del_t_handler (vl_api_session_rule_add_del_t * mp)
rv = vnet_session_rule_add_del (&args);
if (rv)
- clib_warning ("rule add del returned: %d", rv);
+ {
+ clib_warning ("rule add del returned: %U", format_session_error, rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
vec_free (table_args->tag);
REPLY_MACRO (VL_API_SESSION_RULE_ADD_DEL_REPLY);
}
@@ -1102,7 +1087,6 @@ send_session_rules_table_details (session_rules_table_t * srt, u8 fib_proto,
if (is_local || fib_proto == FIB_PROTOCOL_IP4)
{
u8 *tag = 0;
- /* *INDENT-OFF* */
srt16 = &srt->session_rules_tables_16;
pool_foreach (rule16, srt16->rules) {
ri = mma_rules_table_rule_index_16 (srt16, rule16);
@@ -1110,12 +1094,10 @@ send_session_rules_table_details (session_rules_table_t * srt, u8 fib_proto,
send_session_rule_details4 (rule16, is_local, tp, appns_index, tag,
reg, context);
}
- /* *INDENT-ON* */
}
if (is_local || fib_proto == FIB_PROTOCOL_IP6)
{
u8 *tag = 0;
- /* *INDENT-OFF* */
srt40 = &srt->session_rules_tables_40;
pool_foreach (rule40, srt40->rules) {
ri = mma_rules_table_rule_index_40 (srt40, rule40);
@@ -1123,7 +1105,6 @@ send_session_rules_table_details (session_rules_table_t * srt, u8 fib_proto,
send_session_rule_details6 (rule40, is_local, tp, appns_index, tag,
reg, context);
}
- /* *INDENT-ON* */
}
}
@@ -1138,7 +1119,6 @@ vl_api_session_rules_dump_t_handler (vl_api_session_rules_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
session_table_foreach (st, ({
for (tp = 0; tp < TRANSPORT_N_PROTOS; tp++)
{
@@ -1148,7 +1128,6 @@ vl_api_session_rules_dump_t_handler (vl_api_session_rules_dump_t * mp)
mp->context);
}
}));
- /* *INDENT-ON* */
}
static void
@@ -1193,12 +1172,10 @@ vl_api_app_add_cert_key_pair_t_handler (vl_api_app_add_cert_key_pair_t * mp)
rv = vnet_app_add_cert_key_pair (a);
done:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_APP_ADD_CERT_KEY_PAIR_REPLY, ({
if (!rv)
rmp->index = clib_host_to_net_u32 (a->index);
}));
- /* *INDENT-ON* */
}
static void
@@ -1214,6 +1191,12 @@ vl_api_app_del_cert_key_pair_t_handler (vl_api_app_del_cert_key_pair_t * mp)
}
ckpair_index = clib_net_to_host_u32 (mp->index);
rv = vnet_app_del_cert_key_pair (ckpair_index);
+ if (rv)
+ {
+ clib_warning ("vnet_app_del_cert_key_pair: %U", format_session_error,
+ rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
done:
REPLY_MACRO (VL_API_APP_DEL_CERT_KEY_PAIR_REPLY);
@@ -1239,36 +1222,11 @@ VL_MSG_API_REAPER_FUNCTION (application_reaper_cb);
* Socket api functions
*/
-static void
-sapi_send_fds (app_worker_t * app_wrk, int *fds, int n_fds)
-{
- app_sapi_msg_t smsg = { 0 };
- app_namespace_t *app_ns;
- application_t *app;
- clib_socket_t *cs;
- u32 cs_index;
-
- app = application_get (app_wrk->app_index);
- app_ns = app_namespace_get (app->ns_index);
- cs_index = appns_sapi_handle_sock_index (app_wrk->api_client_index);
- cs = appns_sapi_get_socket (app_ns, cs_index);
- if (PREDICT_FALSE (!cs))
- return;
-
- /* There's no payload for the message only the type */
- smsg.type = APP_SAPI_MSG_TYPE_SEND_FDS;
- clib_socket_sendmsg (cs, &smsg, sizeof (smsg), fds, n_fds);
-}
-
static int
mq_send_add_segment_sapi_cb (u32 app_wrk_index, u64 segment_handle)
{
- int fds[SESSION_N_FD_TYPE], n_fds = 0;
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_app_add_segment_msg_t *mp;
+ session_app_add_segment_msg_t m = { 0 };
app_worker_t *app_wrk;
- session_event_t *evt;
- svm_msg_q_t *app_mq;
fifo_segment_t *fs;
ssvm_private_t *sp;
u8 fd_flags = 0;
@@ -1280,33 +1238,15 @@ mq_send_add_segment_sapi_cb (u32 app_wrk_index, u64 segment_handle)
ASSERT (ssvm_type (sp) == SSVM_SEGMENT_MEMFD);
fd_flags |= SESSION_FD_F_MEMFD_SEGMENT;
- fds[n_fds] = sp->fd;
- n_fds += 1;
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return -1;
+ m.segment_size = sp->ssvm_size;
+ m.fd_flags = fd_flags;
+ m.segment_handle = segment_handle;
+ strncpy ((char *) m.segment_name, (char *) sp->name,
+ sizeof (m.segment_name) - 1);
- /*
- * Send the fd over api socket
- */
- sapi_send_fds (app_wrk, fds, n_fds);
-
- /*
- * Send the actual message over mq
- */
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_APP_ADD_SEGMENT;
- mp = (session_app_add_segment_msg_t *) evt->data;
- clib_memset (mp, 0, sizeof (*mp));
- mp->segment_size = sp->ssvm_size;
- mp->fd_flags = fd_flags;
- mp->segment_handle = segment_handle;
- strncpy ((char *) mp->segment_name, (char *) sp->name,
- sizeof (mp->segment_name) - 1);
-
- svm_msg_q_add_and_unlock (app_mq, msg);
+ app_wrk_send_ctrl_evt_fd (app_wrk, SESSION_CTRL_EVT_APP_ADD_SEGMENT, &m,
+ sizeof (m), sp->fd);
return 0;
}
@@ -1314,25 +1254,15 @@ mq_send_add_segment_sapi_cb (u32 app_wrk_index, u64 segment_handle)
static int
mq_send_del_segment_sapi_cb (u32 app_wrk_index, u64 segment_handle)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_app_del_segment_msg_t *mp;
+ session_app_del_segment_msg_t m = { 0 };
app_worker_t *app_wrk;
- session_event_t *evt;
- svm_msg_q_t *app_mq;
app_wrk = app_worker_get (app_wrk_index);
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return -1;
+ m.segment_handle = segment_handle;
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_APP_DEL_SEGMENT;
- mp = (session_app_del_segment_msg_t *) evt->data;
- clib_memset (mp, 0, sizeof (*mp));
- mp->segment_handle = segment_handle;
- svm_msg_q_add_and_unlock (app_mq, msg);
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_APP_DEL_SEGMENT, &m,
+ sizeof (m));
return 0;
}
@@ -1344,8 +1274,12 @@ static session_cb_vft_t session_mq_sapi_cb_vft = {
.session_reset_callback = mq_send_session_reset_cb,
.session_migrate_callback = mq_send_session_migrate_cb,
.session_cleanup_callback = mq_send_session_cleanup_cb,
+ .session_listened_callback = mq_send_session_bound_cb,
+ .session_unlistened_callback = mq_send_unlisten_cb,
.add_segment_callback = mq_send_add_segment_sapi_cb,
.del_segment_callback = mq_send_del_segment_sapi_cb,
+ .builtin_app_rx_callback = mq_send_io_rx_event,
+ .builtin_app_tx_callback = mq_send_io_tx_event,
};
static void
@@ -1485,7 +1419,7 @@ sapi_add_del_worker_handler (app_namespace_t * app_ns,
app = application_get_if_valid (mp->app_index);
if (!app)
{
- rv = VNET_API_ERROR_INVALID_VALUE;
+ rv = SESSION_E_INVALID;
goto done;
}
@@ -1500,7 +1434,8 @@ sapi_add_del_worker_handler (app_namespace_t * app_ns,
rv = vnet_app_worker_add_del (&args);
if (rv)
{
- clib_warning ("app worker add/del returned: %d", rv);
+ clib_warning ("app worker add/del returned: %U", format_session_error,
+ rv);
goto done;
}
@@ -1523,15 +1458,20 @@ sapi_add_del_worker_handler (app_namespace_t * app_ns,
done:
+  /* With the app sock api, the socket is expected to be closed, so no reply */
+ if (!mp->is_add && appns_sapi_enabled ())
+ return;
+
msg.type = APP_SAPI_MSG_TYPE_ADD_DEL_WORKER_REPLY;
rmp = &msg.worker_add_del_reply;
rmp->retval = rv;
rmp->is_add = mp->is_add;
+ rmp->wrk_index = mp->wrk_index;
rmp->api_client_handle = sapi_handle;
- rmp->wrk_index = args.wrk_map_index;
- rmp->segment_handle = args.segment_handle;
if (!rv && mp->is_add)
{
+ rmp->wrk_index = args.wrk_map_index;
+ rmp->segment_handle = args.segment_handle;
/* No segment name and size. This supports only memfds */
rmp->app_event_queue_address =
fifo_segment_msg_q_offset ((fifo_segment_t *) args.segment, 0);
@@ -1547,6 +1487,31 @@ done:
clib_socket_sendmsg (cs, &msg, sizeof (msg), fds, n_fds);
}
+/* This is a workaround for the case when the session layer starts reading
+ * the socket before the client actually sends the data
+ */
+static clib_error_t *
+sapi_socket_receive_wait (clib_socket_t *cs, u8 *msg, u32 msg_len)
+{
+ clib_error_t *err;
+ int n_tries = 5;
+
+ while (1)
+ {
+ err = clib_socket_recvmsg (cs, msg, msg_len, 0, 0);
+ if (!err)
+ break;
+
+ if (!n_tries)
+ return err;
+
+ n_tries--;
+ usleep (1);
+ }
+
+ return err;
+}
+
static void
sapi_add_del_cert_key_handler (app_namespace_t *app_ns, clib_socket_t *cs,
app_sapi_cert_key_add_del_msg_t *mp)
@@ -1570,11 +1535,11 @@ sapi_add_del_cert_key_handler (app_namespace_t *app_ns, clib_socket_t *cs,
}
vec_validate (certkey, mp->certkey_len - 1);
- err = clib_socket_recvmsg (cs, certkey, mp->certkey_len, 0, 0);
+
+ err = sapi_socket_receive_wait (cs, certkey, mp->certkey_len);
if (err)
{
clib_error_report (err);
- clib_error_free (err);
rv = SESSION_E_INVALID;
goto send_reply;
}
@@ -1635,7 +1600,9 @@ sapi_socket_detach (app_namespace_t * app_ns, clib_socket_t * cs)
/* Cleanup everything because app worker closed socket or crashed */
handle = (app_ns_api_handle_t *) & cs->private_data;
- app_wrk = app_worker_get (handle->aah_app_wrk_index);
+ app_wrk = app_worker_get_if_valid (handle->aah_app_wrk_index);
+ if (!app_wrk)
+ return;
vnet_app_worker_add_del_args_t args = {
.app_index = app_wrk->app_index,
@@ -1801,27 +1768,10 @@ appns_sapi_add_ns_socket (app_namespace_t * app_ns)
clib_socket_t *cs;
char dir[4096];
- if (app_ns->netns)
- {
- if (!app_ns->sock_name)
- app_ns->sock_name = format (0, "@vpp/session/%v%c", app_ns->ns_id, 0);
- if (app_ns->sock_name[0] != '@')
- return VNET_API_ERROR_INVALID_VALUE;
- }
- else
- {
- snprintf (dir, sizeof (dir), "%s%s", vlib_unix_get_runtime_dir (),
- subdir);
- err = vlib_unix_recursive_mkdir ((char *) dir);
- if (err)
- {
- clib_error_report (err);
- return VNET_API_ERROR_SYSCALL_ERROR_1;
- }
+ snprintf (dir, sizeof (dir), "%s%s", vlib_unix_get_runtime_dir (), subdir);
- if (!app_ns->sock_name)
- app_ns->sock_name = format (0, "%s%v%c", dir, app_ns->ns_id, 0);
- }
+ if (!app_ns->sock_name)
+ app_ns->sock_name = format (0, "%s%v%c", dir, app_ns->ns_id, 0);
/*
* Create and initialize socket to listen on
@@ -1832,13 +1782,24 @@ appns_sapi_add_ns_socket (app_namespace_t * app_ns)
CLIB_SOCKET_F_ALLOW_GROUP_WRITE |
CLIB_SOCKET_F_SEQPACKET | CLIB_SOCKET_F_PASSCRED;
- if ((err = clib_socket_init_netns (cs, app_ns->netns)))
+ if (clib_socket_prefix_get_type (cs->config) == CLIB_SOCKET_TYPE_UNIX)
+ {
+ err = vlib_unix_recursive_mkdir ((char *) dir);
+ if (err)
+ {
+ clib_error_report (err);
+ return SESSION_E_SYSCALL;
+ }
+ }
+
+ if ((err = clib_socket_init (cs)))
{
clib_error_report (err);
return -1;
}
- if (!app_ns->netns && stat ((char *) app_ns->sock_name, &file_stat) == -1)
+ if (clib_socket_prefix_get_type (cs->config) == CLIB_SOCKET_TYPE_UNIX &&
+ stat ((char *) app_ns->sock_name, &file_stat) == -1)
return -1;
/*
@@ -1860,19 +1821,6 @@ appns_sapi_add_ns_socket (app_namespace_t * app_ns)
return 0;
}
-static void
-vl_api_application_tls_cert_add_t_handler (
- vl_api_application_tls_cert_add_t *mp)
-{
- /* deprecated */
-}
-
-static void
-vl_api_application_tls_key_add_t_handler (vl_api_application_tls_key_add_t *mp)
-{
- /* deprecated */
-}
-
#include <vnet/session/session.api.c>
static clib_error_t *
session_api_hookup (vlib_main_t *vm)
diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c
index 24d8cfb1e24..569a77bccc1 100644
--- a/src/vnet/session/session_cli.c
+++ b/src/vnet/session/session_cli.c
@@ -145,8 +145,11 @@ format_session (u8 * s, va_list * args)
else if (ss->session_state == SESSION_STATE_CONNECTING)
{
if (ss->flags & SESSION_F_HALF_OPEN)
- s = format (s, "%U%v", format_transport_half_open_connection, tp,
- ss->connection_index, ss->thread_index, verbose, str);
+ {
+ s = format (s, "%U", format_transport_half_open_connection, tp,
+ ss->connection_index, ss->thread_index, verbose);
+ s = format (s, "%v", str);
+ }
else
s = format (s, "%U", format_transport_connection, tp,
ss->connection_index, ss->thread_index, verbose);
@@ -259,7 +262,6 @@ unformat_session (unformat_input_t * input, va_list * args)
if (s)
{
*result = s;
- session_pool_remove_peeker (s->thread_index);
return 1;
}
return 0;
@@ -340,7 +342,6 @@ session_cli_show_all_sessions (vlib_main_t * vm, int verbose)
n_closed = 0;
- /* *INDENT-OFF* */
pool_foreach (s, pool) {
if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED)
{
@@ -349,7 +350,6 @@ session_cli_show_all_sessions (vlib_main_t * vm, int verbose)
}
vlib_cli_output (vm, "%U", format_session, s, verbose);
}
- /* *INDENT-ON* */
if (!n_closed)
vlib_cli_output (vm, "Thread %d: active sessions %u", thread_index,
@@ -488,7 +488,6 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
{
u8 one_session = 0, do_listeners = 0, sst, do_elog = 0, do_filter = 0;
u32 track_index, thread_index = 0, start = 0, end = ~0, session_index;
- unformat_input_t _line_input, *line_input = &_line_input;
transport_proto_t transport_proto = TRANSPORT_PROTO_INVALID;
session_state_t state = SESSION_N_STATES, *states = 0;
session_main_t *smm = &session_main;
@@ -502,26 +501,20 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
session_cli_return_if_not_enabled ();
- if (!unformat_user (input, unformat_line_input, line_input))
- {
- session_cli_show_all_sessions (vm, 0);
- return 0;
- }
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "verbose %d", &verbose))
+ if (unformat (input, "verbose %d", &verbose))
;
- else if (unformat (line_input, "verbose"))
+ else if (unformat (input, "verbose"))
verbose = 1;
- else if (unformat (line_input, "listeners %U", unformat_transport_proto,
+ else if (unformat (input, "listeners %U", unformat_transport_proto,
&transport_proto))
do_listeners = 1;
- else if (unformat (line_input, "%U", unformat_session, &s))
+ else if (unformat (input, "%U", unformat_session, &s))
{
one_session = 1;
}
- else if (unformat (line_input, "thread %u index %u", &thread_index,
+ else if (unformat (input, "thread %u index %u", &thread_index,
&session_index))
{
s = session_get_if_valid (session_index, thread_index);
@@ -532,19 +525,17 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
}
one_session = 1;
}
- else if (unformat (line_input, "thread %u", &thread_index))
+ else if (unformat (input, "thread %u", &thread_index))
{
do_filter = 1;
}
- else
- if (unformat (line_input, "state %U", unformat_session_state, &state))
+ else if (unformat (input, "state %U", unformat_session_state, &state))
{
vec_add1 (states, state);
do_filter = 1;
}
- else if (unformat (line_input, "proto %U index %u",
- unformat_transport_proto, &transport_proto,
- &transport_index))
+ else if (unformat (input, "proto %U index %u", unformat_transport_proto,
+ &transport_proto, &transport_index))
{
transport_connection_t *tc;
tc = transport_get_connection (transport_proto, transport_index,
@@ -565,34 +556,34 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
}
one_session = 1;
}
- else if (unformat (line_input, "proto %U", unformat_transport_proto,
+ else if (unformat (input, "proto %U", unformat_transport_proto,
&transport_proto))
do_filter = 1;
- else if (unformat (line_input, "range %u %u", &start, &end))
+ else if (unformat (input, "range %u %u", &start, &end))
do_filter = 1;
- else if (unformat (line_input, "range %u", &start))
+ else if (unformat (input, "range %u", &start))
{
end = start + 50;
do_filter = 1;
}
- else if (unformat (line_input, "elog"))
+ else if (unformat (input, "elog"))
do_elog = 1;
- else if (unformat (line_input, "protos"))
+ else if (unformat (input, "protos"))
{
vlib_cli_output (vm, "%U", format_transport_protos);
goto done;
}
- else if (unformat (line_input, "states"))
+ else if (unformat (input, "states"))
{
session_cli_print_session_states (vm);
goto done;
}
- else if (unformat (line_input, "events"))
+ else if (unformat (input, "events"))
do_events = 1;
else
{
error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
+ format_unformat_error, input);
goto done;
}
}
@@ -625,7 +616,6 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "%-" SESSION_CLI_ID_LEN "s%-24s", "Listener",
"App");
- /* *INDENT-OFF* */
pool_foreach (s, smm->wrk[0].sessions) {
if (s->session_state != SESSION_STATE_LISTENING
|| s->session_type != sst)
@@ -635,7 +625,6 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "%U%-25v%", format_session, s, 0,
app_name);
}
- /* *INDENT-ON* */
goto done;
}
@@ -661,12 +650,10 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
session_cli_show_all_sessions (vm, verbose);
done:
- unformat_free (line_input);
vec_free (states);
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_show_session_command) =
{
.path = "show session",
@@ -676,7 +663,6 @@ VLIB_CLI_COMMAND (vlib_cli_show_session_command) =
"[protos] [states] ",
.function = show_session_command_fn,
};
-/* *INDENT-ON* */
static int
clear_session (session_t * s)
@@ -728,27 +714,23 @@ clear_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (clear_all)
{
- /* *INDENT-OFF* */
vec_foreach (wrk, smm->wrk)
{
pool_foreach (session, wrk->sessions) {
clear_session (session);
}
};
- /* *INDENT-ON* */
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_session_command, static) =
{
.path = "clear session",
.short_help = "clear session thread <thread> session <index>",
.function = clear_session_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_session_fifo_trace_command_fn (vlib_main_t * vm,
@@ -791,14 +773,12 @@ show_session_fifo_trace_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_session_fifo_trace_command, static) =
{
.path = "show session fifo trace",
.short_help = "show session fifo trace <session>",
.function = show_session_fifo_trace_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
session_replay_fifo_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -838,53 +818,98 @@ session_replay_fifo_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (session_replay_fifo_trace_command, static) =
{
.path = "session replay fifo",
.short_help = "session replay fifo <session>",
.function = session_replay_fifo_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
session_enable_disable_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- unformat_input_t _line_input, *line_input = &_line_input;
- u8 is_en = 1;
- clib_error_t *error;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return clib_error_return (0, "expected enable | disable");
+ u8 is_en = 2;
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "enable"))
+ if (unformat (input, "enable"))
is_en = 1;
- else if (unformat (line_input, "disable"))
+ else if (unformat (input, "disable"))
is_en = 0;
else
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- unformat_free (line_input);
- return error;
- }
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
}
- unformat_free (line_input);
+ if (is_en > 1)
+ return clib_error_return (0, "expected enable | disable");
+
return vnet_session_enable_disable (vm, is_en);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (session_enable_disable_command, static) =
{
.path = "session",
.short_help = "session [enable|disable]",
.function = session_enable_disable_fn,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+show_session_stats_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ session_main_t *smm = &session_main;
+ session_worker_t *wrk;
+ unsigned int *e;
+
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ return clib_error_return (0, "unknown input `%U'", format_unformat_error,
+ input);
+
+ vec_foreach (wrk, smm->wrk)
+ {
+ vlib_cli_output (vm, "Thread %u:\n", wrk - smm->wrk);
+ e = wrk->stats.errors;
+#define _(name, str) \
+ if (e[SESSION_EP_##name]) \
+ vlib_cli_output (vm, " %lu %s", e[SESSION_EP_##name], str);
+ foreach_session_error
+#undef _
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_session_stats_command, static) = {
+ .path = "show session stats",
+ .short_help = "show session stats",
+ .function = show_session_stats_fn,
+};
+
+static clib_error_t *
+clear_session_stats_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ session_main_t *smm = &session_main;
+ session_worker_t *wrk;
+
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ return clib_error_return (0, "unknown input `%U'", format_unformat_error,
+ input);
+
+ vec_foreach (wrk, smm->wrk)
+ {
+ clib_memset (&wrk->stats, 0, sizeof (wrk->stats));
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (clear_session_stats_command, static) = {
+ .path = "clear session stats",
+ .short_help = "clear session stats",
+ .function = clear_session_stats_fn,
+};
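
A hypothetical vppctl exchange with the two commands registered above; the per-thread counter line is illustrative, since the actual names and values come from foreach_session_error:

    vpp# show session stats
    Thread 0:
      42 <error counter description>
    vpp# clear session stats
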
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/session/session_debug.c b/src/vnet/session/session_debug.c
index 349d1ec9b46..2a50adac5dd 100644
--- a/src/vnet/session/session_debug.c
+++ b/src/vnet/session/session_debug.c
@@ -52,15 +52,20 @@ show_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_session_dbg_clock_cycles_command, static) =
{
.path = "show session dbg clock_cycles",
.short_help = "show session dbg clock_cycles",
.function = show_session_dbg_clock_cycles_fn,
};
-/* *INDENT-ON* */
+static_always_inline f64
+session_dbg_time_now (u32 thread)
+{
+ vlib_main_t *vm = vlib_get_main_by_index (thread);
+
+ return clib_time_now (&vm->clib_time) + vm->time_offset;
+}
static clib_error_t *
clear_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -77,7 +82,7 @@ clear_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input,
{
sde = &session_dbg_main.wrk[thread];
clib_memset (sde, 0, sizeof (session_dbg_evts_t));
- sde->last_time = vlib_time_now (vlib_mains[thread]);
+ sde->last_time = session_dbg_time_now (thread);
sde->start_time = sde->last_time;
}
@@ -85,14 +90,12 @@ clear_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_session_clock_cycles_command, static) =
{
.path = "clear session dbg clock_cycles",
.short_help = "clear session dbg clock_cycles",
.function = clear_session_dbg_clock_cycles_fn,
};
-/* *INDENT-ON* */
void
session_debug_init (void)
@@ -107,15 +110,99 @@ session_debug_init (void)
for (thread = 0; thread < num_threads; thread++)
{
clib_memset (&sdm->wrk[thread], 0, sizeof (session_dbg_evts_t));
- sdm->wrk[thread].start_time = vlib_time_now (vlib_mains[thread]);
+ sdm->wrk[thread].start_time = session_dbg_time_now (thread);
+ }
+}
+
+static const char *session_evt_grp_str[] = {
+#define _(sym, str) str,
+ foreach_session_evt_grp
+#undef _
+};
+
+static void
+session_debug_show_groups (vlib_main_t *vm)
+{
+ session_dbg_main_t *sdm = &session_dbg_main;
+ int i = 0;
+
+ vlib_cli_output (vm, "%-10s%-30s%-10s", "Index", "Group", "Level");
+
+ for (i = 0; i < SESSION_EVT_N_GRP; i++)
+ vlib_cli_output (vm, "%-10d%-30s%-10d", i, session_evt_grp_str[i],
+ sdm->grp_dbg_lvl[i]);
+}
+
+static clib_error_t *
+session_debug_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ session_dbg_main_t *sdm = &session_dbg_main;
+ u32 group, level = ~0;
+ clib_error_t *error = 0;
+ u8 is_show = 0;
+ uword *bitmap = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "show"))
+ is_show = 1;
+ else if (unformat (input, "group %U", unformat_bitmap_list, &bitmap))
+ ;
+ else if (unformat (input, "level %d", &level))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ if (is_show)
+ {
+ session_debug_show_groups (vm);
+ goto done;
+ }
+ if (level == ~0)
+ {
+ vlib_cli_output (vm, "level must be entered");
+ goto done;
+ }
+
+ group = clib_bitmap_last_set (bitmap);
+ if (group == ~0)
+ {
+ vlib_cli_output (vm, "group must be entered");
+ goto done;
+ }
+ if (group >= SESSION_EVT_N_GRP)
+ {
+ vlib_cli_output (vm, "group out of bounds");
+ goto done;
}
+ clib_bitmap_foreach (group, bitmap)
+ sdm->grp_dbg_lvl[group] = level;
+
+done:
+
+ clib_bitmap_free (bitmap);
+ return error;
}
+
+VLIB_CLI_COMMAND (session_debug_command, static) = {
+ .path = "session debug",
+  .short_help = "session debug {show | group <list> level <n>}",
+ .function = session_debug_fn,
+ .is_mp_safe = 1,
+};
+
#else
void
session_debug_init (void)
{
}
-#endif
+#endif /* SESSION_DEBUG */
void
dump_thread_0_event_queue (void)
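
A sketch of driving the new `session debug` command from vppctl; per the group list in session_debug.h, index 3 is the "state machine" group, and `group` accepts a bitmap list such as 0-2 (output values illustrative):

    vpp# session debug show
    Index     Group                         Level
    0         dequeue/enqueue events        0
    3         state machine                 0
    ...
    vpp# session debug group 3 level 1
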
@@ -144,6 +231,8 @@ dump_thread_0_event_queue (void)
{
case SESSION_IO_EVT_TX:
s0 = session_get_if_valid (e->session_index, my_thread_index);
+ if (!s0)
+ break;
fformat (stdout, "[%04d] TX session %d\n", i, s0->session_index);
break;
@@ -155,6 +244,8 @@ dump_thread_0_event_queue (void)
case SESSION_IO_EVT_BUILTIN_RX:
s0 = session_get_if_valid (e->session_index, my_thread_index);
+ if (!s0)
+ break;
fformat (stdout, "[%04d] builtin_rx %d\n", i, s0->session_index);
break;
@@ -180,28 +271,18 @@ dump_thread_0_event_queue (void)
static u8
session_node_cmp_event (session_event_t * e, svm_fifo_t * f)
{
- session_t *s;
switch (e->event_type)
{
case SESSION_IO_EVT_RX:
case SESSION_IO_EVT_TX:
case SESSION_IO_EVT_BUILTIN_RX:
- case SESSION_IO_EVT_BUILTIN_TX:
+ case SESSION_IO_EVT_TX_MAIN:
case SESSION_IO_EVT_TX_FLUSH:
if (e->session_index == f->shr->master_session_index)
return 1;
break;
case SESSION_CTRL_EVT_CLOSE:
- break;
case SESSION_CTRL_EVT_RPC:
- s = session_get_from_handle (e->session_handle);
- if (!s)
- {
- clib_warning ("session has event but doesn't exist!");
- break;
- }
- if (s->rx_fifo == f || s->tx_fifo == f)
- return 1;
break;
default:
break;
@@ -217,7 +298,6 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
session_worker_t *wrk;
int i, index, found = 0;
svm_msg_q_msg_t *msg;
- svm_msg_q_ring_t *ring;
svm_msg_q_t *mq;
u8 thread_index;
@@ -234,8 +314,7 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
for (i = 0; i < sq->cursize; i++)
{
msg = (svm_msg_q_msg_t *) (&sq->data[0] + sq->elsize * index);
- ring = svm_msg_q_ring (mq, msg->ring_index);
- clib_memcpy_fast (e, svm_msg_q_msg_data (mq, msg), ring->elsize);
+ clib_memcpy_fast (e, svm_msg_q_msg_data (mq, msg), sizeof (*e));
found = session_node_cmp_event (e, f);
if (found)
return 1;
@@ -245,7 +324,6 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
* Search pending events vector
*/
- /* *INDENT-OFF* */
clib_llist_foreach (wrk->event_elts, evt_list,
pool_elt_at_index (wrk->event_elts, wrk->new_head),
elt, ({
@@ -256,9 +334,7 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
goto done;
}
}));
- /* *INDENT-ON* */
- /* *INDENT-OFF* */
clib_llist_foreach (wrk->event_elts, evt_list,
pool_elt_at_index (wrk->event_elts, wrk->old_head),
elt, ({
@@ -269,7 +345,6 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
goto done;
}
}));
- /* *INDENT-ON* */
done:
return found;
diff --git a/src/vnet/session/session_debug.h b/src/vnet/session/session_debug.h
index 9e49a35dbe6..d433ef47fb1 100644
--- a/src/vnet/session/session_debug.h
+++ b/src/vnet/session/session_debug.h
@@ -17,49 +17,81 @@
#include <vnet/session/transport.h>
#include <vlib/vlib.h>
-
-#define foreach_session_dbg_evt \
- _(ENQ, "enqueue") \
- _(DEQ, "dequeue") \
- _(DEQ_NODE, "dequeue") \
- _(POLL_GAP_TRACK, "poll gap track") \
- _(POLL_DISPATCH_TIME, "dispatch time") \
- _(DISPATCH_START, "dispatch start") \
- _(DISPATCH_END, "dispatch end") \
- _(FREE, "session free") \
- _(DSP_CNTRS, "dispatch counters") \
- _(IO_EVT_COUNTS, "io evt counts") \
- _(EVT_COUNTS, "ctrl evt counts") \
+#include <vpp/vnet/config.h>
+
+#define foreach_session_dbg_evt \
+ _ (ENQ, DEQ_EVTS, 1, "enqueue") \
+ _ (DEQ, DEQ_EVTS, 1, "dequeue") \
+ _ (DEQ_NODE, DISPATCH_DBG, 1, "dequeue") \
+ _ (POLL_GAP_TRACK, EVT_POLL_DBG, 1, "poll gap track") \
+ _ (POLL_DISPATCH_TIME, EVT_POLL_DBG, 1, "dispatch time") \
+ _ (DISPATCH_START, CLOCKS_EVT_DBG, 1, "dispatch start") \
+ _ (DISPATCH_END, CLOCKS_EVT_DBG, 1, "dispatch end") \
+ _ (DSP_CNTRS, CLOCKS_EVT_DBG, 1, "dispatch counters") \
+ _ (STATE_CHANGE, SM, 1, "session state change") \
+ _ (FREE, SM, 1, "session free") \
+ _ (IO_EVT_COUNTS, COUNTS_EVT_DBG, 1, "io evt counts") \
+ _ (COUNTS, COUNTS_EVT_DBG, 1, "ctrl evt counts")
typedef enum _session_evt_dbg
{
-#define _(sym, str) SESSION_EVT_##sym,
+#define _(sym, grp, lvl, str) SESSION_EVT_##sym,
foreach_session_dbg_evt
#undef _
} session_evt_dbg_e;
-#define foreach_session_events \
-_(CLK_UPDATE_TIME, 1, 1, "Time Update Time") \
-_(CLK_MQ_DEQ, 1, 1, "Time MQ Dequeue") \
-_(CLK_CTRL_EVTS, 1, 1, "Time Ctrl Events") \
-_(CLK_NEW_IO_EVTS, 1, 1, "Time New IO Events") \
-_(CLK_OLD_IO_EVTS, 1, 1, "Time Old IO Events") \
-_(CLK_TOTAL, 1, 1, "Time Total in Node") \
-_(CLK_START, 1, 1, "Time Since Last Reset") \
- \
-_(CNT_MQ_EVTS, 1, 0, "# of MQ Events Processed" ) \
-_(CNT_CTRL_EVTS, 1, 0, "# of Ctrl Events Processed" ) \
-_(CNT_NEW_EVTS, 1, 0, "# of New Events Processed" ) \
-_(CNT_OLD_EVTS, 1, 0, "# of Old Events Processed" ) \
-_(CNT_IO_EVTS, 1, 0, "# of Events Processed" ) \
-_(CNT_NODE_CALL, 1, 0, "# of Node Calls") \
- \
-_(BASE_OFFSET_IO_EVTS, 0, 0, "NULL") \
-_(SESSION_IO_EVT_RX, 1, 0, "# of IO Event RX") \
-_(SESSION_IO_EVT_TX, 1, 0, "# of IO Event TX") \
-_(SESSION_IO_EVT_TX_FLUSH, 1, 0, "# of IO Event TX Flush") \
-_(SESSION_IO_EVT_BUILTIN_RX, 1, 0, "# of IO Event BuiltIn RX") \
-_(SESSION_IO_EVT_BUILTIN_TX, 1, 0, "# of IO Event BuiltIn TX") \
+typedef enum session_evt_lvl_
+{
+#define _(sym, grp, lvl, str) SESSION_EVT_##sym##_LVL = lvl,
+ foreach_session_dbg_evt
+#undef _
+} session_evt_lvl_e;
+
+#define foreach_session_evt_grp \
+ _ (DEQ_EVTS, "dequeue/enqueue events") \
+ _ (DISPATCH_DBG, "dispatch") \
+ _ (EVT_POLL_DBG, "event poll") \
+ _ (SM, "state machine") \
+ _ (CLOCKS_EVT_DBG, "clocks events") \
+ _ (COUNTS_EVT_DBG, "counts events")
+
+typedef enum session_evt_grp_
+{
+#define _(sym, str) SESSION_EVT_GRP_##sym,
+ foreach_session_evt_grp
+#undef _
+ SESSION_EVT_N_GRP
+} session_evt_grp_e;
+
+typedef enum session_evt_to_grp_
+{
+#define _(sym, grp, lvl, str) SESSION_EVT_##sym##_GRP = SESSION_EVT_GRP_##grp,
+ foreach_session_dbg_evt
+#undef _
+} session_evt_to_grp_e;
+
+#define foreach_session_events \
+ _ (CLK_UPDATE_TIME, 1, 1, "Time Update Time") \
+ _ (CLK_MQ_DEQ, 1, 1, "Time MQ Dequeue") \
+ _ (CLK_CTRL_EVTS, 1, 1, "Time Ctrl Events") \
+ _ (CLK_NEW_IO_EVTS, 1, 1, "Time New IO Events") \
+ _ (CLK_OLD_IO_EVTS, 1, 1, "Time Old IO Events") \
+ _ (CLK_TOTAL, 1, 1, "Time Total in Node") \
+ _ (CLK_START, 1, 1, "Time Since Last Reset") \
+ \
+ _ (CNT_MQ_EVTS, 1, 0, "# of MQ Events Processed") \
+ _ (CNT_CTRL_EVTS, 1, 0, "# of Ctrl Events Processed") \
+ _ (CNT_NEW_EVTS, 1, 0, "# of New Events Processed") \
+ _ (CNT_OLD_EVTS, 1, 0, "# of Old Events Processed") \
+ _ (CNT_IO_EVTS, 1, 0, "# of Events Processed") \
+ _ (CNT_NODE_CALL, 1, 0, "# of Node Calls") \
+ \
+ _ (BASE_OFFSET_IO_EVTS, 0, 0, "NULL") \
+ _ (SESSION_IO_EVT_RX, 1, 0, "# of IO Event RX") \
+ _ (SESSION_IO_EVT_TX, 1, 0, "# of IO Event TX") \
+ _ (SESSION_IO_EVT_TX_FLUSH, 1, 0, "# of IO Event TX Flush") \
+ _ (SESSION_IO_EVT_BUILTIN_RX, 1, 0, "# of IO Event BuiltIn RX") \
+ _ (SESSION_IO_EVT_TX_MAIN, 1, 0, "# of IO Event TX Main")
typedef enum
{
@@ -90,17 +122,28 @@ typedef struct session_dbg_evts_t
typedef struct session_dbg_main_
{
session_dbg_evts_t *wrk;
+ u8 grp_dbg_lvl[SESSION_EVT_N_GRP];
} session_dbg_main_t;
extern session_dbg_main_t session_dbg_main;
-#define SESSION_DEBUG 0 * (TRANSPORT_DEBUG > 0)
-#define SESSION_DEQ_EVTS (0)
-#define SESSION_DISPATCH_DBG (0)
-#define SESSION_EVT_POLL_DBG (0)
-#define SESSION_SM (0)
+#if defined VPP_SESSION_DEBUG && (TRANSPORT_DEBUG > 0)
+#define SESSION_DEBUG (1)
+#define SESSION_DEQ_EVTS (1)
+#define SESSION_DISPATCH_DBG (1)
+#define SESSION_EVT_POLL_DBG (1)
+#define SESSION_SM (1)
+#define SESSION_CLOCKS_EVT_DBG (1)
+#define SESSION_COUNTS_EVT_DBG (1)
+#else
+#define SESSION_DEBUG (0)
+#define SESSION_DEQ_EVTS (0)
+#define SESSION_DISPATCH_DBG (0)
+#define SESSION_EVT_POLL_DBG (0)
+#define SESSION_SM (0)
#define SESSION_CLOCKS_EVT_DBG (0)
#define SESSION_COUNTS_EVT_DBG (0)
+#endif
#if SESSION_DEBUG
@@ -123,17 +166,43 @@ extern session_dbg_main_t session_dbg_main;
ed = ELOG_DATA (&vlib_global_main.elog_main, _e)
#if SESSION_SM
-#define SESSION_EVT_FREE_HANDLER(_s) \
-{ \
- ELOG_TYPE_DECLARE (_e) = \
- { \
- .format = "free: idx %u", \
- .format_args = "i4", \
- }; \
- DEC_SESSION_ETD(_s, _e, 1); \
- ed->data[0] = _s->session_index; \
-}
+#define SESSION_EVT_STATE_CHANGE_HANDLER(_s) \
+ { \
+ ELOG_TYPE_DECLARE (_e) = { \
+ .format = "%s: idx %u", \
+ .format_args = "t4i4", \
+ .n_enum_strings = 12, \
+ .enum_strings = { \
+ "created", \
+ "listening", \
+ "connecting", \
+ "accepting", \
+ "ready", \
+ "opened", \
+ "transport closing", \
+ "closing", \
+ "app closed", \
+ "transport closed", \
+ "closed", \
+ "transport deleted", \
+ }, \
+ }; \
+ DEC_SESSION_ETD (_s, _e, 2); \
+ ed->data[0] = _s->session_state; \
+ ed->data[1] = _s->session_index; \
+ }
+
+#define SESSION_EVT_FREE_HANDLER(_s) \
+ { \
+ ELOG_TYPE_DECLARE (_e) = { \
+ .format = "free: idx %u", \
+ .format_args = "i4", \
+ }; \
+ DEC_SESSION_ED (_e, 1); \
+ ed->data[0] = _s->session_index; \
+ }
#else
+#define SESSION_EVT_STATE_CHANGE_HANDLER(_s)
#define SESSION_EVT_FREE_HANDLER(_s)
#endif
@@ -282,17 +351,17 @@ extern session_dbg_main_t session_dbg_main;
counters[SESS_Q_##_node_evt].u64 += _cnt; \
}
-#define SESSION_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk) \
-{ \
- u8 type = SESS_Q_BASE_OFFSET_IO_EVTS + _node_evt + 1; \
- session_dbg_evts_t *sde; \
- sde = &session_dbg_main.wrk[_wrk->vm->thread_index]; \
- sde->counters[type].u64 += _cnt; \
- sde->counters[SESS_Q_CNT_IO_EVTS].u64 += _cnt ; \
-}
+#define SESSION_EVT_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk) \
+ { \
+ u8 type = SESS_Q_BASE_OFFSET_IO_EVTS + _node_evt + 1; \
+ session_dbg_evts_t *sde; \
+ sde = &session_dbg_main.wrk[_wrk->vm->thread_index]; \
+ sde->counters[type].u64 += _cnt; \
+ sde->counters[SESS_Q_CNT_IO_EVTS].u64 += _cnt; \
+ }
#else
#define SESSION_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk)
-#define SESSION_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk)
+#define SESSION_EVT_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk)
#endif /*SESSION_COUNTS_EVT_DBG */
@@ -322,8 +391,18 @@ extern session_dbg_main_t session_dbg_main;
#define CONCAT_HELPER(_a, _b) _a##_b
#define CC(_a, _b) CONCAT_HELPER(_a, _b)
-#define SESSION_EVT(_evt, _args...) CC(_evt, _HANDLER)(_args)
-
+#define session_evt_lvl(_evt) CC (_evt, _LVL)
+#define session_evt_grp(_evt) CC (_evt, _GRP)
+#define session_evt_grp_dbg_lvl(_evt) \
+ session_dbg_main.grp_dbg_lvl[session_evt_grp (_evt)]
+#define SESSION_EVT(_evt, _args...) \
+ do \
+ { \
+ if (PREDICT_FALSE (session_evt_grp_dbg_lvl (_evt) >= \
+ session_evt_lvl (_evt))) \
+ CC (_evt, _HANDLER) (_args); \
+ } \
+ while (0)
#else
#define SESSION_EVT(_evt, _args...)
#define SESSION_DBG(_fmt, _args...)
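
For reference, a rough manual expansion of a call site under the new gating macros; SESSION_EVT_FREE sits in group SM at level 1 per foreach_session_dbg_evt, so this is approximately what the compiler sees (a sketch, not verbatim preprocessor output):

    /* SESSION_EVT (SESSION_EVT_FREE, s); expands roughly to: */
    do
      {
        if (PREDICT_FALSE (session_dbg_main.grp_dbg_lvl[SESSION_EVT_GRP_SM] >=
                           SESSION_EVT_FREE_LVL))
          SESSION_EVT_FREE_HANDLER (s);
      }
    while (0);

i.e., the handler only runs once "session debug group <SM-index> level 1" (or higher) has been set at runtime.
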
diff --git a/src/vnet/session/session_input.c b/src/vnet/session/session_input.c
new file mode 100644
index 00000000000..73b777127fd
--- /dev/null
+++ b/src/vnet/session/session_input.c
@@ -0,0 +1,343 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/session/session.h>
+#include <vnet/session/application.h>
+
+static inline int
+mq_try_lock (svm_msg_q_t *mq)
+{
+ int rv, n_try = 0;
+
+ while (n_try < 100)
+ {
+ rv = svm_msg_q_try_lock (mq);
+ if (!rv)
+ return 0;
+ n_try += 1;
+ usleep (1);
+ }
+
+ return -1;
+}
+
+always_inline u8
+mq_event_ring_index (session_evt_type_t et)
+{
+ return (et >= SESSION_CTRL_EVT_RPC ? SESSION_MQ_CTRL_EVT_RING :
+ SESSION_MQ_IO_EVT_RING);
+}
+
+void
+app_worker_del_all_events (app_worker_t *app_wrk)
+{
+ session_worker_t *wrk;
+ session_event_t *evt;
+ u32 thread_index;
+ session_t *s;
+
+ for (thread_index = 0; thread_index < vec_len (app_wrk->wrk_evts);
+ thread_index++)
+ {
+ while (clib_fifo_elts (app_wrk->wrk_evts[thread_index]))
+ {
+ clib_fifo_sub2 (app_wrk->wrk_evts[thread_index], evt);
+ switch (evt->event_type)
+ {
+ case SESSION_CTRL_EVT_MIGRATED:
+ s = session_get (evt->session_index, thread_index);
+ transport_cleanup (session_get_transport_proto (s),
+ s->connection_index, s->thread_index);
+ session_free (s);
+ break;
+ case SESSION_CTRL_EVT_CLEANUP:
+ s = session_get (evt->as_u64[0] & 0xffffffff, thread_index);
+ if (evt->as_u64[0] >> 32 != SESSION_CLEANUP_SESSION)
+ break;
+ uword_to_pointer (evt->as_u64[1], void (*) (session_t * s)) (s);
+ break;
+ case SESSION_CTRL_EVT_HALF_CLEANUP:
+ s = ho_session_get (evt->session_index);
+ pool_put_index (app_wrk->half_open_table, s->ho_index);
+ session_free (s);
+ break;
+ default:
+ break;
+ }
+ }
+ wrk = session_main_get_worker (thread_index);
+ clib_bitmap_set (wrk->app_wrks_pending_ntf, app_wrk->wrk_index, 0);
+ }
+}
+
+always_inline int
+app_worker_flush_events_inline (app_worker_t *app_wrk, u32 thread_index,
+ u8 is_builtin)
+{
+ application_t *app = application_get (app_wrk->app_index);
+ svm_msg_q_t *mq = app_wrk->event_queue;
+ u8 ring_index, mq_is_cong;
+ session_state_t old_state;
+ session_event_t *evt;
+ u32 n_evts = 128, i;
+ session_t *s;
+ int rv;
+
+ n_evts = clib_min (n_evts, clib_fifo_elts (app_wrk->wrk_evts[thread_index]));
+
+ if (!is_builtin)
+ {
+ mq_is_cong = app_worker_mq_is_congested (app_wrk);
+ if (mq_try_lock (mq))
+ {
+ app_worker_set_mq_wrk_congested (app_wrk, thread_index);
+ return 0;
+ }
+ }
+
+ for (i = 0; i < n_evts; i++)
+ {
+ evt = clib_fifo_head (app_wrk->wrk_evts[thread_index]);
+ if (!is_builtin)
+ {
+ ring_index = mq_event_ring_index (evt->event_type);
+ if (svm_msg_q_or_ring_is_full (mq, ring_index))
+ {
+ app_worker_set_mq_wrk_congested (app_wrk, thread_index);
+ break;
+ }
+ }
+
+ switch (evt->event_type)
+ {
+ case SESSION_IO_EVT_RX:
+ s = session_get (evt->session_index, thread_index);
+ s->flags &= ~SESSION_F_RX_EVT;
+ /* Application didn't confirm accept yet */
+ if (PREDICT_FALSE (s->session_state == SESSION_STATE_ACCEPTING ||
+ s->session_state == SESSION_STATE_CONNECTING))
+ break;
+ app->cb_fns.builtin_app_rx_callback (s);
+ break;
+ /* Handle sessions that might not be on current thread */
+ case SESSION_IO_EVT_BUILTIN_RX:
+ s = session_get_from_handle_if_valid (evt->session_handle);
+ if (!s)
+ break;
+ s->flags &= ~SESSION_F_RX_EVT;
+ if (PREDICT_FALSE (s->session_state == SESSION_STATE_ACCEPTING ||
+ s->session_state == SESSION_STATE_CONNECTING))
+ break;
+ app->cb_fns.builtin_app_rx_callback (s);
+ break;
+ case SESSION_IO_EVT_TX:
+ s = session_get (evt->session_index, thread_index);
+ app->cb_fns.builtin_app_tx_callback (s);
+ break;
+ case SESSION_IO_EVT_TX_MAIN:
+ s = session_get_from_handle_if_valid (evt->session_handle);
+ if (!s)
+ break;
+ app->cb_fns.builtin_app_tx_callback (s);
+ break;
+ case SESSION_CTRL_EVT_BOUND:
+ /* No app cb function currently */
+ if (is_builtin)
+ break;
+ app->cb_fns.session_listened_callback (
+ app_wrk->wrk_index, evt->as_u64[1] >> 32, evt->session_handle,
+ evt->as_u64[1] & 0xffffffff);
+ break;
+ case SESSION_CTRL_EVT_ACCEPTED:
+ s = session_get (evt->session_index, thread_index);
+ old_state = s->session_state;
+ if (app->cb_fns.session_accept_callback (s))
+ {
+ session_detach_app (s);
+ break;
+ }
+ if (is_builtin)
+ {
+ if (old_state >= SESSION_STATE_TRANSPORT_CLOSING)
+ {
+ session_set_state (s,
+ clib_max (old_state, s->session_state));
+ if (!(s->flags & SESSION_F_APP_CLOSED))
+ app->cb_fns.session_disconnect_callback (s);
+ }
+ }
+ break;
+ case SESSION_CTRL_EVT_CONNECTED:
+ if (!(evt->as_u64[1] & 0xffffffff))
+ {
+ s = session_get (evt->session_index, thread_index);
+ old_state = s->session_state;
+ }
+ else
+ s = 0;
+ rv = app->cb_fns.session_connected_callback (
+ app_wrk->wrk_index, evt->as_u64[1] >> 32, s,
+ evt->as_u64[1] & 0xffffffff);
+ if (!s)
+ break;
+ if (rv)
+ {
+ session_detach_app (s);
+ break;
+ }
+ if (old_state >= SESSION_STATE_TRANSPORT_CLOSING)
+ {
+ session_set_state (s, clib_max (old_state, s->session_state));
+ if (!(s->flags & SESSION_F_APP_CLOSED))
+ app->cb_fns.session_disconnect_callback (s);
+ }
+ break;
+ case SESSION_CTRL_EVT_DISCONNECTED:
+ s = session_get (evt->session_index, thread_index);
+ if (!(s->flags & SESSION_F_APP_CLOSED))
+ app->cb_fns.session_disconnect_callback (s);
+ break;
+ case SESSION_CTRL_EVT_RESET:
+ s = session_get (evt->session_index, thread_index);
+ if (!(s->flags & SESSION_F_APP_CLOSED))
+ app->cb_fns.session_reset_callback (s);
+ break;
+ case SESSION_CTRL_EVT_UNLISTEN_REPLY:
+ if (is_builtin)
+ break;
+ app->cb_fns.session_unlistened_callback (
+ app_wrk->wrk_index, evt->session_handle, evt->as_u64[1] >> 32,
+ evt->as_u64[1] & 0xffffffff);
+ break;
+ case SESSION_CTRL_EVT_MIGRATED:
+ s = session_get (evt->session_index, thread_index);
+ app->cb_fns.session_migrate_callback (s, evt->as_u64[1]);
+ transport_cleanup (session_get_transport_proto (s),
+ s->connection_index, s->thread_index);
+ session_free (s);
+ /* Notify app that it has data on the new session */
+ s = session_get_from_handle (evt->as_u64[1]);
+ session_send_io_evt_to_thread (s->rx_fifo,
+ SESSION_IO_EVT_BUILTIN_RX);
+ break;
+ case SESSION_CTRL_EVT_TRANSPORT_CLOSED:
+ s = session_get (evt->session_index, thread_index);
+ /* Notification enqueued before session was refused by app */
+ if (PREDICT_FALSE (s->app_wrk_index == APP_INVALID_INDEX))
+ break;
+ if (app->cb_fns.session_transport_closed_callback)
+ app->cb_fns.session_transport_closed_callback (s);
+ break;
+ case SESSION_CTRL_EVT_CLEANUP:
+ s = session_get (evt->as_u64[0] & 0xffffffff, thread_index);
+ /* Notification enqueued before session was refused by app */
+ if (PREDICT_TRUE (s->app_wrk_index != APP_INVALID_INDEX))
+ {
+ if (app->cb_fns.session_cleanup_callback)
+ app->cb_fns.session_cleanup_callback (s, evt->as_u64[0] >> 32);
+ }
+ if (evt->as_u64[0] >> 32 != SESSION_CLEANUP_SESSION)
+ break;
+ uword_to_pointer (evt->as_u64[1], void (*) (session_t * s)) (s);
+ break;
+ case SESSION_CTRL_EVT_HALF_CLEANUP:
+ s = ho_session_get (evt->session_index);
+ ASSERT (session_vlib_thread_is_cl_thread ());
+ if (app->cb_fns.half_open_cleanup_callback)
+ app->cb_fns.half_open_cleanup_callback (s);
+ pool_put_index (app_wrk->half_open_table, s->ho_index);
+ session_free (s);
+ break;
+ case SESSION_CTRL_EVT_APP_ADD_SEGMENT:
+ app->cb_fns.add_segment_callback (app_wrk->wrk_index,
+ evt->as_u64[1]);
+ break;
+ case SESSION_CTRL_EVT_APP_DEL_SEGMENT:
+ app->cb_fns.del_segment_callback (app_wrk->wrk_index,
+ evt->as_u64[1]);
+ break;
+ default:
+ clib_warning ("unexpected event: %u", evt->event_type);
+ ASSERT (0);
+ break;
+ }
+ clib_fifo_advance_head (app_wrk->wrk_evts[thread_index], 1);
+ }
+
+ if (!is_builtin)
+ {
+ svm_msg_q_unlock (mq);
+ if (mq_is_cong && i == n_evts)
+ app_worker_unset_wrk_mq_congested (app_wrk, thread_index);
+ }
+
+ return 0;
+}
+
+int
+app_wrk_flush_wrk_events (app_worker_t *app_wrk, u32 thread_index)
+{
+ if (app_worker_application_is_builtin (app_wrk))
+ return app_worker_flush_events_inline (app_wrk, thread_index,
+ 1 /* is_builtin */);
+ else
+ return app_worker_flush_events_inline (app_wrk, thread_index,
+ 0 /* is_builtin */);
+}
+
+static inline int
+session_wrk_flush_events (session_worker_t *wrk)
+{
+ app_worker_t *app_wrk;
+ uword app_wrk_index;
+ u32 thread_index;
+
+ thread_index = wrk->vm->thread_index;
+ app_wrk_index = clib_bitmap_first_set (wrk->app_wrks_pending_ntf);
+
+ while (app_wrk_index != ~0)
+ {
+ app_wrk = app_worker_get_if_valid (app_wrk_index);
+ /* app_wrk events are flushed on free, so should be valid here */
+ ASSERT (app_wrk != 0);
+ app_wrk_flush_wrk_events (app_wrk, thread_index);
+
+ if (!clib_fifo_elts (app_wrk->wrk_evts[thread_index]))
+ clib_bitmap_set (wrk->app_wrks_pending_ntf, app_wrk->wrk_index, 0);
+
+ app_wrk_index =
+ clib_bitmap_next_set (wrk->app_wrks_pending_ntf, app_wrk_index + 1);
+ }
+
+ if (!clib_bitmap_is_zero (wrk->app_wrks_pending_ntf))
+ vlib_node_set_interrupt_pending (wrk->vm, session_input_node.index);
+
+ return 0;
+}
+
+VLIB_NODE_FN (session_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ u32 thread_index = vm->thread_index;
+ session_worker_t *wrk;
+
+ wrk = session_main_get_worker (thread_index);
+ session_wrk_flush_events (wrk);
+
+ return 0;
+}
+
+VLIB_REGISTER_NODE (session_input_node) = {
+ .name = "session-input",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_DISABLED,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */ \ No newline at end of file
diff --git a/src/vnet/session/session_lookup.c b/src/vnet/session/session_lookup.c
index 5cd1712f195..ff20bc2d835 100644
--- a/src/vnet/session/session_lookup.c
+++ b/src/vnet/session/session_lookup.c
@@ -29,13 +29,14 @@
#include <vnet/session/session.h>
#include <vnet/session/application.h>
+static session_lookup_main_t sl_main;
+
/**
* Network namespace index (i.e., fib index) to session lookup table. We
* should have one per network protocol type but for now we only support IP4/6
*/
static u32 *fib_index_to_table_index[2];
-/* *INDENT-OFF* */
/* 16 octets */
typedef CLIB_PACKED (struct {
union
@@ -72,7 +73,6 @@ typedef CLIB_PACKED (struct {
u64 as_u64[6];
};
}) v6_connection_key_t;
-/* *INDENT-ON* */
typedef clib_bihash_kv_16_8_t session_kv4_t;
typedef clib_bihash_kv_48_8_t session_kv6_t;
@@ -155,29 +155,70 @@ make_v6_ss_kv_from_tc (session_kv6_t * kv, transport_connection_t * tc)
tc->rmt_port, tc->proto);
}
+static inline u8
+session_table_alloc_needs_sync (void)
+{
+ return !vlib_thread_is_main_w_barrier () && (vlib_num_workers () > 1);
+}
+
+static_always_inline u8
+session_table_is_alloced (u8 fib_proto, u32 fib_index)
+{
+ return (vec_len (fib_index_to_table_index[fib_proto]) > fib_index &&
+ fib_index_to_table_index[fib_proto][fib_index] != ~0);
+}
+
static session_table_t *
session_table_get_or_alloc (u8 fib_proto, u32 fib_index)
{
session_table_t *st;
u32 table_index;
+
ASSERT (fib_index != ~0);
- if (vec_len (fib_index_to_table_index[fib_proto]) > fib_index &&
- fib_index_to_table_index[fib_proto][fib_index] != ~0)
+
+ if (session_table_is_alloced (fib_proto, fib_index))
{
table_index = fib_index_to_table_index[fib_proto][fib_index];
return session_table_get (table_index);
}
+
+ u8 needs_sync = session_table_alloc_needs_sync ();
+ session_lookup_main_t *slm = &sl_main;
+
+  /* Stop workers; otherwise consumers might be affected. This is
+ * acceptable because new tables should seldom be allocated */
+ if (needs_sync)
+ {
+ vlib_workers_sync ();
+
+  /* We might have a race; only one worker is allowed at once */
+ clib_spinlock_lock (&slm->st_alloc_lock);
+ }
+
+ /* Another worker just allocated this table */
+ if (session_table_is_alloced (fib_proto, fib_index))
+ {
+ table_index = fib_index_to_table_index[fib_proto][fib_index];
+ st = session_table_get (table_index);
+ }
else
{
st = session_table_alloc ();
- table_index = session_table_index (st);
+ st->active_fib_proto = fib_proto;
+ session_table_init (st, fib_proto);
vec_validate_init_empty (fib_index_to_table_index[fib_proto], fib_index,
~0);
+ table_index = session_table_index (st);
fib_index_to_table_index[fib_proto][fib_index] = table_index;
- st->active_fib_proto = fib_proto;
- session_table_init (st, fib_proto);
- return st;
}
+
+ if (needs_sync)
+ {
+ clib_spinlock_unlock (&slm->st_alloc_lock);
+ vlib_workers_continue ();
+ }
+
+ return st;
}
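
The allocation path above is the classic double-checked pattern: an unlocked fast check, a worker sync plus spinlock, then a re-check before allocating. A minimal self-contained analogue in plain C, using pthreads in place of the VPP worker barrier and spinlock, and ignoring the memory-ordering details the barrier handles (all names hypothetical):

    #include <pthread.h>
    #include <stdlib.h>

    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
    static void *tables[16];

    static void *
    table_get_or_alloc (int index)
    {
      void *t = tables[index];
      if (t)                     /* fast path, no lock taken */
        return t;
      pthread_mutex_lock (&table_lock);
      t = tables[index];         /* re-check: another thread may have won */
      if (!t)
        t = tables[index] = calloc (1, 64);
      pthread_mutex_unlock (&table_lock);
      return t;
    }
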
static session_table_t *
@@ -1046,9 +1087,7 @@ session_lookup_connection4 (u32 fib_index, ip4_address_t * lcl,
/**
* Lookup session with ip4 and transport layer information
*
- * Important note: this may look into another thread's pool table and
- * register as 'peeker'. Caller should call @ref session_pool_remove_peeker as
- * if needed as soon as possible.
+ * Important note: this may look into another thread's pool table
*
* Lookup logic is similar to that of @ref session_lookup_connection_wt4 but
* this returns a session as opposed to a transport connection and it does not
@@ -1145,7 +1184,6 @@ session_lookup_connection_wt6 (u32 fib_index, ip6_address_t * lcl,
rv = clib_bihash_search_inline_48_8 (&st->v6_session_hash, &kv6);
if (rv == 0)
{
- ASSERT ((u32) (kv6.value >> 32) == thread_index);
if (PREDICT_FALSE ((u32) (kv6.value >> 32) != thread_index))
{
*result = SESSION_LOOKUP_RESULT_WRONG_THREAD;
@@ -1313,8 +1351,8 @@ session_lookup_connection (u32 fib_index, ip46_address_t * lcl,
lcl_port, rmt_port, proto);
}
-int
-vnet_session_rule_add_del (session_rule_add_del_args_t * args)
+session_error_t
+vnet_session_rule_add_del (session_rule_add_del_args_t *args)
{
app_namespace_t *app_ns = app_namespace_get (args->appns_index);
session_rules_table_t *srt;
@@ -1324,14 +1362,14 @@ vnet_session_rule_add_del (session_rule_add_del_args_t * args)
int rv = 0;
if (!app_ns)
- return VNET_API_ERROR_APP_INVALID_NS;
+ return SESSION_E_INVALID_NS;
if (args->scope > 3)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if (args->transport_proto != TRANSPORT_PROTO_TCP
&& args->transport_proto != TRANSPORT_PROTO_UDP)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if ((args->scope & SESSION_RULE_SCOPE_GLOBAL) || args->scope == 0)
{
@@ -1571,7 +1609,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (session_rule_command, static) =
{
.path = "session rule",
@@ -1579,7 +1616,6 @@ VLIB_CLI_COMMAND (session_rule_command, static) =
"<lcl-ip/plen> <lcl-port> <rmt-ip/plen> <rmt-port> action <action>",
.function = session_rule_command_fn,
};
-/* *INDENT-ON* */
void
session_lookup_dump_rules_table (u32 fib_index, u8 fib_proto,
@@ -1702,7 +1738,6 @@ show_session_rules_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_session_rules_command, static) =
{
.path = "show session rules",
@@ -1710,11 +1745,93 @@ VLIB_CLI_COMMAND (show_session_rules_command, static) =
"<lcl-port> <rmt-ip/plen> <rmt-port> scope <scope>]",
.function = show_session_rules_command_fn,
};
-/* *INDENT-ON* */
+
+u8 *
+format_session_lookup_tables (u8 *s, va_list *args)
+{
+ u32 fib_proto = va_arg (*args, u32);
+ u32 *fibs, num_fibs = 0, fib_index, indent;
+ session_table_t *st;
+ u64 total_mem = 0;
+
+ fibs = fib_index_to_table_index[fib_proto];
+
+ for (fib_index = 0; fib_index < vec_len (fibs); fib_index++)
+ {
+ if (fibs[fib_index] == ~0)
+ continue;
+
+ num_fibs += 1;
+ st = session_table_get (fibs[fib_index]);
+ total_mem += session_table_memory_size (st);
+ }
+
+ indent = format_get_indent (s);
+ s = format (s, "active fibs:\t%u\n", num_fibs);
+ s = format (s, "%Umax fib-index:\t%u\n", format_white_space, indent,
+ vec_len (fibs) - 1);
+ s = format (s, "%Utable memory:\t%U\n", format_white_space, indent,
+ format_memory_size, total_mem);
+ s = format (s, "%Uvec memory:\t%U\n", format_white_space, indent,
+ format_memory_size, vec_mem_size (fibs));
+
+ return s;
+}
+
+static clib_error_t *
+show_session_lookup_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ session_table_t *st;
+ u32 fib_index = ~0;
+
+ session_cli_return_if_not_enabled ();
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table %u", &fib_index))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (fib_index != ~0)
+ {
+ st = session_table_get_for_fib_index (FIB_PROTOCOL_IP4, fib_index);
+ if (st)
+ vlib_cli_output (vm, "%U", format_session_table, st);
+ else
+ vlib_cli_output (vm, "no ip4 table for fib-index %u", fib_index);
+ st = session_table_get_for_fib_index (FIB_PROTOCOL_IP6, fib_index);
+ if (st)
+ vlib_cli_output (vm, "%U", format_session_table, st);
+ else
+ vlib_cli_output (vm, "no ip6 table for fib-index %u", fib_index);
+ goto done;
+ }
+
+ vlib_cli_output (vm, "ip4 fib lookup tables:\n %U",
+ format_session_lookup_tables, FIB_PROTOCOL_IP4);
+ vlib_cli_output (vm, "ip6 fib lookup tables:\n %U",
+ format_session_lookup_tables, FIB_PROTOCOL_IP6);
+
+done:
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_session_lookup_command, static) = {
+ .path = "show session lookup",
+ .short_help = "show session lookup [table <fib-index>]",
+ .function = show_session_lookup_command_fn,
+};
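
A hypothetical invocation of the lookup-table command registered above; the field layout follows format_session_lookup_tables, and the sizes are illustrative:

    vpp# show session lookup
    ip4 fib lookup tables:
     active fibs:   1
     max fib-index: 0
     table memory:  256K
     vec memory:    64
    ip6 fib lookup tables:
     ...
    vpp# show session lookup table 0
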
void
session_lookup_init (void)
{
+ session_lookup_main_t *slm = &sl_main;
+
+ clib_spinlock_init (&slm->st_alloc_lock);
+
/*
* Allocate default table and map it to fib_index 0
*/
diff --git a/src/vnet/session/session_lookup.h b/src/vnet/session/session_lookup.h
index c1037dff8c9..f9ffc15165a 100644
--- a/src/vnet/session/session_lookup.h
+++ b/src/vnet/session/session_lookup.h
@@ -29,6 +29,11 @@ typedef enum session_lookup_result_
SESSION_LOOKUP_RESULT_FILTERED
} session_lookup_result_t;
+typedef struct session_lookup_main_
+{
+ clib_spinlock_t st_alloc_lock;
+} session_lookup_main_t;
+
session_t *session_lookup_safe4 (u32 fib_index, ip4_address_t * lcl,
ip4_address_t * rmt, u16 lcl_port,
u16 rmt_port, u8 proto);
@@ -130,7 +135,7 @@ typedef struct _session_rule_add_del_args
u8 transport_proto;
} session_rule_add_del_args_t;
-int vnet_session_rule_add_del (session_rule_add_del_args_t * args);
+session_error_t vnet_session_rule_add_del (session_rule_add_del_args_t *args);
void session_lookup_set_tables_appns (app_namespace_t * app_ns);
void session_lookup_init (void);
diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c
index fe4e7324218..0ec158fb429 100644
--- a/src/vnet/session/session_node.c
+++ b/src/vnet/session/session_node.c
@@ -26,12 +26,28 @@
#include <svm/queue.h>
#include <sys/timerfd.h>
-#define app_check_thread_and_barrier(_fn, _arg) \
- if (!vlib_thread_is_main_w_barrier ()) \
- { \
- vlib_rpc_call_main_thread (_fn, (u8 *) _arg, sizeof(*_arg)); \
- return; \
- }
+static inline void
+session_wrk_send_evt_to_main (session_worker_t *wrk, session_evt_elt_t *elt)
+{
+ session_evt_elt_t *he;
+ uword thread_index;
+ u8 is_empty;
+
+ thread_index = wrk->vm->thread_index;
+ he = clib_llist_elt (wrk->event_elts, wrk->evts_pending_main);
+ is_empty = clib_llist_is_empty (wrk->event_elts, evt_list, he);
+ clib_llist_add_tail (wrk->event_elts, evt_list, elt, he);
+ if (is_empty)
+ session_send_rpc_evt_to_thread (0, session_wrk_handle_evts_main_rpc,
+ uword_to_pointer (thread_index, void *));
+}
+
+#define app_check_thread_and_barrier(_wrk, _elt) \
+ if (!vlib_thread_is_main_w_barrier ()) \
+ { \
+ session_wrk_send_evt_to_main (wrk, elt); \
+ return; \
+ }
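
Under the reworked macro, control-event handlers share a common shape: take the worker and event element, requeue to main when not running under the barrier, and only then touch the message. A sketch with a hypothetical message type (the macro references the names wrk and elt directly, so handler parameters must keep those names):

    static void
    session_mq_example_handler (session_worker_t *wrk, session_evt_elt_t *elt)
    {
      example_msg_t *mp;        /* hypothetical message type */

      /* reroutes elt to main's pending list and returns if needed */
      app_check_thread_and_barrier (wrk, elt);

      mp = session_evt_ctrl_data (wrk, elt); /* safe: on main w/ barrier */
      /* ... handle mp ... */
    }
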
static void
session_wrk_timerfd_update (session_worker_t *wrk, u64 time_ns)
@@ -93,16 +109,17 @@ session_mq_free_ext_config (application_t *app, uword offset)
}
static void
-session_mq_listen_handler (void *data)
+session_mq_listen_handler (session_worker_t *wrk, session_evt_elt_t *elt)
{
- session_listen_msg_t *mp = (session_listen_msg_t *) data;
vnet_listen_args_t _a, *a = &_a;
+ session_listen_msg_t *mp;
app_worker_t *app_wrk;
application_t *app;
int rv;
- app_check_thread_and_barrier (session_mq_listen_handler, mp);
+ app_check_thread_and_barrier (wrk, elt);
+ mp = session_evt_ctrl_data (wrk, elt);
app = application_lookup (mp->client_index);
if (!app)
return;
@@ -122,26 +139,31 @@ session_mq_listen_handler (void *data)
a->sep_ext.ext_cfg = session_mq_get_ext_config (app, mp->ext_config);
if ((rv = vnet_listen (a)))
- clib_warning ("listen returned: %U", format_session_error, rv);
+ session_worker_stat_error_inc (wrk, rv, 1);
app_wrk = application_get_worker (app, mp->wrk_index);
- mq_send_session_bound_cb (app_wrk->wrk_index, mp->context, a->handle, rv);
+ app_worker_listened_notify (app_wrk, a->handle, mp->context, rv);
if (mp->ext_config)
session_mq_free_ext_config (app, mp->ext_config);
+
+ /* Make sure events are flushed before releasing barrier, to avoid
+ * potential race with accept. */
+ app_wrk_flush_wrk_events (app_wrk, 0);
}
static void
-session_mq_listen_uri_handler (void *data)
+session_mq_listen_uri_handler (session_worker_t *wrk, session_evt_elt_t *elt)
{
- session_listen_uri_msg_t *mp = (session_listen_uri_msg_t *) data;
vnet_listen_args_t _a, *a = &_a;
+ session_listen_uri_msg_t *mp;
app_worker_t *app_wrk;
application_t *app;
int rv;
- app_check_thread_and_barrier (session_mq_listen_uri_handler, mp);
+ app_check_thread_and_barrier (wrk, elt);
+ mp = session_evt_ctrl_data (wrk, elt);
app = application_lookup (mp->client_index);
if (!app)
return;
@@ -152,7 +174,8 @@ session_mq_listen_uri_handler (void *data)
rv = vnet_bind_uri (a);
app_wrk = application_get_worker (app, 0);
- mq_send_session_bound_cb (app_wrk->wrk_index, mp->context, a->handle, rv);
+ app_worker_listened_notify (app_wrk, a->handle, mp->context, rv);
+ app_wrk_flush_wrk_events (app_wrk, 0);
}
static void
@@ -160,6 +183,7 @@ session_mq_connect_one (session_connect_msg_t *mp)
{
vnet_connect_args_t _a, *a = &_a;
app_worker_t *app_wrk;
+ session_worker_t *wrk;
application_t *app;
int rv;
@@ -173,6 +197,7 @@ session_mq_connect_one (session_connect_msg_t *mp)
a->sep.port = mp->port;
a->sep.transport_proto = mp->proto;
a->sep.peer.fib_index = mp->vrf;
+ a->sep.dscp = mp->dscp;
clib_memcpy_fast (&a->sep.peer.ip, &mp->lcl_ip, sizeof (mp->lcl_ip));
if (mp->is_ip4)
{
@@ -192,9 +217,10 @@ session_mq_connect_one (session_connect_msg_t *mp)
if ((rv = vnet_connect (a)))
{
- clib_warning ("connect returned: %U", format_session_error, rv);
+ wrk = session_main_get_worker (vlib_get_thread_index ());
+ session_worker_stat_error_inc (wrk, rv, 1);
app_wrk = application_get_worker (app, mp->wrk_index);
- mq_send_session_connected_cb (app_wrk->wrk_index, mp->context, 0, rv);
+ app_worker_connect_notify (app_wrk, 0, rv, mp->context);
}
if (mp->ext_config)
@@ -205,23 +231,20 @@ static void
session_mq_handle_connects_rpc (void *arg)
{
u32 max_connects = 32, n_connects = 0;
- vlib_main_t *vm = vlib_get_main ();
session_evt_elt_t *he, *elt, *next;
- session_worker_t *fwrk, *wrk;
+ session_worker_t *fwrk;
- ASSERT (vlib_get_thread_index () == 0);
+ ASSERT (session_vlib_thread_is_cl_thread ());
/* Pending connects on linked list pertaining to first worker */
- fwrk = session_main_get_worker (1);
+ fwrk = session_main_get_worker (transport_cl_thread ());
if (!fwrk->n_pending_connects)
- goto update_state;
-
- vlib_worker_thread_barrier_sync (vm);
+ return;
he = clib_llist_elt (fwrk->event_elts, fwrk->pending_connects);
elt = clib_llist_next (fwrk->event_elts, evt_list, he);
- /* Avoid holding the barrier for too long */
+ /* Avoid holding the worker for too long */
while (n_connects < max_connects && elt != he)
{
next = clib_llist_next (fwrk->event_elts, evt_list, elt);
@@ -235,45 +258,10 @@ session_mq_handle_connects_rpc (void *arg)
/* Decrement with worker barrier */
fwrk->n_pending_connects -= n_connects;
-
- vlib_worker_thread_barrier_release (vm);
-
-update_state:
-
- /* Switch worker to poll mode if it was in interrupt mode and had work or
- * back to interrupt if threshold of loops without a connect is passed.
- * While in poll mode, reprogram connects rpc */
- wrk = session_main_get_worker (0);
- if (wrk->state != SESSION_WRK_POLLING)
- {
- if (n_connects)
- {
- session_wrk_set_state (wrk, SESSION_WRK_POLLING);
- vlib_node_set_state (vm, session_queue_node.index,
- VLIB_NODE_STATE_POLLING);
- wrk->no_connect_loops = 0;
- }
- }
- else
- {
- if (!n_connects)
- {
- if (++wrk->no_connect_loops > 1e5)
- {
- session_wrk_set_state (wrk, SESSION_WRK_INTERRUPT);
- vlib_node_set_state (vm, session_queue_node.index,
- VLIB_NODE_STATE_INTERRUPT);
- }
- }
- else
- wrk->no_connect_loops = 0;
- }
-
- if (wrk->state == SESSION_WRK_POLLING)
+ if (fwrk->n_pending_connects > 0)
{
- elt = session_evt_alloc_ctrl (wrk);
- elt->evt.event_type = SESSION_CTRL_EVT_RPC;
- elt->evt.rpc_args.fp = session_mq_handle_connects_rpc;
+ session_send_rpc_evt_to_thread_force (fwrk->vm->thread_index,
+ session_mq_handle_connects_rpc, 0);
}
}
@@ -283,20 +271,28 @@ session_mq_connect_handler (session_worker_t *wrk, session_evt_elt_t *elt)
u32 thread_index = wrk - session_main.wrk;
session_evt_elt_t *he;
- /* No workers, so just deal with the connect now */
- if (PREDICT_FALSE (!thread_index))
+ if (PREDICT_FALSE (thread_index > transport_cl_thread ()))
{
- session_mq_connect_one (session_evt_ctrl_data (wrk, elt));
+ clib_warning ("Connect on wrong thread. Dropping");
return;
}
- if (PREDICT_FALSE (thread_index != 1))
+ /* If on worker, check if main has any pending messages. Avoids reordering
+ * with other control messages that need to be handled by main
+ */
+ if (thread_index)
{
- clib_warning ("Connect on wrong thread. Dropping");
- return;
+ he = clib_llist_elt (wrk->event_elts, wrk->evts_pending_main);
+
+ /* Events pending on main, postpone to avoid reordering */
+ if (!clib_llist_is_empty (wrk->event_elts, evt_list, he))
+ {
+ clib_llist_add_tail (wrk->event_elts, evt_list, elt, he);
+ return;
+ }
}
- /* Add to pending list to be handled by main thread */
+ /* Add to pending list to be handled by first worker */
he = clib_llist_elt (wrk->event_elts, wrk->pending_connects);
clib_llist_add_tail (wrk->event_elts, evt_list, elt, he);
@@ -304,23 +300,23 @@ session_mq_connect_handler (session_worker_t *wrk, session_evt_elt_t *elt)
wrk->n_pending_connects += 1;
if (wrk->n_pending_connects == 1)
{
- vlib_node_set_interrupt_pending (vlib_get_main_by_index (0),
- session_queue_node.index);
- session_send_rpc_evt_to_thread (0, session_mq_handle_connects_rpc, 0);
+ session_send_rpc_evt_to_thread_force (thread_index,
+ session_mq_handle_connects_rpc, 0);
}
}
static void
-session_mq_connect_uri_handler (void *data)
+session_mq_connect_uri_handler (session_worker_t *wrk, session_evt_elt_t *elt)
{
- session_connect_uri_msg_t *mp = (session_connect_uri_msg_t *) data;
vnet_connect_args_t _a, *a = &_a;
+ session_connect_uri_msg_t *mp;
app_worker_t *app_wrk;
application_t *app;
int rv;
- app_check_thread_and_barrier (session_mq_connect_uri_handler, mp);
+ app_check_thread_and_barrier (wrk, elt);
+ mp = session_evt_ctrl_data (wrk, elt);
app = application_lookup (mp->client_index);
if (!app)
return;
@@ -331,9 +327,9 @@ session_mq_connect_uri_handler (void *data)
a->app_index = app->app_index;
if ((rv = vnet_connect_uri (a)))
{
- clib_warning ("connect_uri returned: %d", rv);
+ session_worker_stat_error_inc (wrk, rv, 1);
app_wrk = application_get_worker (app, 0 /* default wrk only */ );
- mq_send_session_connected_cb (app_wrk->wrk_index, mp->context, 0, rv);
+ app_worker_connect_notify (app_wrk, 0, rv, mp->context);
}
}
@@ -370,14 +366,15 @@ session_mq_disconnect_handler (void *data)
}
static void
-app_mq_detach_handler (void *data)
+app_mq_detach_handler (session_worker_t *wrk, session_evt_elt_t *elt)
{
- session_app_detach_msg_t *mp = (session_app_detach_msg_t *) data;
vnet_app_detach_args_t _a, *a = &_a;
+ session_app_detach_msg_t *mp;
application_t *app;
- app_check_thread_and_barrier (app_mq_detach_handler, mp);
+ app_check_thread_and_barrier (wrk, elt);
+ mp = session_evt_ctrl_data (wrk, elt);
app = application_lookup (mp->client_index);
if (!app)
return;
@@ -388,18 +385,19 @@ app_mq_detach_handler (void *data)
}
static void
-session_mq_unlisten_rpc (session_unlisten_msg_t *mp)
+session_mq_unlisten_handler (session_worker_t *wrk, session_evt_elt_t *elt)
{
- vlib_main_t *vm = vlib_get_main ();
vnet_unlisten_args_t _a, *a = &_a;
+ session_unlisten_msg_t *mp;
app_worker_t *app_wrk;
session_handle_t sh;
application_t *app;
- u32 context;
int rv;
+ app_check_thread_and_barrier (wrk, elt);
+
+ mp = session_evt_ctrl_data (wrk, elt);
sh = mp->handle;
- context = mp->context;
app = application_lookup (mp->client_index);
if (!app)
@@ -410,65 +408,34 @@ session_mq_unlisten_rpc (session_unlisten_msg_t *mp)
a->handle = sh;
a->wrk_map_index = mp->wrk_index;
- vlib_worker_thread_barrier_sync (vm);
-
if ((rv = vnet_unlisten (a)))
- clib_warning ("unlisten returned: %d", rv);
-
- vlib_worker_thread_barrier_release (vm);
+ session_worker_stat_error_inc (wrk, rv, 1);
app_wrk = application_get_worker (app, a->wrk_map_index);
if (!app_wrk)
return;
- mq_send_unlisten_reply (app_wrk, sh, context, rv);
- clib_mem_free (mp);
-}
-
-static void
-session_mq_unlisten_handler (session_worker_t *wrk, session_evt_elt_t *elt)
-{
- u32 thread_index = wrk - session_main.wrk;
- session_unlisten_msg_t *mp, *arg;
-
- mp = session_evt_ctrl_data (wrk, elt);
- arg = clib_mem_alloc (sizeof (session_unlisten_msg_t));
- clib_memcpy_fast (arg, mp, sizeof (*arg));
-
- if (PREDICT_FALSE (!thread_index))
- {
- session_mq_unlisten_rpc (arg);
- return;
- }
-
- session_send_rpc_evt_to_thread_force (0, session_mq_unlisten_rpc, arg);
+ app_worker_unlisten_reply (app_wrk, sh, mp->context, rv);
}
static void
-session_mq_accepted_reply_handler (void *data)
+session_mq_accepted_reply_handler (session_worker_t *wrk,
+ session_evt_elt_t *elt)
{
- session_accepted_reply_msg_t *mp = (session_accepted_reply_msg_t *) data;
vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+ session_accepted_reply_msg_t *mp;
session_state_t old_state;
app_worker_t *app_wrk;
session_t *s;
- /* Server isn't interested, kill the session */
- if (mp->retval)
- {
- a->app_index = mp->context;
- a->handle = mp->handle;
- vnet_disconnect_session (a);
- return;
- }
+ mp = session_evt_ctrl_data (wrk, elt);
/* Mail this back from the main thread. We're not polling in main
* thread so we're using other workers for notifications. */
- if (vlib_num_workers () && vlib_get_thread_index () != 0
- && session_thread_from_handle (mp->handle) == 0)
+ if (session_thread_from_handle (mp->handle) == 0 && vlib_num_workers () &&
+ vlib_get_thread_index () != 0)
{
- vlib_rpc_call_main_thread (session_mq_accepted_reply_handler,
- (u8 *) mp, sizeof (*mp));
+ session_wrk_send_evt_to_main (wrk, elt);
return;
}
@@ -483,27 +450,36 @@ session_mq_accepted_reply_handler (void *data)
return;
}
- if (!session_has_transport (s))
+ /* Server isn't interested, disconnect the session */
+ if (mp->retval)
{
- s->session_state = SESSION_STATE_READY;
- if (ct_session_connect_notify (s, SESSION_E_NONE))
- return;
+ a->app_index = mp->context;
+ a->handle = mp->handle;
+ vnet_disconnect_session (a);
+ s->app_wrk_index = SESSION_INVALID_INDEX;
+ return;
}
- else
+
+ /* Special handling for cut-through sessions */
+ if (!session_has_transport (s))
{
- old_state = s->session_state;
- s->session_state = SESSION_STATE_READY;
+ session_set_state (s, SESSION_STATE_READY);
+ ct_session_connect_notify (s, SESSION_E_NONE);
+ return;
+ }
- if (!svm_fifo_is_empty_prod (s->rx_fifo))
- app_worker_lock_and_send_event (app_wrk, s, SESSION_IO_EVT_RX);
+ old_state = s->session_state;
+ session_set_state (s, SESSION_STATE_READY);
- /* Closed while waiting for app to reply. Resend disconnect */
- if (old_state >= SESSION_STATE_TRANSPORT_CLOSING)
- {
- app_worker_close_notify (app_wrk, s);
- s->session_state = old_state;
- return;
- }
+ if (!svm_fifo_is_empty_prod (s->rx_fifo))
+ app_worker_rx_notify (app_wrk, s);
+
+ /* Closed while waiting for app to reply. Resend disconnect */
+ if (old_state >= SESSION_STATE_TRANSPORT_CLOSING)
+ {
+ app_worker_close_notify (app_wrk, s);
+ session_set_state (s, old_state);
+ return;
}
}
@@ -515,15 +491,13 @@ session_mq_reset_reply_handler (void *data)
app_worker_t *app_wrk;
session_t *s;
application_t *app;
- u32 index, thread_index;
mp = (session_reset_reply_msg_t *) data;
app = application_lookup (mp->context);
if (!app)
return;
- session_parse_handle (mp->handle, &index, &thread_index);
- s = session_get_if_valid (index, thread_index);
+ s = session_get_from_handle_if_valid (mp->handle);
/* No session or not the right session */
if (!s || s->session_state < SESSION_STATE_TRANSPORT_CLOSING)
@@ -633,6 +607,7 @@ session_mq_worker_update_handler (void *data)
session_event_t *evt;
session_t *s;
application_t *app;
+ int rv;
app = application_lookup (mp->client_index);
if (!app)
@@ -669,7 +644,9 @@ session_mq_worker_update_handler (void *data)
return;
}
- app_worker_own_session (app_wrk, s);
+ rv = app_worker_own_session (app_wrk, s);
+ if (rv)
+ session_stat_error_inc (rv, 1);
/*
* Send reply
@@ -696,7 +673,7 @@ session_mq_worker_update_handler (void *data)
session_send_io_evt_to_thread (s->tx_fifo, SESSION_IO_EVT_TX);
if (s->rx_fifo && !svm_fifo_is_empty (s->rx_fifo))
- app_worker_lock_and_send_event (app_wrk, s, SESSION_IO_EVT_RX);
+ app_worker_rx_notify (app_wrk, s);
if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
app_worker_close_notify (app_wrk, s);
@@ -774,6 +751,67 @@ session_mq_transport_attr_handler (void *data)
svm_msg_q_add_and_unlock (app_wrk->event_queue, msg);
}
+void
+session_wrk_handle_evts_main_rpc (void *args)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ clib_llist_index_t ei, next_ei;
+ session_evt_elt_t *he, *elt;
+ session_worker_t *fwrk;
+ u32 thread_index;
+
+ vlib_worker_thread_barrier_sync (vm);
+
+ thread_index = pointer_to_uword (args);
+ fwrk = session_main_get_worker (thread_index);
+
+ he = clib_llist_elt (fwrk->event_elts, fwrk->evts_pending_main);
+ ei = clib_llist_next_index (he, evt_list);
+
+ while (ei != fwrk->evts_pending_main)
+ {
+ elt = clib_llist_elt (fwrk->event_elts, ei);
+ next_ei = clib_llist_next_index (elt, evt_list);
+ clib_llist_remove (fwrk->event_elts, evt_list, elt);
+ switch (elt->evt.event_type)
+ {
+ case SESSION_CTRL_EVT_LISTEN:
+ session_mq_listen_handler (fwrk, elt);
+ break;
+ case SESSION_CTRL_EVT_UNLISTEN:
+ session_mq_unlisten_handler (fwrk, elt);
+ break;
+ case SESSION_CTRL_EVT_APP_DETACH:
+ app_mq_detach_handler (fwrk, elt);
+ break;
+ case SESSION_CTRL_EVT_CONNECT_URI:
+ session_mq_connect_uri_handler (fwrk, elt);
+ break;
+ case SESSION_CTRL_EVT_ACCEPTED_REPLY:
+ session_mq_accepted_reply_handler (fwrk, elt);
+ break;
+ case SESSION_CTRL_EVT_CONNECT:
+ session_mq_connect_handler (fwrk, elt);
+ break;
+ default:
+ clib_warning ("unhandled %u", elt->evt.event_type);
+ ALWAYS_ASSERT (0);
+ break;
+ }
+
+ /* Regrab element in case pool moved */
+ elt = clib_llist_elt (fwrk->event_elts, ei);
+ if (!clib_llist_elt_is_linked (elt, evt_list))
+ {
+ session_evt_ctrl_data_free (fwrk, elt);
+ clib_llist_put (fwrk->event_elts, elt);
+ }
+ ei = next_ei;
+ }
+
+ vlib_worker_thread_barrier_release (vm);
+}
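
/* Editor's note (sketch, not part of the patch): the loop above saves
 * next_ei before dispatching and re-fetches the element by index after,
 * because a handler may grow the event pool and move it in memory. A
 * minimal model with a realloc'able array standing in for the clib pool;
 * all names are hypothetical. */
#include <stdlib.h>

typedef struct { int next; int linked; } elt_t;

static elt_t *pool;

static void
dispatch (int ei)
{
  (void) ei;
  /* a handler may grow the pool, invalidating any saved pointers */
  pool = realloc (pool, 16 * sizeof (elt_t));
}

static void
drain (int head)
{
  int ei = pool[head].next, next_ei;

  while (ei != head)
    {
      next_ei = pool[ei].next;	/* save before dispatch */
      dispatch (ei);
      /* regrab through the (possibly moved) pool, never a stale pointer */
      if (!pool[ei].linked)
	pool[ei].next = -1;	/* models freeing the element */
      ei = next_ei;
    }
}

int
main (void)
{
  pool = calloc (4, sizeof (elt_t));
  pool[0].next = 1;		/* head -> elt 1 */
  pool[1].next = 0;		/* elt 1 -> back to head */
  drain (0);
  free (pool);
  return 0;
}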
+
vlib_node_registration_t session_queue_node;
typedef struct
@@ -822,36 +860,134 @@ enum
};
static void
-session_tx_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
- u32 next_index, u32 * to_next, u16 n_segs,
- session_t * s, u32 n_trace)
+session_tx_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
+ u32 next_index, vlib_buffer_t **bufs, u16 n_segs,
+ session_t *s, u32 n_trace)
{
+ vlib_buffer_t **b = bufs;
+
while (n_trace && n_segs)
{
- vlib_buffer_t *b = vlib_get_buffer (vm, to_next[0]);
- if (PREDICT_TRUE
- (vlib_trace_buffer
- (vm, node, next_index, b, 1 /* follow_chain */ )))
+ if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next_index, b[0],
+ 1 /* follow_chain */)))
{
session_queue_trace_t *t =
- vlib_add_trace (vm, node, b, sizeof (*t));
+ vlib_add_trace (vm, node, b[0], sizeof (*t));
t->session_index = s->session_index;
t->server_thread_index = s->thread_index;
n_trace--;
}
- to_next++;
+ b++;
n_segs--;
}
vlib_set_trace_count (vm, node, n_trace);
}
+always_inline int
+session_tx_fill_dma_transfers (session_worker_t *wrk,
+ session_tx_context_t *ctx, vlib_buffer_t *b)
+{
+ vlib_main_t *vm = wrk->vm;
+ u32 len_to_deq;
+ u8 *data0 = NULL;
+ int n_bytes_read, len_write;
+ svm_fifo_seg_t data_fs[2];
+
+ u32 n_segs = 2;
+ u16 n_transfers = 0;
+ /*
+ * Start with the first buffer in chain
+ */
+ b->error = 0;
+ b->flags = VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ b->current_data = 0;
+ data0 = vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN);
+ len_to_deq = clib_min (ctx->left_to_snd, ctx->deq_per_first_buf);
+
+ n_bytes_read = svm_fifo_segments (ctx->s->tx_fifo, ctx->sp.tx_offset,
+ data_fs, &n_segs, len_to_deq);
+
+ len_write = n_bytes_read;
+ ASSERT (n_bytes_read == len_to_deq);
+
+ while (n_bytes_read)
+ {
+ wrk->batch_num++;
+ vlib_dma_batch_add (vm, wrk->batch, data0, data_fs[n_transfers].data,
+ data_fs[n_transfers].len);
+ data0 += data_fs[n_transfers].len;
+ n_bytes_read -= data_fs[n_transfers].len;
+ n_transfers++;
+ }
+ return len_write;
+}
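
/* Editor's note (sketch, not part of the patch): svm_fifo_segments () can
 * return two segments because the fifo is a ring and the readable region
 * may wrap. The DMA path above issues one transfer per segment into a
 * linear destination; the CPU equivalent is a gather copy, as in this
 * standalone model (names hypothetical): */
#include <assert.h>
#include <string.h>

typedef struct { const unsigned char *data; unsigned len; } seg_t;

static unsigned
gather_copy (unsigned char *dst, const seg_t *segs, unsigned n_segs)
{
  unsigned i, n = 0;

  for (i = 0; i < n_segs; i++)	/* one copy (or DMA transfer) per segment */
    {
      memcpy (dst + n, segs[i].data, segs[i].len);
      n += segs[i].len;
    }
  return n;			/* bytes linearized, i.e. len_write */
}

int
main (void)
{
  const unsigned char ring[5] = { 'L', 'L', 'O', 'H', 'E' };
  /* read position is at 'H': first segment runs to the end of the ring,
   * the second wraps around to the start */
  seg_t segs[2] = { { ring + 3, 2 }, { ring, 3 } };
  unsigned char out[5];

  assert (gather_copy (out, segs, 2) == 5);
  assert (memcmp (out, "HELLO", 5) == 0);
  return 0;
}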
+
+always_inline int
+session_tx_fill_dma_transfers_tail (session_worker_t *wrk,
+ session_tx_context_t *ctx,
+ vlib_buffer_t *b, u32 len_to_deq, u8 *data)
+{
+ vlib_main_t *vm = wrk->vm;
+ int n_bytes_read, len_write;
+ svm_fifo_seg_t data_fs[2];
+ u32 n_segs = 2;
+ u16 n_transfers = 0;
+
+ n_bytes_read = svm_fifo_segments (ctx->s->tx_fifo, ctx->sp.tx_offset,
+ data_fs, &n_segs, len_to_deq);
+
+ len_write = n_bytes_read;
+
+ ASSERT (n_bytes_read == len_to_deq);
+
+ while (n_bytes_read)
+ {
+ wrk->batch_num++;
+ vlib_dma_batch_add (vm, wrk->batch, data, data_fs[n_transfers].data,
+ data_fs[n_transfers].len);
+ data += data_fs[n_transfers].len;
+ n_bytes_read -= data_fs[n_transfers].len;
+ n_transfers++;
+ }
+
+ return len_write;
+}
+
+always_inline int
+session_tx_copy_data (session_worker_t *wrk, session_tx_context_t *ctx,
+ vlib_buffer_t *b, u32 len_to_deq, u8 *data0)
+{
+ int n_bytes_read;
+ if (PREDICT_TRUE (!wrk->dma_enabled))
+ n_bytes_read =
+ svm_fifo_peek (ctx->s->tx_fifo, ctx->sp.tx_offset, len_to_deq, data0);
+ else
+ n_bytes_read = session_tx_fill_dma_transfers (wrk, ctx, b);
+ return n_bytes_read;
+}
+
+always_inline int
+session_tx_copy_data_tail (session_worker_t *wrk, session_tx_context_t *ctx,
+ vlib_buffer_t *b, u32 len_to_deq, u8 *data)
+{
+ int n_bytes_read;
+ if (PREDICT_TRUE (!wrk->dma_enabled))
+ n_bytes_read =
+ svm_fifo_peek (ctx->s->tx_fifo, ctx->sp.tx_offset, len_to_deq, data);
+ else
+ n_bytes_read =
+ session_tx_fill_dma_transfers_tail (wrk, ctx, b, len_to_deq, data);
+ return n_bytes_read;
+}
+
always_inline void
-session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * ctx,
- vlib_buffer_t * b, u16 * n_bufs, u8 peek_data)
+session_tx_fifo_chain_tail (session_worker_t *wrk, session_tx_context_t *ctx,
+ vlib_buffer_t *b, u16 *n_bufs, u8 peek_data)
{
+ vlib_main_t *vm = wrk->vm;
vlib_buffer_t *chain_b, *prev_b;
u32 chain_bi0, to_deq, left_from_seg;
- u16 len_to_deq, n_bytes_read;
+ int len_to_deq, n_bytes_read;
u8 *data, j;
b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
@@ -873,8 +1009,8 @@ session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * ctx,
data = vlib_buffer_get_current (chain_b);
if (peek_data)
{
- n_bytes_read = svm_fifo_peek (ctx->s->tx_fifo,
- ctx->sp.tx_offset, len_to_deq, data);
+ n_bytes_read =
+ session_tx_copy_data_tail (wrk, ctx, b, len_to_deq, data);
ctx->sp.tx_offset += n_bytes_read;
}
else
@@ -931,13 +1067,12 @@ session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * ctx,
}
always_inline void
-session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx,
- vlib_buffer_t * b, u16 * n_bufs, u8 peek_data)
+session_tx_fill_buffer (session_worker_t *wrk, session_tx_context_t *ctx,
+ vlib_buffer_t *b, u16 *n_bufs, u8 peek_data)
{
u32 len_to_deq;
u8 *data0;
int n_bytes_read;
-
/*
* Start with the first buffer in chain
*/
@@ -950,8 +1085,7 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx,
if (peek_data)
{
- n_bytes_read = svm_fifo_peek (ctx->s->tx_fifo, ctx->sp.tx_offset,
- len_to_deq, data0);
+ n_bytes_read = session_tx_copy_data (wrk, ctx, b, len_to_deq, data0);
ASSERT (n_bytes_read > 0);
/* Keep track of progress locally, transport is also supposed to
* increment it independently when pushing the header */
@@ -975,8 +1109,8 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx,
if (transport_connection_is_cless (ctx->tc))
{
- ip_copy (&ctx->tc->rmt_ip, &hdr->rmt_ip, ctx->tc->is_ip4);
- ctx->tc->rmt_port = hdr->rmt_port;
+ clib_memcpy_fast (data0 - sizeof (session_dgram_hdr_t), hdr,
+ sizeof (*hdr));
}
hdr->data_offset += n_bytes_read;
if (hdr->data_offset == hdr->data_length)
@@ -998,6 +1132,7 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx,
ASSERT (n_bytes_read > 0);
}
}
+
b->current_length = n_bytes_read;
ctx->left_to_snd -= n_bytes_read;
@@ -1005,7 +1140,7 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx,
* Fill in the remaining buffers in the chain, if any
*/
if (PREDICT_FALSE (ctx->n_bufs_per_seg > 1 && ctx->left_to_snd))
- session_tx_fifo_chain_tail (vm, ctx, b, n_bufs, peek_data);
+ session_tx_fifo_chain_tail (wrk, ctx, b, n_bufs, peek_data);
}
always_inline u8
@@ -1037,6 +1172,11 @@ session_tx_not_ready (session_t * s, u8 peek_data)
return 2;
}
}
+ else
+ {
+ if (s->session_state == SESSION_STATE_TRANSPORT_DELETED)
+ return 2;
+ }
return 0;
}
@@ -1093,9 +1233,28 @@ session_tx_set_dequeue_params (vlib_main_t * vm, session_tx_context_t * ctx,
svm_fifo_peek (ctx->s->tx_fifo, 0, sizeof (ctx->hdr),
(u8 *) & ctx->hdr);
+ /* Zero length dgrams not supported */
+ if (PREDICT_FALSE (ctx->hdr.data_length == 0))
+ {
+ svm_fifo_dequeue_drop (ctx->s->tx_fifo, sizeof (ctx->hdr));
+ ctx->max_len_to_snd = 0;
+ return;
+ }
+ /* We cannot be sure apps have not enqueued incomplete dgrams */
+ if (PREDICT_FALSE (ctx->max_dequeue <
+ ctx->hdr.data_length + sizeof (ctx->hdr)))
+ {
+ ctx->max_len_to_snd = 0;
+ return;
+ }
ASSERT (ctx->hdr.data_length > ctx->hdr.data_offset);
len = ctx->hdr.data_length - ctx->hdr.data_offset;
+ if (ctx->hdr.gso_size)
+ {
+ ctx->sp.snd_mss = clib_min (ctx->sp.snd_mss, ctx->hdr.gso_size);
+ }
+
/* Process multiple dgrams if smaller than min (buf_space, mss).
* This avoids handling multiple dgrams if they require buffer
* chains */
@@ -1115,11 +1274,13 @@ session_tx_set_dequeue_params (vlib_main_t * vm, session_tx_context_t * ctx,
{
svm_fifo_peek (ctx->s->tx_fifo, offset, sizeof (ctx->hdr),
(u8 *) & hdr);
- ASSERT (hdr.data_length > hdr.data_offset);
dgram_len = hdr.data_length - hdr.data_offset;
- if (len + dgram_len > ctx->max_dequeue
- || first_dgram_len != dgram_len)
+ if (offset + sizeof (hdr) + hdr.data_length >
+ ctx->max_dequeue ||
+ first_dgram_len != dgram_len)
break;
+	      /* Assert only here so the check above can also cover zero
+	       * length dgrams */
+	      ASSERT (hdr.data_length > hdr.data_offset);
len += dgram_len;
offset += sizeof (hdr) + hdr.data_length;
}
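
/* Editor's note (sketch, not part of the patch): datagrams sit in the tx
 * fifo as [hdr][payload][hdr][payload]...; the loop above coalesces
 * consecutive complete dgrams of identical length so one dispatch can
 * send several without buffer chains. Minimal model of the accounting,
 * using the new 47-byte header length: */
#include <assert.h>

#define HDR_LEN 47		/* SESSION_CONN_HDR_LEN with gso_size */

static unsigned
coalesce_dgrams (unsigned max_dequeue, unsigned dgram_len, unsigned space)
{
  unsigned offset = 0, len = 0;

  /* stop at available send space or at the first incomplete dgram */
  while (len + dgram_len <= space &&
	 offset + HDR_LEN + dgram_len <= max_dequeue)
    {
      len += dgram_len;		     /* payload bytes to send */
      offset += HDR_LEN + dgram_len; /* advance past hdr + payload */
    }
  return len;
}

int
main (void)
{
  /* three complete 100-byte dgrams buffered, ample send space */
  assert (coalesce_dgrams (3 * (HDR_LEN + 100), 100, 1000) == 300);
  /* a truncated fourth dgram is not counted */
  assert (coalesce_dgrams (3 * (HDR_LEN + 100) + 60, 100, 1000) == 300);
  return 0;
}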
@@ -1188,8 +1349,30 @@ session_tx_maybe_reschedule (session_worker_t * wrk,
svm_fifo_unset_event (s->tx_fifo);
if (svm_fifo_max_dequeue_cons (s->tx_fifo) > ctx->sp.tx_offset)
- if (svm_fifo_set_event (s->tx_fifo))
- session_evt_add_head_old (wrk, elt);
+ {
+ if (svm_fifo_set_event (s->tx_fifo))
+ session_evt_add_head_old (wrk, elt);
+ }
+ else
+ {
+ transport_connection_deschedule (ctx->tc);
+ }
+}
+
+always_inline void
+session_tx_add_pending_buffer (session_worker_t *wrk, u32 bi, u32 next_index)
+{
+ if (PREDICT_TRUE (!wrk->dma_enabled))
+ {
+ vec_add1 (wrk->pending_tx_buffers, bi);
+ vec_add1 (wrk->pending_tx_nexts, next_index);
+ }
+ else
+ {
+ session_dma_transfer *dma_transfer = &wrk->dma_trans[wrk->trans_tail];
+ vec_add1 (dma_transfer->pending_tx_buffers, bi);
+ vec_add1 (dma_transfer->pending_tx_nexts, next_index);
+ }
}
always_inline int
@@ -1235,9 +1418,12 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
ctx->sp.max_burst_size = max_burst;
n_custom_tx = ctx->transport_vft->custom_tx (ctx->tc, &ctx->sp);
*n_tx_packets += n_custom_tx;
- if (PREDICT_FALSE
- (ctx->s->session_state >= SESSION_STATE_TRANSPORT_CLOSED))
- return SESSION_TX_OK;
+ if (PREDICT_FALSE (ctx->s->session_state >=
+ SESSION_STATE_TRANSPORT_CLOSED))
+ {
+ svm_fifo_unset_event (ctx->s->tx_fifo);
+ return SESSION_TX_OK;
+ }
max_burst -= n_custom_tx;
if (!max_burst || (ctx->s->flags & SESSION_F_CUSTOM_TX))
{
@@ -1246,6 +1432,11 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
}
}
+ /* Connection previously descheduled because it had no data to send.
+ * Clear descheduled flag and reset pacer if in use */
+ if (transport_connection_is_descheduled (ctx->tc))
+ transport_connection_clear_descheduled (ctx->tc);
+
transport_connection_snd_params (ctx->tc, &ctx->sp);
if (!ctx->sp.snd_space)
@@ -1308,6 +1499,8 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
ctx->left_to_snd = ctx->max_len_to_snd;
n_left = ctx->n_segs_per_evt;
+ vec_validate (ctx->transport_pending_bufs, n_left);
+
while (n_left >= 4)
{
vlib_buffer_t *b0, *b1;
@@ -1326,18 +1519,15 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
b0 = vlib_get_buffer (vm, bi0);
b1 = vlib_get_buffer (vm, bi1);
- session_tx_fill_buffer (vm, ctx, b0, &n_bufs, peek_data);
- session_tx_fill_buffer (vm, ctx, b1, &n_bufs, peek_data);
-
- ctx->transport_vft->push_header (ctx->tc, b0);
- ctx->transport_vft->push_header (ctx->tc, b1);
+ session_tx_fill_buffer (wrk, ctx, b0, &n_bufs, peek_data);
+ session_tx_fill_buffer (wrk, ctx, b1, &n_bufs, peek_data);
+ ctx->transport_pending_bufs[ctx->n_segs_per_evt - n_left] = b0;
+ ctx->transport_pending_bufs[ctx->n_segs_per_evt - n_left + 1] = b1;
n_left -= 2;
- vec_add1 (wrk->pending_tx_buffers, bi0);
- vec_add1 (wrk->pending_tx_buffers, bi1);
- vec_add1 (wrk->pending_tx_nexts, next_index);
- vec_add1 (wrk->pending_tx_nexts, next_index);
+ session_tx_add_pending_buffer (wrk, bi0, next_index);
+ session_tx_add_pending_buffer (wrk, bi1, next_index);
}
while (n_left)
{
@@ -1353,20 +1543,20 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
bi0 = ctx->tx_buffers[--n_bufs];
b0 = vlib_get_buffer (vm, bi0);
- session_tx_fill_buffer (vm, ctx, b0, &n_bufs, peek_data);
-
- /* Ask transport to push header after current_length and
- * total_length_not_including_first_buffer are updated */
- ctx->transport_vft->push_header (ctx->tc, b0);
+ session_tx_fill_buffer (wrk, ctx, b0, &n_bufs, peek_data);
+ ctx->transport_pending_bufs[ctx->n_segs_per_evt - n_left] = b0;
n_left -= 1;
- vec_add1 (wrk->pending_tx_buffers, bi0);
- vec_add1 (wrk->pending_tx_nexts, next_index);
+ session_tx_add_pending_buffer (wrk, bi0, next_index);
}
+ /* Ask transport to push headers */
+ ctx->transport_vft->push_header (ctx->tc, ctx->transport_pending_bufs,
+ ctx->n_segs_per_evt);
+
if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node)) > 0))
- session_tx_trace_frame (vm, node, next_index, wrk->pending_tx_buffers,
+ session_tx_trace_frame (vm, node, next_index, ctx->transport_pending_bufs,
ctx->n_segs_per_evt, ctx->s, n_trace);
if (PREDICT_FALSE (n_bufs))
@@ -1375,7 +1565,7 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
*n_tx_packets += ctx->n_segs_per_evt;
SESSION_EVT (SESSION_EVT_DEQ, ctx->s, ctx->max_len_to_snd, ctx->max_dequeue,
- ctx->s->tx_fifo->has_event, wrk->last_vlib_time);
+ ctx->s->tx_fifo->shr->has_event, wrk->last_vlib_time);
ASSERT (ctx->left_to_snd == 0);
@@ -1420,20 +1610,30 @@ session_tx_fifo_dequeue_internal (session_worker_t * wrk,
{
transport_send_params_t *sp = &wrk->ctx.sp;
session_t *s = wrk->ctx.s;
+ clib_llist_index_t ei;
u32 n_packets;
- if (PREDICT_FALSE (s->session_state >= SESSION_STATE_TRANSPORT_CLOSED))
+ if (PREDICT_FALSE ((s->session_state >= SESSION_STATE_TRANSPORT_CLOSED) ||
+ (s->session_state == SESSION_STATE_CONNECTING &&
+ (s->flags & SESSION_F_HALF_OPEN))))
return 0;
/* Clear custom-tx flag used to request reschedule for tx */
s->flags &= ~SESSION_F_CUSTOM_TX;
+ sp->flags = 0;
+ sp->bytes_dequeued = 0;
sp->max_burst_size = clib_min (SESSION_NODE_FRAME_SIZE - *n_tx_packets,
TRANSPORT_PACER_MAX_BURST_PKTS);
+ /* Grab elt index since app transports can enqueue events on tx */
+ ei = clib_llist_entry_index (wrk->event_elts, elt);
+
n_packets = transport_custom_tx (session_get_transport_proto (s), s, sp);
*n_tx_packets += n_packets;
+ elt = clib_llist_elt (wrk->event_elts, ei);
+
if (s->flags & SESSION_F_CUSTOM_TX)
{
session_evt_add_old (wrk, elt);
@@ -1446,8 +1646,8 @@ session_tx_fifo_dequeue_internal (session_worker_t * wrk,
session_evt_add_head_old (wrk, elt);
}
- if (sp->max_burst_size &&
- svm_fifo_needs_deq_ntf (s->tx_fifo, sp->max_burst_size))
+ if (sp->bytes_dequeued &&
+ svm_fifo_needs_deq_ntf (s->tx_fifo, sp->bytes_dequeued))
session_dequeue_notify (s);
return n_packets;
@@ -1499,10 +1699,10 @@ session_event_dispatch_ctrl (session_worker_t * wrk, session_evt_elt_t * elt)
session_transport_reset (s);
break;
case SESSION_CTRL_EVT_LISTEN:
- session_mq_listen_handler (session_evt_ctrl_data (wrk, elt));
+ session_mq_listen_handler (wrk, elt);
break;
case SESSION_CTRL_EVT_LISTEN_URI:
- session_mq_listen_uri_handler (session_evt_ctrl_data (wrk, elt));
+ session_mq_listen_uri_handler (wrk, elt);
break;
case SESSION_CTRL_EVT_UNLISTEN:
session_mq_unlisten_handler (wrk, elt);
@@ -1511,7 +1711,7 @@ session_event_dispatch_ctrl (session_worker_t * wrk, session_evt_elt_t * elt)
session_mq_connect_handler (wrk, elt);
break;
case SESSION_CTRL_EVT_CONNECT_URI:
- session_mq_connect_uri_handler (session_evt_ctrl_data (wrk, elt));
+ session_mq_connect_uri_handler (wrk, elt);
break;
case SESSION_CTRL_EVT_SHUTDOWN:
session_mq_shutdown_handler (session_evt_ctrl_data (wrk, elt));
@@ -1523,7 +1723,7 @@ session_event_dispatch_ctrl (session_worker_t * wrk, session_evt_elt_t * elt)
session_mq_disconnected_handler (session_evt_ctrl_data (wrk, elt));
break;
case SESSION_CTRL_EVT_ACCEPTED_REPLY:
- session_mq_accepted_reply_handler (session_evt_ctrl_data (wrk, elt));
+ session_mq_accepted_reply_handler (wrk, elt);
break;
case SESSION_CTRL_EVT_DISCONNECTED_REPLY:
session_mq_disconnected_reply_handler (session_evt_ctrl_data (wrk,
@@ -1536,7 +1736,7 @@ session_event_dispatch_ctrl (session_worker_t * wrk, session_evt_elt_t * elt)
session_mq_worker_update_handler (session_evt_ctrl_data (wrk, elt));
break;
case SESSION_CTRL_EVT_APP_DETACH:
- app_mq_detach_handler (session_evt_ctrl_data (wrk, elt));
+ app_mq_detach_handler (wrk, elt);
break;
case SESSION_CTRL_EVT_APP_WRK_RPC:
session_mq_app_wrk_rpc_handler (session_evt_ctrl_data (wrk, elt));
@@ -1588,7 +1788,7 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node,
break;
case SESSION_IO_EVT_RX:
s = session_event_get_session (wrk, e);
- if (!s)
+ if (!s || s->session_state >= SESSION_STATE_TRANSPORT_CLOSED)
break;
transport_app_rx_evt (session_get_transport_proto (s),
s->connection_index, s->thread_index);
@@ -1599,19 +1799,21 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node,
break;
svm_fifo_unset_event (s->rx_fifo);
app_wrk = app_worker_get (s->app_wrk_index);
- app_worker_builtin_rx (app_wrk, s);
+ app_worker_rx_notify (app_wrk, s);
break;
- case SESSION_IO_EVT_BUILTIN_TX:
- s = session_get_from_handle_if_valid (e->session_handle);
+ case SESSION_IO_EVT_TX_MAIN:
+ s = session_get_if_valid (e->session_index, 0 /* main thread */);
+ if (PREDICT_FALSE (!s))
+ break;
wrk->ctx.s = s;
if (PREDICT_TRUE (s != 0))
- session_tx_fifo_dequeue_internal (wrk, node, elt, n_tx_packets);
+ (smm->session_tx_fns[s->session_type]) (wrk, node, elt, n_tx_packets);
break;
default:
clib_warning ("unhandled event type %d", e->event_type);
}
- SESSION_EVT (SESSION_IO_EVT_COUNTS, e->event_type, 1, wrk);
+ SESSION_EVT (SESSION_EVT_IO_EVT_COUNTS, e->event_type, 1, wrk);
/* Regrab elements in case pool moved */
elt = clib_llist_elt (wrk->event_elts, ei);
@@ -1619,14 +1821,22 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node,
clib_llist_put (wrk->event_elts, elt);
}
-/* *INDENT-OFF* */
static const u32 session_evt_msg_sizes[] = {
#define _(symc, sym) \
[SESSION_CTRL_EVT_ ## symc] = sizeof (session_ ## sym ##_msg_t),
foreach_session_ctrl_evt
#undef _
};
-/* *INDENT-ON* */
+
+always_inline void
+session_update_time_subscribers (session_main_t *smm, clib_time_type_t now,
+ u32 thread_index)
+{
+ session_update_time_fn *fn;
+
+ vec_foreach (fn, smm->update_time_fns)
+ (*fn) (now, thread_index);
+}
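
/* Editor's note (sketch, not part of the patch): transports now register
 * their update_time callbacks (see session_register_update_time_fn used
 * in transport_enable_disable further down) and the node walks the
 * subscriber vector instead of calling one transport hook directly. A
 * minimal model with a fixed-size subscriber array: */
typedef void (update_time_fn_t) (double now, unsigned thread_index);

static update_time_fn_t *subscribers[8];
static int n_subscribers;

static void
register_update_time (update_time_fn_t *fn)
{
  subscribers[n_subscribers++] = fn; /* models session_register_update_time_fn */
}

static void
update_time_subscribers (double now, unsigned thread_index)
{
  int i;

  for (i = 0; i < n_subscribers; i++) /* models the vec_foreach above */
    subscribers[i] (now, thread_index);
}

static void
sample_transport_update_time (double now, unsigned thread_index)
{
  (void) now;
  (void) thread_index;
}

int
main (void)
{
  register_update_time (sample_transport_update_time);
  update_time_subscribers (1.0, 0);
  return 0;
}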
always_inline void
session_evt_add_to_list (session_worker_t * wrk, session_event_t * evt)
@@ -1693,7 +1903,7 @@ session_wrk_update_state (session_worker_t *wrk)
if (wrk->state == SESSION_WRK_POLLING)
{
- if (clib_llist_elts (wrk->event_elts) == 4 &&
+ if (clib_llist_elts (wrk->event_elts) == 5 &&
vlib_last_vectors_per_main_loop (vm) < 1)
{
session_wrk_set_state (wrk, SESSION_WRK_INTERRUPT);
@@ -1703,7 +1913,7 @@ session_wrk_update_state (session_worker_t *wrk)
}
else if (wrk->state == SESSION_WRK_INTERRUPT)
{
- if (clib_llist_elts (wrk->event_elts) > 4 ||
+ if (clib_llist_elts (wrk->event_elts) > 5 ||
vlib_last_vectors_per_main_loop (vm) > 1)
{
session_wrk_set_state (wrk, SESSION_WRK_POLLING);
@@ -1742,10 +1952,19 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
/*
* Update transport time
*/
- transport_update_time (wrk->last_vlib_time, thread_index);
+ session_update_time_subscribers (smm, wrk->last_vlib_time, thread_index);
n_tx_packets = vec_len (wrk->pending_tx_buffers);
SESSION_EVT (SESSION_EVT_DSP_CNTRS, UPDATE_TIME, wrk);
+ if (PREDICT_FALSE (wrk->dma_enabled))
+ {
+ if (wrk->trans_head == ((wrk->trans_tail + 1) & (wrk->trans_size - 1)))
+ return 0;
+ wrk->batch = vlib_dma_batch_new (vm, wrk->config_index);
+ if (!wrk->batch)
+ return 0;
+ }
+
/*
* Dequeue new internal mq events
*/
@@ -1815,6 +2034,20 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
};
}
+ if (PREDICT_FALSE (wrk->dma_enabled))
+ {
+ if (wrk->batch_num)
+ {
+ vlib_dma_batch_set_cookie (vm, wrk->batch, wrk->trans_tail);
+ wrk->batch_num = 0;
+ wrk->trans_tail++;
+ if (wrk->trans_tail == wrk->trans_size)
+ wrk->trans_tail = 0;
+ }
+
+ vlib_dma_batch_submit (vm, wrk->batch);
+ }
+
SESSION_EVT (SESSION_EVT_DSP_CNTRS, OLD_IO_EVTS, wrk);
if (vec_len (wrk->pending_tx_buffers))
@@ -1831,7 +2064,6 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return n_tx_packets;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (session_queue_node) = {
.function = session_queue_node_fn,
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
@@ -1842,7 +2074,6 @@ VLIB_REGISTER_NODE (session_queue_node) = {
.error_counters = session_error_counters,
.state = VLIB_NODE_STATE_DISABLED,
};
-/* *INDENT-ON* */
static clib_error_t *
session_wrk_tfd_read_ready (clib_file_t *cf)
@@ -1946,7 +2177,6 @@ session_queue_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (session_queue_process_node) =
{
.function = session_queue_process,
@@ -1954,7 +2184,6 @@ VLIB_REGISTER_NODE (session_queue_process_node) =
.name = "session-queue-process",
.state = VLIB_NODE_STATE_DISABLED,
};
-/* *INDENT-ON* */
static_always_inline uword
session_queue_pre_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
@@ -1967,7 +2196,6 @@ session_queue_pre_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
return session_queue_node_fn (vm, node, frame);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (session_queue_pre_input_node) =
{
.function = session_queue_pre_input_inline,
@@ -1975,7 +2203,6 @@ VLIB_REGISTER_NODE (session_queue_pre_input_node) =
.name = "session-queue-main",
.state = VLIB_NODE_STATE_DISABLED,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/session/session_rules_table.c b/src/vnet/session/session_rules_table.c
index 5108c00d728..70a702cf55c 100644
--- a/src/vnet/session/session_rules_table.c
+++ b/src/vnet/session/session_rules_table.c
@@ -386,11 +386,11 @@ session_rules_table_lookup6 (session_rules_table_t * srt,
* @param srt table where rule should be added
* @param args rule arguments
*
- * @return 0 if success, clib_error_t error otherwise
+ * @return 0 if success, session_error_t error otherwise
*/
-int
-session_rules_table_add_del (session_rules_table_t * srt,
- session_rule_table_add_del_args_t * args)
+session_error_t
+session_rules_table_add_del (session_rules_table_t *srt,
+ session_rule_table_add_del_args_t *args)
{
u8 fib_proto = args->rmt.fp_proto, *rt;
u32 ri_from_tag, ri;
@@ -398,7 +398,7 @@ session_rules_table_add_del (session_rules_table_t * srt,
ri_from_tag = session_rules_table_rule_for_tag (srt, args->tag);
if (args->is_add && ri_from_tag != SESSION_RULES_TABLE_INVALID_INDEX)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if (fib_proto == FIB_PROTOCOL_IP4)
{
@@ -509,7 +509,7 @@ session_rules_table_add_del (session_rules_table_t * srt,
}
}
else
- return VNET_API_ERROR_INVALID_VALUE_2;
+ return SESSION_E_INVALID;
return 0;
}
@@ -605,11 +605,9 @@ session_rules_table_cli_dump (vlib_main_t * vm, session_rules_table_t * srt,
srt4 = &srt->session_rules_tables_16;
vlib_cli_output (vm, "IP4 rules");
- /* *INDENT-OFF* */
pool_foreach (sr4, srt4->rules) {
vlib_cli_output (vm, "%U", format_session_rule4, srt, sr4);
}
- /* *INDENT-ON* */
}
else if (fib_proto == FIB_PROTOCOL_IP6)
@@ -619,11 +617,9 @@ session_rules_table_cli_dump (vlib_main_t * vm, session_rules_table_t * srt,
srt6 = &srt->session_rules_tables_40;
vlib_cli_output (vm, "IP6 rules");
- /* *INDENT-OFF* */
pool_foreach (sr6, srt6->rules) {
vlib_cli_output (vm, "%U", format_session_rule6, srt, sr6);
}
- /* *INDENT-ON* */
}
}
diff --git a/src/vnet/session/session_rules_table.h b/src/vnet/session/session_rules_table.h
index 206ef2f380f..010d50a6398 100644
--- a/src/vnet/session/session_rules_table.h
+++ b/src/vnet/session/session_rules_table.h
@@ -18,11 +18,11 @@
#include <vnet/vnet.h>
#include <vnet/fib/fib.h>
+#include <vnet/session/session_types.h>
#include <vnet/session/transport.h>
#include <vnet/session/mma_16.h>
#include <vnet/session/mma_40.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
union
@@ -52,7 +52,6 @@ typedef CLIB_PACKED (struct
u64 as_u64[5];
};
}) session_mask_or_match_6_t;
-/* *INDENT-ON* */
#define SESSION_RULE_TAG_MAX_LEN 64
#define SESSION_RULES_TABLE_INVALID_INDEX MMA_TABLE_INVALID_INDEX
@@ -111,8 +110,9 @@ void session_rules_table_show_rule (vlib_main_t * vm,
ip46_address_t * lcl_ip, u16 lcl_port,
ip46_address_t * rmt_ip, u16 rmt_port,
u8 is_ip4);
-int session_rules_table_add_del (session_rules_table_t * srt,
- session_rule_table_add_del_args_t * args);
+session_error_t
+session_rules_table_add_del (session_rules_table_t *srt,
+ session_rule_table_add_del_args_t *args);
u8 *session_rules_table_rule_tag (session_rules_table_t * srt, u32 ri,
u8 is_ip4);
void session_rules_table_init (session_rules_table_t * srt);
diff --git a/src/vnet/session/session_table.c b/src/vnet/session/session_table.c
index 9af8ae6a584..dbbe771979c 100644
--- a/src/vnet/session/session_table.c
+++ b/src/vnet/session/session_table.c
@@ -185,7 +185,66 @@ ip4_session_table_walk (clib_bihash_16_8_t * hash,
&ctx);
}
-/* *INDENT-ON* */
+u32
+session_table_memory_size (session_table_t *st)
+{
+ u64 total_size = 0;
+
+ if (clib_bihash_is_initialised_16_8 (&st->v4_session_hash))
+ {
+ clib_bihash_alloc_chunk_16_8_t *c = st->v4_session_hash.chunks;
+ while (c)
+ {
+ total_size += c->size;
+ c = c->next;
+ }
+ c = st->v4_half_open_hash.chunks;
+ while (c)
+ {
+ total_size += c->size;
+ c = c->next;
+ }
+ }
+
+ if (clib_bihash_is_initialised_48_8 (&st->v6_session_hash))
+ {
+ clib_bihash_alloc_chunk_48_8_t *c = st->v6_session_hash.chunks;
+ while (c)
+ {
+ total_size += c->size;
+ c = c->next;
+ }
+ c = st->v6_half_open_hash.chunks;
+ while (c)
+ {
+ total_size += c->size;
+ c = c->next;
+ }
+ }
+
+ return total_size;
+}
+
+u8 *
+format_session_table (u8 *s, va_list *args)
+{
+ session_table_t *st = va_arg (*args, session_table_t *);
+
+ if (clib_bihash_is_initialised_16_8 (&st->v4_session_hash))
+ {
+ s = format (s, "%U", format_bihash_16_8, &st->v4_session_hash, 0);
+ s = format (s, "%U", format_bihash_16_8, &st->v4_half_open_hash, 0);
+ }
+
+ if (clib_bihash_is_initialised_48_8 (&st->v6_session_hash))
+ {
+ s = format (s, "%U", format_bihash_48_8, &st->v6_session_hash, 0);
+ s = format (s, "%U", format_bihash_48_8, &st->v6_half_open_hash, 0);
+ }
+
+ return s;
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/session/session_table.h b/src/vnet/session/session_table.h
index 2127ea45d01..636b8d77bee 100644
--- a/src/vnet/session/session_table.h
+++ b/src/vnet/session/session_table.h
@@ -69,6 +69,9 @@ u32 session_table_index (session_table_t * slt);
void session_table_init (session_table_t * slt, u8 fib_proto);
void session_table_free (session_table_t *slt, u8 fib_proto);
+u32 session_table_memory_size (session_table_t *st);
+u8 *format_session_table (u8 *s, va_list *args);
+
/* Internal, try not to use it! */
session_table_t *_get_session_tables ();
@@ -76,7 +79,6 @@ session_table_t *_get_session_tables ();
pool_foreach (VAR, _get_session_tables ()) BODY
#endif /* SRC_VNET_SESSION_SESSION_TABLE_H_ */
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/session/session_test.c b/src/vnet/session/session_test.c
index a8a9327b892..770e7263024 100644
--- a/src/vnet/session/session_test.c
+++ b/src/vnet/session/session_test.c
@@ -277,12 +277,6 @@ api_app_worker_add_del (vat_main_t *vat)
}
static int
-api_application_tls_key_add (vat_main_t *vat)
-{
- return -1;
-}
-
-static int
api_app_namespace_add_del (vat_main_t *vam)
{
vl_api_app_namespace_add_del_t *mp;
@@ -330,8 +324,14 @@ api_app_namespace_add_del (vat_main_t *vam)
return ret;
}
+static void
+vl_api_app_namespace_add_del_v4_reply_t_handler (
+ vl_api_app_namespace_add_del_v4_reply_t *mp)
+{
+}
+
static int
-api_application_tls_cert_add (vat_main_t *vat)
+api_app_namespace_add_del_v4 (vat_main_t *vat)
{
return -1;
}
diff --git a/src/vnet/session/session_types.h b/src/vnet/session/session_types.h
index 0cf463d569d..5e650727d61 100644
--- a/src/vnet/session/session_types.h
+++ b/src/vnet/session/session_types.h
@@ -22,8 +22,22 @@
#define SESSION_INVALID_INDEX ((u32)~0)
#define SESSION_INVALID_HANDLE ((u64)~0)
#define SESSION_CTRL_MSG_MAX_SIZE 86
+#define SESSION_CTRL_MSG_TX_MAX_SIZE 160
#define SESSION_NODE_FRAME_SIZE 128
+typedef u8 session_type_t;
+typedef u64 session_handle_t;
+
+typedef union session_handle_tu_
+{
+ session_handle_t handle;
+ struct
+ {
+ u32 session_index;
+ u32 thread_index;
+ };
+} __attribute__ ((__transparent_union__)) session_handle_tu_t;
+
#define foreach_session_endpoint_fields \
foreach_transport_endpoint_cfg_fields \
_(u8, transport_proto) \
@@ -35,6 +49,23 @@ typedef struct _session_endpoint
#undef _
} session_endpoint_t;
+#define foreach_session_endpoint_cfg_flags _ (PROXY_LISTEN, "proxy listener")
+
+typedef enum session_endpoint_cfg_flags_bits_
+{
+#define _(sym, str) SESSION_ENDPT_CFG_F_BIT_##sym,
+ foreach_session_endpoint_cfg_flags
+#undef _
+} __clib_packed session_endpoint_cfg_flags_bits_t;
+
+typedef enum session_endpoint_cfg_flags_
+{
+#define _(sym, str) \
+ SESSION_ENDPT_CFG_F_##sym = 1 << SESSION_ENDPT_CFG_F_BIT_##sym,
+ foreach_session_endpoint_cfg_flags
+#undef _
+} __clib_packed session_endpoint_cfg_flags_t;
+
typedef struct _session_endpoint_cfg
{
#define _(type, name) type name;
@@ -45,7 +76,7 @@ typedef struct _session_endpoint_cfg
u32 ns_index;
u8 original_tp;
u64 parent_handle;
- u8 flags;
+ session_endpoint_cfg_flags_t flags;
transport_endpt_ext_cfg_t *ext_cfg;
} session_endpoint_cfg_t;
@@ -107,9 +138,6 @@ session_endpoint_is_zero (session_endpoint_t * sep)
return ip_is_zero (&sep->ip, sep->is_ip4);
}
-typedef u8 session_type_t;
-typedef u64 session_handle_t;
-
typedef enum
{
SESSION_CLEANUP_TRANSPORT,
@@ -126,19 +154,19 @@ typedef enum session_ft_action_
/*
* Session states
*/
-#define foreach_session_state \
- _(CREATED, "created") \
- _(LISTENING, "listening") \
- _(CONNECTING, "connecting") \
- _(ACCEPTING, "accepting") \
- _(READY, "ready") \
- _(OPENED, "opened") \
- _(TRANSPORT_CLOSING, "transport-closing") \
- _(CLOSING, "closing") \
- _(APP_CLOSED, "app-closed") \
- _(TRANSPORT_CLOSED, "transport-closed") \
- _(CLOSED, "closed") \
- _(TRANSPORT_DELETED, "transport-deleted") \
+#define foreach_session_state \
+ _ (CREATED, "created") \
+ _ (LISTENING, "listening") \
+ _ (CONNECTING, "connecting") \
+ _ (ACCEPTING, "accepting") \
+ _ (READY, "ready") \
+ _ (OPENED, "opened") \
+ _ (TRANSPORT_CLOSING, "transport-closing") \
+ _ (CLOSING, "closing") \
+ _ (APP_CLOSED, "app-closed") \
+ _ (TRANSPORT_CLOSED, "transport-closed") \
+ _ (CLOSED, "closed") \
+ _ (TRANSPORT_DELETED, "transport-deleted")
typedef enum
{
@@ -146,7 +174,7 @@ typedef enum
foreach_session_state
#undef _
SESSION_N_STATES,
-} session_state_t;
+} __clib_packed session_state_t;
#define foreach_session_flag \
_ (RX_EVT, "rx-event") \
@@ -155,7 +183,9 @@ typedef enum
_ (IS_MIGRATING, "migrating") \
_ (UNIDIRECTIONAL, "unidirectional") \
_ (CUSTOM_FIFO_TUNING, "custom-fifo-tuning") \
- _ (HALF_OPEN, "half-open")
+ _ (HALF_OPEN, "half-open") \
+ _ (APP_CLOSED, "app-closed") \
+ _ (IS_CLESS, "connectionless")
typedef enum session_flags_bits_
{
@@ -178,38 +208,42 @@ typedef struct session_
svm_fifo_t *rx_fifo;
svm_fifo_t *tx_fifo;
+ union
+ {
+ session_handle_t handle;
+ struct
+ {
+ /** Index in thread pool where session was allocated */
+ u32 session_index;
+
+ /** Index of the thread that allocated the session */
+ u32 thread_index;
+ };
+ };
+
/** Type built from transport and network protocol types */
session_type_t session_type;
/** State in session layer state machine. See @ref session_state_t */
- volatile u8 session_state;
-
- /** Index in thread pool where session was allocated */
- u32 session_index;
+ volatile session_state_t session_state;
/** Index of the app worker that owns the session */
u32 app_wrk_index;
- /** Index of the thread that allocated the session */
- u8 thread_index;
-
/** Session flags. See @ref session_flags_t */
- u32 flags;
+ session_flags_t flags;
/** Index of the transport connection associated to the session */
u32 connection_index;
- /** Index of application that owns the listener. Set only if a listener */
- u32 app_index;
+ /** App listener index in app's listener pool if a listener */
+ u32 al_index;
union
{
/** Parent listener session index if the result of an accept */
session_handle_t listener_handle;
- /** App listener index in app's listener pool if a listener */
- u32 al_index;
-
/** Index in app worker's half-open table if a half-open */
u32 ho_index;
};
@@ -282,45 +316,35 @@ session_tx_is_dgram (session_t * s)
always_inline session_handle_t
session_handle (session_t * s)
{
- return ((u64) s->thread_index << 32) | (u64) s->session_index;
+ return s->handle;
}
always_inline u32
-session_index_from_handle (session_handle_t handle)
+session_index_from_handle (session_handle_tu_t handle)
{
- return handle & 0xFFFFFFFF;
+ return handle.session_index;
}
always_inline u32
-session_thread_from_handle (session_handle_t handle)
+session_thread_from_handle (session_handle_tu_t handle)
{
- return handle >> 32;
+ return handle.thread_index;
}
always_inline void
-session_parse_handle (session_handle_t handle, u32 * index,
- u32 * thread_index)
+session_parse_handle (session_handle_tu_t handle, u32 *index,
+ u32 *thread_index)
{
- *index = session_index_from_handle (handle);
- *thread_index = session_thread_from_handle (handle);
+ *index = handle.session_index;
+ *thread_index = handle.thread_index;
}
static inline session_handle_t
session_make_handle (u32 session_index, u32 data)
{
- return (((u64) data << 32) | (u64) session_index);
-}
-
-always_inline u32
-session_handle_index (session_handle_t ho_handle)
-{
- return (ho_handle & 0xffffffff);
-}
-
-always_inline u32
-session_handle_data (session_handle_t ho_handle)
-{
- return (ho_handle >> 32);
+ return ((session_handle_tu_t){ .session_index = session_index,
+ .thread_index = data })
+ .handle;
}
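
/* Editor's note (sketch, not part of the patch): the transparent union
 * lets functions declared to take session_handle_tu_t still accept a
 * plain u64 handle, while the anonymous struct replaces the manual
 * shift/mask packing. On a little-endian host the two encodings are
 * bit-identical, as this standalone check shows: */
#include <assert.h>
#include <stdint.h>

typedef union
{
  uint64_t handle;
  struct
  {
    uint32_t session_index;	/* low 32 bits */
    uint32_t thread_index;	/* high 32 bits */
  };
} handle_tu_t;

int
main (void)
{
  handle_tu_t h = { .session_index = 7, .thread_index = 3 };

  /* matches the old ((u64) thread_index << 32) | session_index layout */
  assert (h.handle == (((uint64_t) 3 << 32) | 7));
  assert ((uint32_t) (h.handle >> 32) == h.thread_index);
  return 0;
}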
typedef enum
@@ -329,7 +353,7 @@ typedef enum
SESSION_IO_EVT_TX,
SESSION_IO_EVT_TX_FLUSH,
SESSION_IO_EVT_BUILTIN_RX,
- SESSION_IO_EVT_BUILTIN_TX,
+ SESSION_IO_EVT_TX_MAIN,
SESSION_CTRL_EVT_RPC,
SESSION_CTRL_EVT_HALF_CLOSE,
SESSION_CTRL_EVT_CLOSE,
@@ -360,6 +384,8 @@ typedef enum
SESSION_CTRL_EVT_APP_WRK_RPC,
SESSION_CTRL_EVT_TRANSPORT_ATTR,
SESSION_CTRL_EVT_TRANSPORT_ATTR_REPLY,
+ SESSION_CTRL_EVT_TRANSPORT_CLOSED,
+ SESSION_CTRL_EVT_HALF_CLEANUP,
} session_evt_type_t;
#define foreach_session_ctrl_evt \
@@ -394,7 +420,6 @@ typedef enum
#define FIFO_EVENT_APP_TX SESSION_IO_EVT_TX
#define FIFO_EVENT_DISCONNECT SESSION_CTRL_EVT_CLOSE
#define FIFO_EVENT_BUILTIN_RX SESSION_IO_EVT_BUILTIN_RX
-#define FIFO_EVENT_BUILTIN_TX SESSION_IO_EVT_BUILTIN_TX
typedef enum
{
@@ -419,6 +444,7 @@ typedef struct
session_handle_t session_handle;
session_rpc_args_t rpc_args;
u32 ctrl_data_index;
+ u64 as_u64[2];
struct
{
u8 data[0];
@@ -443,12 +469,12 @@ typedef struct session_dgram_header_
u16 rmt_port;
u16 lcl_port;
u8 is_ip4;
+ u16 gso_size;
} __clib_packed session_dgram_hdr_t;
#define SESSION_CONN_ID_LEN 37
-#define SESSION_CONN_HDR_LEN 45
-
-STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8),
+#define SESSION_CONN_HDR_LEN 47
+STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 10),
"session conn id wrong length");
#define foreach_session_error \
@@ -466,9 +492,11 @@ STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8),
_ (NOLISTEN, "not listening") \
_ (NOSESSION, "session does not exist") \
_ (NOAPP, "app not attached") \
+ _ (APP_ATTACHED, "app already attached") \
_ (PORTINUSE, "lcl port in use") \
_ (IPINUSE, "ip in use") \
_ (ALREADY_LISTENING, "ip port pair already listened on") \
+ _ (ADDR_NOT_IN_USE, "address not in use") \
_ (INVALID, "invalid value") \
_ (INVALID_RMT_IP, "invalid remote ip") \
_ (INVALID_APPWRK, "invalid app worker") \
@@ -487,7 +515,10 @@ STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8),
_ (NOEXTCFG, "no extended transport config") \
_ (NOCRYPTOENG, "no crypto engine") \
_ (NOCRYPTOCKP, "cert key pair not found ") \
- _ (LOCAL_CONNECT, "could not connect with local scope")
+ _ (LOCAL_CONNECT, "could not connect with local scope") \
+ _ (WRONG_NS_SECRET, "wrong ns secret") \
+ _ (SYSCALL, "system call error") \
+ _ (TRANSPORT_NO_REG, "transport was not registered")
typedef enum session_error_p_
{
diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c
index 4b263cd29b9..1c2a9261d3c 100644
--- a/src/vnet/session/transport.c
+++ b/src/vnet/session/transport.c
@@ -17,36 +17,31 @@
#include <vnet/session/session.h>
#include <vnet/fib/fib.h>
-typedef struct local_endpoint_
-{
- transport_endpoint_t ep;
- int refcnt;
-} local_endpoint_t;
-
/**
* Per-type vector of transport protocol virtual function tables
*/
transport_proto_vft_t *tp_vfts;
-/*
- * Port allocator seed
- */
-static u32 port_allocator_seed;
-
-/*
- * Local endpoints table
- */
-static transport_endpoint_table_t local_endpoints_table;
+typedef struct local_endpoint_
+{
+ transport_endpoint_t ep;
+ transport_proto_t proto;
+ int refcnt;
+} local_endpoint_t;
-/*
- * Pool of local endpoints
- */
-static local_endpoint_t *local_endpoints;
+typedef struct transport_main_
+{
+ transport_endpoint_table_t local_endpoints_table;
+ local_endpoint_t *local_endpoints;
+ u32 *lcl_endpts_freelist;
+ u32 port_allocator_seed;
+ u16 port_allocator_min_src_port;
+ u16 port_allocator_max_src_port;
+ u8 lcl_endpts_cleanup_pending;
+ clib_spinlock_t local_endpoints_lock;
+} transport_main_t;
-/*
- * Local endpoints pool lock
- */
-static clib_spinlock_t local_endpoints_lock;
+static transport_main_t tp_main;
u8 *
format_transport_proto (u8 * s, va_list * args)
@@ -76,6 +71,35 @@ format_transport_proto_short (u8 * s, va_list * args)
return s;
}
+const char *transport_flags_str[] = {
+#define _(sym, str) str,
+ foreach_transport_connection_flag
+#undef _
+};
+
+u8 *
+format_transport_flags (u8 *s, va_list *args)
+{
+ transport_connection_flags_t flags;
+ int i, last = -1;
+
+ flags = va_arg (*args, transport_connection_flags_t);
+
+ for (i = 0; i < TRANSPORT_CONNECTION_N_FLAGS; i++)
+ if (flags & (1 << i))
+ last = i;
+
+ for (i = 0; i < last; i++)
+ {
+ if (flags & (1 << i))
+ s = format (s, "%s, ", transport_flags_str[i]);
+ }
+ if (last >= 0)
+ s = format (s, "%s", transport_flags_str[last]);
+
+ return s;
+}
+
u8 *
format_transport_connection (u8 * s, va_list * args)
{
@@ -100,8 +124,8 @@ format_transport_connection (u8 * s, va_list * args)
if (transport_connection_is_tx_paced (tc))
s = format (s, "%Upacer: %U\n", format_white_space, indent,
format_transport_pacer, &tc->pacer, tc->thread_index);
- s = format (s, "%Utransport: flags 0x%x\n", format_white_space, indent,
- tc->flags);
+ s = format (s, "%Utransport: flags: %U\n", format_white_space, indent,
+ format_transport_flags, tc->flags);
}
return s;
}
@@ -124,14 +148,13 @@ u8 *
format_transport_half_open_connection (u8 * s, va_list * args)
{
u32 transport_proto = va_arg (*args, u32);
- u32 ho_index = va_arg (*args, u32);
transport_proto_vft_t *tp_vft;
tp_vft = transport_protocol_get_vft (transport_proto);
if (!tp_vft)
return s;
- s = format (s, "%U", tp_vft->format_half_open, ho_index);
+ s = (tp_vft->format_half_open) (s, args);
return s;
}
@@ -314,6 +337,8 @@ transport_cleanup_half_open (transport_proto_t tp, u32 conn_index)
int
transport_connect (transport_proto_t tp, transport_endpoint_cfg_t * tep)
{
+ if (PREDICT_FALSE (!tp_vfts[tp].connect))
+ return SESSION_E_TRANSPORT_NO_REG;
return tp_vfts[tp].connect (tep);
}
@@ -341,8 +366,10 @@ transport_reset (transport_proto_t tp, u32 conn_index, u8 thread_index)
u32
transport_start_listen (transport_proto_t tp, u32 session_index,
- transport_endpoint_t * tep)
+ transport_endpoint_cfg_t *tep)
{
+ if (PREDICT_FALSE (!tp_vfts[tp].start_listen))
+ return SESSION_E_TRANSPORT_NO_REG;
return tp_vfts[tp].start_listen (session_index, tep);
}
@@ -420,67 +447,148 @@ transport_connection_attribute (transport_proto_t tp, u32 conn_index,
#define PORT_MASK ((1 << 16)- 1)
void
-transport_endpoint_del (u32 tepi)
+transport_endpoint_free (u32 tepi)
{
- clib_spinlock_lock_if_init (&local_endpoints_lock);
- pool_put_index (local_endpoints, tepi);
- clib_spinlock_unlock_if_init (&local_endpoints_lock);
+ transport_main_t *tm = &tp_main;
+ pool_put_index (tm->local_endpoints, tepi);
}
always_inline local_endpoint_t *
-transport_endpoint_new (void)
+transport_endpoint_alloc (void)
{
+ transport_main_t *tm = &tp_main;
local_endpoint_t *lep;
- pool_get_zero (local_endpoints, lep);
+
+ ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
+
+ pool_get_aligned_safe (tm->local_endpoints, lep, 0);
return lep;
}
+static void
+transport_cleanup_freelist (void)
+{
+ transport_main_t *tm = &tp_main;
+ local_endpoint_t *lep;
+ u32 *lep_indexp;
+
+ clib_spinlock_lock (&tm->local_endpoints_lock);
+
+ vec_foreach (lep_indexp, tm->lcl_endpts_freelist)
+ {
+ lep = pool_elt_at_index (tm->local_endpoints, *lep_indexp);
+
+      /* Port was re-shared after cleanup was scheduled, keep it */
+ if (lep->refcnt > 0)
+ continue;
+
+ transport_endpoint_table_del (&tm->local_endpoints_table, lep->proto,
+ &lep->ep);
+ transport_endpoint_free (*lep_indexp);
+ }
+
+ vec_reset_length (tm->lcl_endpts_freelist);
+
+ tm->lcl_endpts_cleanup_pending = 0;
+
+ clib_spinlock_unlock (&tm->local_endpoints_lock);
+}
+
void
-transport_endpoint_cleanup (u8 proto, ip46_address_t * lcl_ip, u16 port)
+transport_program_endpoint_cleanup (u32 lepi)
+{
+ transport_main_t *tm = &tp_main;
+ u8 flush_fl = 0;
+
+ /* All workers can free connections. Synchronize access to freelist */
+ clib_spinlock_lock (&tm->local_endpoints_lock);
+
+ vec_add1 (tm->lcl_endpts_freelist, lepi);
+
+ /* Avoid accumulating lots of endpoints for cleanup */
+ if (!tm->lcl_endpts_cleanup_pending &&
+ vec_len (tm->lcl_endpts_freelist) > 32)
+ {
+ tm->lcl_endpts_cleanup_pending = 1;
+ flush_fl = 1;
+ }
+
+ clib_spinlock_unlock (&tm->local_endpoints_lock);
+
+ if (flush_fl)
+ session_send_rpc_evt_to_thread_force (transport_cl_thread (),
+ transport_cleanup_freelist, 0);
+}
+
+int
+transport_release_local_endpoint (u8 proto, ip46_address_t *lcl_ip, u16 port)
{
+ transport_main_t *tm = &tp_main;
local_endpoint_t *lep;
u32 lepi;
- /* Cleanup local endpoint if this was an active connect */
- lepi = transport_endpoint_lookup (&local_endpoints_table, proto, lcl_ip,
- clib_net_to_host_u16 (port));
- if (lepi != ENDPOINT_INVALID_INDEX)
+ lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto, lcl_ip,
+ port);
+ if (lepi == ENDPOINT_INVALID_INDEX)
+ return -1;
+
+ /* First worker may be cleaning up ports so avoid touching free bitmap */
+ lep = &tm->local_endpoints[lepi];
+ ASSERT (lep->refcnt >= 1);
+
+ /* Local endpoint no longer in use, program cleanup */
+ if (!clib_atomic_sub_fetch (&lep->refcnt, 1))
{
- lep = pool_elt_at_index (local_endpoints, lepi);
- if (!clib_atomic_sub_fetch (&lep->refcnt, 1))
- {
- transport_endpoint_table_del (&local_endpoints_table, proto,
- &lep->ep);
- transport_endpoint_del (lepi);
- }
+ transport_program_endpoint_cleanup (lepi);
+ return 0;
}
+
+  /* Not an error, just an indication that the endpoint was not cleaned up */
+ return -1;
}
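
/* Editor's note (sketch, not part of the patch): workers only enqueue the
 * endpoint index on release; the control thread frees entries later and
 * must re-check refcnt because the port may have been re-shared in the
 * meantime. Minimal single-file model of that deferred cleanup (names
 * hypothetical, locking elided): */
typedef struct { int refcnt; int in_table; } lep_t;

static lep_t leps[64];
static unsigned freelist[64];
static unsigned n_free;

static void
release_endpoint (unsigned lepi) /* any worker; under a spinlock in VPP */
{
  if (--leps[lepi].refcnt == 0)
    freelist[n_free++] = lepi;	/* defer, never free inline */
}

static void
cleanup_freelist (void)		/* control thread only */
{
  unsigned i;

  for (i = 0; i < n_free; i++)
    {
      lep_t *lep = &leps[freelist[i]];
      if (lep->refcnt > 0)	/* port re-shared after release: keep it */
	continue;
      lep->in_table = 0;	/* models table_del + transport_endpoint_free */
    }
  n_free = 0;
}

int
main (void)
{
  leps[5].refcnt = 1;
  leps[5].in_table = 1;
  release_endpoint (5);
  cleanup_freelist ();
  return leps[5].in_table;	/* 0: endpoint was reclaimed */
}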
-static void
-transport_endpoint_mark_used (u8 proto, ip46_address_t * ip, u16 port)
+static int
+transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port)
{
+ transport_main_t *tm = &tp_main;
local_endpoint_t *lep;
- clib_spinlock_lock_if_init (&local_endpoints_lock);
- lep = transport_endpoint_new ();
+ u32 tei;
+
+ ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
+
+ tei =
+ transport_endpoint_lookup (&tm->local_endpoints_table, proto, ip, port);
+ if (tei != ENDPOINT_INVALID_INDEX)
+ return SESSION_E_PORTINUSE;
+
+ /* Pool reallocs with worker barrier */
+ lep = transport_endpoint_alloc ();
clib_memcpy_fast (&lep->ep.ip, ip, sizeof (*ip));
lep->ep.port = port;
+ lep->proto = proto;
lep->refcnt = 1;
- transport_endpoint_table_add (&local_endpoints_table, proto, &lep->ep,
- lep - local_endpoints);
- clib_spinlock_unlock_if_init (&local_endpoints_lock);
+
+ transport_endpoint_table_add (&tm->local_endpoints_table, proto, &lep->ep,
+ lep - tm->local_endpoints);
+
+ return 0;
}
void
transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, u16 port)
{
+ transport_main_t *tm = &tp_main;
local_endpoint_t *lep;
u32 lepi;
- lepi = transport_endpoint_lookup (&local_endpoints_table, proto, lcl_ip,
- clib_net_to_host_u16 (port));
+  /* Active opens should call this only from a control thread, since control
+   * threads are also used to allocate and free ports. So, the pool has only
+   * one writer and potentially many readers. Listeners are allocated with
+   * the worker barrier held */
+ lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto, lcl_ip,
+ port);
if (lepi != ENDPOINT_INVALID_INDEX)
{
- lep = pool_elt_at_index (local_endpoints, lepi);
+ lep = pool_elt_at_index (tm->local_endpoints, lepi);
clib_atomic_add_fetch (&lep->refcnt, 1);
}
}
@@ -488,18 +596,22 @@ transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, u16 port)
/**
* Allocate local port and add if successful add entry to local endpoint
* table to mark the pair as used.
+ *
+ * @return port in net order or -1 if port cannot be allocated
*/
int
-transport_alloc_local_port (u8 proto, ip46_address_t * ip)
+transport_alloc_local_port (u8 proto, ip46_address_t *lcl_addr,
+ transport_endpoint_cfg_t *rmt)
{
- u16 min = 1024, max = 65535; /* XXX configurable ? */
+ transport_main_t *tm = &tp_main;
+ u16 min = tm->port_allocator_min_src_port;
+ u16 max = tm->port_allocator_max_src_port;
int tries, limit;
- u32 tei;
limit = max - min;
- /* Only support active opens from thread 0 */
- ASSERT (vlib_get_thread_index () == 0);
+ /* Only support active opens from one of ctrl threads */
+ ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
/* Search for first free slot */
for (tries = 0; tries < limit; tries++)
@@ -509,19 +621,26 @@ transport_alloc_local_port (u8 proto, ip46_address_t * ip)
/* Find a port in the specified range */
while (1)
{
- port = random_u32 (&port_allocator_seed) & PORT_MASK;
+ port = random_u32 (&tm->port_allocator_seed) & PORT_MASK;
if (PREDICT_TRUE (port >= min && port < max))
- break;
+ {
+ port = clib_host_to_net_u16 (port);
+ break;
+ }
}
- /* Look it up. If not found, we're done */
- tei = transport_endpoint_lookup (&local_endpoints_table, proto, ip,
- port);
- if (tei == ENDPOINT_INVALID_INDEX)
- {
- transport_endpoint_mark_used (proto, ip, port);
- return port;
- }
+ if (!transport_endpoint_mark_used (proto, lcl_addr, port))
+ return port;
+
+ /* IP:port pair already in use, check if 6-tuple available */
+ if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip, port,
+ rmt->port, proto, rmt->is_ip4))
+ continue;
+
+ /* 6-tuple is available so increment lcl endpoint refcount */
+ transport_share_local_endpoint (proto, lcl_addr, port);
+
+ return port;
}
return -1;
}
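
/* Editor's note (sketch, not part of the patch): the allocator draws
 * random ports in [min, max), converts to network order, and now falls
 * back to sharing an in-use ip:port when the full 6-tuple is still free.
 * Minimal model of the retry loop; mark_used () and six_tuple_free () are
 * hypothetical stand-ins for the endpoint table and session lookup: */
#include <stdlib.h>

static int
alloc_local_port (unsigned min, unsigned max,
		  int (*mark_used) (unsigned port),
		  int (*six_tuple_free) (unsigned port))
{
  int tries, limit = max - min;

  for (tries = 0; tries < limit; tries++)
    {
      unsigned port = min + ((unsigned) rand () % (max - min));

      if (mark_used (port) == 0)	/* ip:port pair was free */
	return (int) port;

      /* pair in use: still usable if the 6-tuple is free; the real code
       * then bumps the shared endpoint's refcount */
      if (six_tuple_free (port))
	return (int) port;
    }
  return -1;			/* range exhausted */
}

static int always_used (unsigned p) { (void) p; return -1; }
static int always_free (unsigned p) { (void) p; return 1; }

int
main (void)
{
  /* every pair "in use" but 6-tuples free: first draw succeeds */
  return alloc_local_port (1024, 65535, always_used, always_free) < 0;
}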
@@ -584,9 +703,9 @@ transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg,
ip46_address_t * lcl_addr, u16 * lcl_port)
{
transport_endpoint_t *rmt = (transport_endpoint_t *) rmt_cfg;
+ transport_main_t *tm = &tp_main;
session_error_t error;
int port;
- u32 tei;
/*
* Find the local address
@@ -605,26 +724,37 @@ transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg,
sizeof (rmt_cfg->peer.ip));
}
+ /* Cleanup freelist if need be */
+ if (vec_len (tm->lcl_endpts_freelist))
+ transport_cleanup_freelist ();
+
/*
* Allocate source port
*/
if (rmt_cfg->peer.port == 0)
{
- port = transport_alloc_local_port (proto, lcl_addr);
+ port = transport_alloc_local_port (proto, lcl_addr, rmt_cfg);
if (port < 1)
return SESSION_E_NOPORT;
*lcl_port = port;
}
else
{
- port = clib_net_to_host_u16 (rmt_cfg->peer.port);
- *lcl_port = port;
- tei = transport_endpoint_lookup (&local_endpoints_table, proto,
- lcl_addr, port);
- if (tei != ENDPOINT_INVALID_INDEX)
+ *lcl_port = rmt_cfg->peer.port;
+
+ if (!transport_endpoint_mark_used (proto, lcl_addr, rmt_cfg->peer.port))
+ return 0;
+
+ /* IP:port pair already in use, check if 6-tuple available */
+ if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip,
+ rmt_cfg->peer.port, rmt->port, proto,
+ rmt->is_ip4))
return SESSION_E_PORTINUSE;
- transport_endpoint_mark_used (proto, lcl_addr, port);
+ /* 6-tuple is available so increment lcl endpoint refcount */
+ transport_share_local_endpoint (proto, lcl_addr, rmt_cfg->peer.port);
+
+ return 0;
}
return 0;
@@ -660,15 +790,15 @@ static inline u32
spacer_max_burst (spacer_t * pacer, clib_us_time_t time_now)
{
u64 n_periods = (time_now - pacer->last_update);
- u64 inc;
+ i64 inc;
if ((inc = (f32) n_periods * pacer->tokens_per_period) > 10)
{
pacer->last_update = time_now;
- pacer->bucket = clib_min (pacer->bucket + inc, pacer->max_burst);
+ pacer->bucket = clib_min (pacer->bucket + inc, (i64) pacer->max_burst);
}
- return pacer->bucket > 0 ? pacer->max_burst : 0;
+ return pacer->bucket >= 0 ? pacer->max_burst : 0;
}
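
The pacer change above makes the token bucket signed: a connection is granted
max_burst whenever the bucket is non-negative, so it can overdraw the bucket
once and must then wait for tokens to accrue back above zero. A minimal sketch
of the resulting semantics (illustrative struct, not the VPP spacer_t):

  typedef struct
  {
    i64 bucket;		/* signed: may go into debt */
    f32 tokens_per_period;
    u64 last_update;
    u64 max_burst;
  } pacer_sketch_t;

  static u32
  pacer_burst_sketch (pacer_sketch_t *p, u64 time_now)
  {
    i64 inc = (f32) (time_now - p->last_update) * p->tokens_per_period;
    if (inc > 10)
      {
        p->last_update = time_now;
        p->bucket = clib_min (p->bucket + inc, (i64) p->max_burst);
      }
    /* >= 0, not > 0: a zero or just-replenished bucket still grants a burst */
    return p->bucket >= 0 ? p->max_burst : 0;
  }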
static inline void
@@ -790,7 +920,7 @@ void
transport_connection_reschedule (transport_connection_t * tc)
{
tc->flags &= ~TRANSPORT_CONNECTION_F_DESCHED;
- transport_connection_tx_pacer_reset_bucket (tc, TRANSPORT_PACER_MIN_BURST);
+ transport_connection_tx_pacer_reset_bucket (tc, 0 /* bucket */);
if (transport_max_tx_dequeue (tc))
sesssion_reschedule_tx (tc);
else
@@ -830,6 +960,9 @@ transport_enable_disable (vlib_main_t * vm, u8 is_en)
{
if (vft->enable)
(vft->enable) (vm, is_en);
+
+ if (vft->update_time)
+ session_register_update_time_fn (vft->update_time, is_en);
}
}
@@ -838,6 +971,7 @@ transport_init (void)
{
vlib_thread_main_t *vtm = vlib_get_thread_main ();
session_main_t *smm = vnet_get_session_main ();
+ transport_main_t *tm = &tp_main;
u32 num_threads;
if (smm->local_endpoints_table_buckets == 0)
@@ -846,15 +980,18 @@ transport_init (void)
smm->local_endpoints_table_memory = 512 << 20;
/* Initialize [port-allocator] random number seed */
- port_allocator_seed = (u32) clib_cpu_time_now ();
+ tm->port_allocator_seed = (u32) clib_cpu_time_now ();
+ tm->port_allocator_min_src_port = smm->port_allocator_min_src_port;
+ tm->port_allocator_max_src_port = smm->port_allocator_max_src_port;
- clib_bihash_init_24_8 (&local_endpoints_table, "local endpoints table",
+ clib_bihash_init_24_8 (&tm->local_endpoints_table, "local endpoints table",
smm->local_endpoints_table_buckets,
smm->local_endpoints_table_memory);
+ clib_spinlock_init (&tm->local_endpoints_lock);
+
num_threads = 1 /* main thread */ + vtm->n_threads;
if (num_threads > 1)
{
- clib_spinlock_init (&local_endpoints_lock);
/* Main not polled if there are workers */
smm->transport_cl_thread = 1;
}
diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h
index 2cfec06ec94..e6ba1ecbc5f 100644
--- a/src/vnet/session/transport.h
+++ b/src/vnet/session/transport.h
@@ -57,6 +57,7 @@ typedef struct transport_send_params_
struct
{
u32 max_burst_size;
+ u32 bytes_dequeued;
};
};
transport_snd_flags_t flags;
@@ -65,13 +66,12 @@ typedef struct transport_send_params_
/*
* Transport protocol virtual function table
*/
-/* *INDENT-OFF* */
typedef struct _transport_proto_vft
{
/*
* Setup
*/
- u32 (*start_listen) (u32 session_index, transport_endpoint_t * lcl);
+ u32 (*start_listen) (u32 session_index, transport_endpoint_cfg_t *lcl);
u32 (*stop_listen) (u32 conn_index);
int (*connect) (transport_endpoint_cfg_t * rmt);
void (*half_close) (u32 conn_index, u32 thread_index);
@@ -85,7 +85,8 @@ typedef struct _transport_proto_vft
* Transmission
*/
- u32 (*push_header) (transport_connection_t * tconn, vlib_buffer_t * b);
+ u32 (*push_header) (transport_connection_t *tconn, vlib_buffer_t **b,
+ u32 n_bufs);
int (*send_params) (transport_connection_t * tconn,
transport_send_params_t *sp);
void (*update_time) (f64 time_now, u8 thread_index);
@@ -123,7 +124,6 @@ typedef struct _transport_proto_vft
*/
transport_options_t transport_options;
} transport_proto_vft_t;
-/* *INDENT-ON* */
extern transport_proto_vft_t *tp_vfts;
@@ -138,7 +138,7 @@ void transport_half_close (transport_proto_t tp, u32 conn_index,
void transport_close (transport_proto_t tp, u32 conn_index, u8 thread_index);
void transport_reset (transport_proto_t tp, u32 conn_index, u8 thread_index);
u32 transport_start_listen (transport_proto_t tp, u32 session_index,
- transport_endpoint_t * tep);
+ transport_endpoint_cfg_t *tep);
u32 transport_stop_listen (transport_proto_t tp, u32 conn_index);
void transport_cleanup (transport_proto_t tp, u32 conn_index,
u8 thread_index);
@@ -244,13 +244,14 @@ transport_register_new_protocol (const transport_proto_vft_t * vft,
transport_proto_vft_t *transport_protocol_get_vft (transport_proto_t tp);
void transport_update_time (clib_time_type_t time_now, u8 thread_index);
-int transport_alloc_local_port (u8 proto, ip46_address_t * ip);
-int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt,
- ip46_address_t * lcl_addr,
- u16 * lcl_port);
+int transport_alloc_local_port (u8 proto, ip46_address_t *ip,
+ transport_endpoint_cfg_t *rmt);
+int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t *rmt,
+ ip46_address_t *lcl_addr, u16 *lcl_port);
void transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip,
u16 port);
-void transport_endpoint_cleanup (u8 proto, ip46_address_t * lcl_ip, u16 port);
+int transport_release_local_endpoint (u8 proto, ip46_address_t *lcl_ip,
+ u16 port);
void transport_enable_disable (vlib_main_t * vm, u8 is_en);
void transport_init (void);
@@ -327,6 +328,19 @@ transport_connection_is_tx_paced (transport_connection_t * tc)
return (tc->flags & TRANSPORT_CONNECTION_F_IS_TX_PACED);
}
+/**
+ * Clear descheduled flag and update pacer if needed
+ *
+ * To add session to scheduler use @ref transport_connection_reschedule
+ */
+always_inline void
+transport_connection_clear_descheduled (transport_connection_t *tc)
+{
+ tc->flags &= ~TRANSPORT_CONNECTION_F_DESCHED;
+ if (transport_connection_is_tx_paced (tc))
+ transport_connection_tx_pacer_reset_bucket (tc, 0 /* bucket */);
+}
+
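
The new helper only clears the flag and resets the pacer; unlike
transport_connection_reschedule it does not hand the connection back to the
session-layer scheduler. A plausible caller pattern, inferred from the two
helpers (illustrative, not taken from the tree):

  if (tc->flags & TRANSPORT_CONNECTION_F_DESCHED)
    {
      if (transport_max_tx_dequeue (tc))
        transport_connection_reschedule (tc); /* data pending: re-add */
      else
        transport_connection_clear_descheduled (tc); /* just clear state */
    }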
u8 *format_transport_pacer (u8 * s, va_list * args);
/**
diff --git a/src/vnet/session/transport_types.h b/src/vnet/session/transport_types.h
index 9ea1f2102b4..b3469fa9fdb 100644
--- a/src/vnet/session/transport_types.h
+++ b/src/vnet/session/transport_types.h
@@ -21,10 +21,8 @@
#include <vnet/tcp/tcp_debug.h>
#include <vppinfra/bihash_24_8.h>
-
#define TRANSPORT_MAX_HDRS_LEN 140 /* Max number of bytes for headers */
-
typedef enum transport_dequeue_type_
{
TRANSPORT_TX_PEEK, /**< reliable transport protos */
@@ -42,24 +40,35 @@ typedef enum transport_service_type_
TRANSPORT_N_SERVICES
} transport_service_type_t;
+/*
+ * IS_TX_PACED : Connection sending is paced
+ * NO_LOOKUP: Don't register connection in lookup. Does not apply to local
+ * apps and transports using the network layer (udp/tcp)
+ * DESCHED: Connection descheduled by the session layer
+ * CLESS: Connection is "connection less". Some important implications of that
+ * are that connections are not pinned to workers and listeners will
+ * have fifos associated to them
+ */
+#define foreach_transport_connection_flag \
+ _ (IS_TX_PACED, "tx_paced") \
+ _ (NO_LOOKUP, "no_lookup") \
+ _ (DESCHED, "descheduled") \
+ _ (CLESS, "connectionless")
+
+typedef enum transport_connection_flags_bits_
+{
+#define _(sym, str) TRANSPORT_CONNECTION_F_BIT_##sym,
+ foreach_transport_connection_flag
+#undef _
+ TRANSPORT_CONNECTION_N_FLAGS
+} transport_connection_flags_bits_t;
+
typedef enum transport_connection_flags_
{
- TRANSPORT_CONNECTION_F_IS_TX_PACED = 1 << 0,
- /**
- * Don't register connection in lookup. Does not apply to local apps
- * and transports using the network layer (udp/tcp)
- */
- TRANSPORT_CONNECTION_F_NO_LOOKUP = 1 << 1,
- /**
- * Connection descheduled by the session layer.
- */
- TRANSPORT_CONNECTION_F_DESCHED = 1 << 2,
- /**
- * Connection is "connection less". Some important implications of that
- * are that connections are not pinned to workers and listeners will
- * have fifos associated to them
- */
- TRANSPORT_CONNECTION_F_CLESS = 1 << 3,
+#define _(sym, str) \
+ TRANSPORT_CONNECTION_F_##sym = 1 << TRANSPORT_CONNECTION_F_BIT_##sym,
+ foreach_transport_connection_flag
+#undef _
} transport_connection_flags_t;
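
Converting the flags to an X-macro keeps the bit definitions and their
human-readable names in one list, which is presumably what the
format_transport_flags declaration added below builds on. A sketch of such a
formatter over the same macro (illustrative only):

  u8 *
  format_transport_flags_sketch (u8 *s, va_list *args)
  {
    transport_connection_flags_t flags = va_arg (*args, int);
  #define _(sym, str)                                                         \
    if (flags & TRANSPORT_CONNECTION_F_##sym)                                 \
      s = format (s, "%s ", str);
    foreach_transport_connection_flag
  #undef _
    return s;
  }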
typedef struct _spacer
@@ -106,6 +115,7 @@ typedef struct _transport_connection
u32 c_index; /**< Connection index in transport pool */
u32 thread_index; /**< Worker-thread index */
u8 flags; /**< Transport specific flags */
+ u8 dscp; /**< Differentiated Services Code Point */
/*fib_node_index_t rmt_fei;
dpo_id_t rmt_dpo; */
@@ -114,7 +124,7 @@ typedef struct _transport_connection
#if TRANSPORT_DEBUG
elog_track_t elog_track; /**< Event logging */
- u32 cc_stat_tstamp; /**< CC stats timestamp */
+ f64 cc_stat_tstamp; /**< CC stats timestamp */
#endif
/**
@@ -146,6 +156,7 @@ typedef struct _transport_connection
#define c_stats connection.stats
#define c_pacer connection.pacer
#define c_flags connection.flags
+#define c_dscp connection.dscp
#define s_ho_handle pacer.bytes_per_sec
} transport_connection_t;
@@ -164,7 +175,8 @@ STATIC_ASSERT (sizeof (transport_connection_t) <= 128,
_ (TLS, "tls", "J") \
_ (QUIC, "quic", "Q") \
_ (DTLS, "dtls", "D") \
- _ (SRTP, "srtp", "R")
+ _ (SRTP, "srtp", "R") \
+ _ (HTTP, "http", "H")
typedef enum _transport_proto
{
@@ -175,6 +187,7 @@ typedef enum _transport_proto
u8 *format_transport_proto (u8 * s, va_list * args);
u8 *format_transport_proto_short (u8 * s, va_list * args);
+u8 *format_transport_flags (u8 *s, va_list *args);
u8 *format_transport_connection (u8 * s, va_list * args);
u8 *format_transport_listen_connection (u8 * s, va_list * args);
u8 *format_transport_half_open_connection (u8 * s, va_list * args);
@@ -209,6 +222,7 @@ typedef enum transport_endpt_cfg_flags_
_ (u32, next_node_index) \
_ (u32, next_node_opaque) \
_ (u16, mss) \
+ _ (u8, dscp) \
_ (u8, transport_flags) \
/* clang-format on */
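
With dscp added both to transport_connection_t (c_dscp above) and to the
endpoint-config field list, the expectation is that a transport seeds the
per-connection value from the endpoint config at connect time and later uses
it when building IP headers, along the lines of (illustrative):

  tc->dscp = rmt_cfg->dscp;	/* then placed in the IP header on tx */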
diff --git a/src/vnet/snap/node.c b/src/vnet/snap/node.c
index 2a42907321c..ad88b2b3a90 100644
--- a/src/vnet/snap/node.c
+++ b/src/vnet/snap/node.c
@@ -261,7 +261,6 @@ static char *snap_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (snap_input_node) = {
.function = snap_input,
.name = "snap-input",
@@ -282,7 +281,6 @@ VLIB_REGISTER_NODE (snap_input_node) = {
.format_trace = format_snap_input_trace,
.unformat_buffer = unformat_snap_header,
};
-/* *INDENT-ON* */
static void
snap_setup_node (vlib_main_t *vm, u32 node_index)
diff --git a/src/vnet/snap/snap.h b/src/vnet/snap/snap.h
index f6b3be1847f..028df4ede66 100644
--- a/src/vnet/snap/snap.h
+++ b/src/vnet/snap/snap.h
@@ -75,7 +75,6 @@ typedef enum
typedef union
{
- /* *INDENT-OFF* */
CLIB_PACKED (struct {
/* OUI: organization unique identifier. */
u8 oui[3];
@@ -83,7 +82,6 @@ typedef union
/* Per-OUI protocol. */
u16 protocol;
});
- /* *INDENT-ON* */
u8 as_u8[5];
} snap_header_t;
diff --git a/src/vnet/span/node.c b/src/vnet/span/node.c
index ca5ea68ae90..56977b58dc2 100644
--- a/src/vnet/span/node.c
+++ b/src/vnet/span/node.c
@@ -84,7 +84,6 @@ span_mirror (vlib_main_t * vm, vlib_node_runtime_t * node, u32 sw_if_index0,
if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_SPAN_CLONE))
return;
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, sm0->mirror_ports)
{
if (mirror_frames[i] == 0)
@@ -122,7 +121,6 @@ span_mirror (vlib_main_t * vm, vlib_node_runtime_t * node, u32 sw_if_index0,
}
}
}
- /* *INDENT-ON* */
}
static_always_inline uword
@@ -304,7 +302,6 @@ VLIB_NODE_FN (span_l2_output_node) (vlib_main_t * vm,
[0] = "error-drop" \
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (span_input_node) = {
span_node_defs,
.name = "span-input",
@@ -349,7 +346,6 @@ clib_error_t *span_init (vlib_main_t * vm)
}
VLIB_INIT_FUNCTION (span_init);
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
#undef span_node_defs
diff --git a/src/vnet/span/span.c b/src/vnet/span/span.c
index ec47920504a..bf5e20f4d14 100644
--- a/src/vnet/span/span.c
+++ b/src/vnet/span/span.c
@@ -87,6 +87,9 @@ span_add_delete_entry (vlib_main_t * vm,
if (enable_rx || disable_rx)
vnet_feature_enable_disable ("device-input", "span-input",
src_sw_if_index, rx, 0, 0);
+ if (enable_rx || disable_rx)
+ vnet_feature_enable_disable ("port-rx-eth", "span-input",
+ src_sw_if_index, rx, 0, 0);
if (enable_tx || disable_tx)
vnet_feature_enable_disable ("interface-output", "span-output",
src_sw_if_index, tx, 0, 0);
@@ -163,13 +166,11 @@ set_interface_span_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_span_command, static) = {
.path = "set interface span",
.short_help = "set interface span <if-name> [l2] {disable | destination <if-name> [both|rx|tx]}",
.function = set_interface_span_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_interfaces_span_command_fn (vlib_main_t * vm,
@@ -188,7 +189,6 @@ show_interfaces_span_command_fn (vlib_main_t * vm,
};
u8 *s = 0;
- /* *INDENT-OFF* */
vec_foreach (si, sm->interfaces)
{
span_mirror_t * drxm = &si->mirror_rxtx[SPAN_FEAT_DEVICE][VLIB_RX];
@@ -229,18 +229,15 @@ show_interfaces_span_command_fn (vlib_main_t * vm,
clib_bitmap_free (d);
}
}
- /* *INDENT-ON* */
vec_free (s);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_interfaces_span_command, static) = {
.path = "show interface span",
.short_help = "Shows SPAN mirror table",
.function = show_interfaces_span_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/span/span_api.c b/src/vnet/span/span_api.c
index 300f619934e..f5b24bdf214 100644
--- a/src/vnet/span/span_api.c
+++ b/src/vnet/span/span_api.c
@@ -61,7 +61,6 @@ vl_api_sw_interface_span_dump_t_handler (vl_api_sw_interface_span_dump_t * mp)
return;
span_feat_t sf = mp->is_l2 ? SPAN_FEAT_L2 : SPAN_FEAT_DEVICE;
- /* *INDENT-OFF* */
vec_foreach (si, sm->interfaces)
{
span_mirror_t * rxm = &si->mirror_rxtx[sf][VLIB_RX];
@@ -90,7 +89,6 @@ vl_api_sw_interface_span_dump_t_handler (vl_api_sw_interface_span_dump_t * mp)
clib_bitmap_free (b);
}
}
- /* *INDENT-ON* */
}
#include <vnet/span/span.api.c>
diff --git a/src/vnet/srmpls/FEATURE.yaml b/src/vnet/srmpls/FEATURE.yaml
deleted file mode 100644
index c5b958224c7..00000000000
--- a/src/vnet/srmpls/FEATURE.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
----
-name: Segment Routing for MPLS
-maintainer: Pablo Camarillo <pcamaril@cisco.com>
-features:
- - SR Policy support
- - Automated steering (SR steering based on NextHop/Color)
-description: "SR-MPLS"
-state: production
-properties: [API, CLI, MULTITHREAD]
diff --git a/src/vnet/srmpls/dir.dox b/src/vnet/srmpls/dir.dox
deleted file mode 100755
index 76ec1d6a41b..00000000000
--- a/src/vnet/srmpls/dir.dox
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *
- * Copyright (c) 2013 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- @dir
- @brief Segment Routing MPLS code
-
- An implementation of Segment Routing for the MPLS dataplane.
-
-*/ \ No newline at end of file
diff --git a/src/vnet/srmpls/sr_doc.rst b/src/vnet/srmpls/sr_doc.rst
deleted file mode 100644
index ed847fa0d42..00000000000
--- a/src/vnet/srmpls/sr_doc.rst
+++ /dev/null
@@ -1,215 +0,0 @@
-.. _srmpls_doc:
-
-SR-MPLS: Segment Routing for MPLS
-=================================
-
-This is a memo intended to contain documentation of the VPP SR-MPLS
-implementation. Everything that is not directly obvious should come
-here. For any feedback on content that should be explained, please
-use mailto:pcamaril@cisco.com
-
-Segment Routing
----------------
-
-Segment routing is a network technology focused on addressing the
-limitations of existing IP and Multiprotocol Label Switching (MPLS)
-networks in terms of simplicity, scale, and ease of operation. It is a
-foundation for application engineered routing as it prepares the
-networks for new business models where applications can control the
-network behavior.
-
-Segment routing seeks the right balance between distributed intelligence
-and centralized optimization and programming. It was built for the
-software-defined networking (SDN) era.
-
-Segment routing enhances packet forwarding behavior by enabling a
-network to transport unicast packets through a specific forwarding path,
-different from the normal path that a packet usually takes (IGP shortest
-path or BGP best path). This capability benefits many use cases, and one
-can build those specific paths based on application requirements.
-
-Segment routing uses the source routing paradigm. A node, usually a
-router but also a switch, a trusted server, or a virtual forwarder
-running on a hypervisor, steers a packet through an ordered list of
-instructions, called segments. A segment can represent any instruction,
-topological or service-based. A segment can have a local semantic to a
-segment-routing node or global within a segment-routing network. Segment
-routing allows an operator to enforce a flow through any topological
-path and service chain while maintaining per-flow state only at the
-ingress node to the segment-routing network. Segment routing also
-supports equal-cost multipath (ECMP) by design.
-
-Segment routing can operate with either an MPLS or an IPv6 data plane.
-All the currently available MPLS services, such as Layer 3 VPN (L3VPN),
-L2VPN (Virtual Private Wire Service [VPWS], Virtual Private LAN Services
-[VPLS], Ethernet VPN [E-VPN], and Provider Backbone Bridging Ethernet
-VPN [PBB-EVPN]), can run on top of a segment-routing transport network.
-
-**The implementation of Segment Routing in VPP covers both the IPv6 data
-plane (SRv6) as well as the MPLS data plane (SR-MPLS). This page
-contains the SR-MPLS documentation.**
-
-Segment Routing terminology
----------------------------
-
-- SegmentID (SID): is an MPLS label.
-- Segment List (SL) (SID List): is the sequence of SIDs that the packet
- will traverse.
-- SR Policy: is a set of candidate paths (SID list+weight). An SR
- policy is uniquely identified by its Binding SID and associated with
- a weighted set of Segment Lists. In case several SID lists are
- defined, traffic steered into the policy is unevenly load-balanced
- among them according to their respective weights.
-- BindingSID: a BindingSID is a SID (only one) associated one-to-one with
- an SR Policy. If a packet arrives with an MPLS label corresponding to a
- BindingSID, then the SR policy will be applied to that packet.
- (The BindingSID is popped first.)
-
-SR-MPLS features in VPP
------------------------
-
-The SR-MPLS implementation is focused on SR policies, as well as on their
-steering. Other SR-MPLS features, such as AdjSIDs, can be achieved using
-the regular VPP MPLS implementation.
-
-The Segment Routing Policy
-(*draft-filsfils-spring-segment-routing-policy*) defines SR Policies.
-
-Creating a SR Policy
---------------------
-
-An SR Policy is defined by a Binding SID and a weighted set of Segment
-Lists.
-
-A new SR policy is created with a first SID list using:
-
-::
-
- sr mpls policy add bsid 40001 next 16001 next 16002 next 16003 (weight 5)
-
-- The weight parameter is only used if more than one SID list is
- associated with the policy.
-
-An SR policy is deleted with:
-
-::
-
- sr mpls policy del bsid 40001
-
-The existing SR policies are listed with:
-
-::
-
- show sr mpls policies
-
-Adding/Removing SID Lists from an SR policy
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-An additional SID list is associated with an existing SR policy with:
-
-::
-
- sr mpls policy mod bsid 40001 add sl next 16001 next 16002 next 16003 (weight 3)
-
-Conversely, a SID list can be removed from an SR policy with:
-
-::
-
   sr mpls policy mod bsid 40001 del sl index 1
-
-Note that this CLI cannot be used to remove the last SID list of a
-policy. Instead the SR policy delete CLI must be used.
-
-The weight of a SID list can also be modified with:
-
-::
-
- sr mpls policy mod bsid 40001 mod sl index 1 weight 4
-
-SR Policies: Spray policies
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Spray policies are a specific type of SR policies where the packet is
-replicated on all the SID lists, rather than load-balanced among them.
-
-SID list weights are ignored with this type of policy.
-
-A Spray policy is instantiated by appending the keyword **spray** to a
-regular SR-MPLS policy command, as in:
-
-::
-
- sr mpls policy add bsid 40002 next 16001 next 16002 next 16003 spray
-
-Spray policies are used to remove multicast state from a network core
-domain by sending a linear unicast copy to every access node instead. The
-last SID in each list accesses the multicast tree within the access
-node.
-
-Steering packets into a SR Policy
----------------------------------
-
-Segment Routing supports three methods of steering traffic into an SR
-policy.
-
-Local steering
-~~~~~~~~~~~~~~
-
-In this variant incoming packets match a routing policy which directs
-them onto a local SR policy.
-
-In order to achieve this behavior, the user needs to create an ‘sr
-steering policy via sr policy bsid’.
-
-::
-
- sr mpls steer l3 2001::/64 via sr policy bsid 40001
- sr mpls steer l3 2001::/64 via sr policy bsid 40001 fib-table 3
- sr mpls steer l3 10.0.0.0/16 via sr policy bsid 40001
- sr mpls steer l3 10.0.0.0/16 via sr policy bsid 40001 vpn-label 500
-
-Remote steering
-~~~~~~~~~~~~~~~
-
-In this variant incoming packets have an active SID matching a local
-BSID at the head-end.
-
-In order to achieve this behavior, the packets should simply arrive with
-an active SID equal to the Binding SID of a locally instantiated SR
-policy.
-
-Automated steering
-~~~~~~~~~~~~~~~~~~
-
-In this variant incoming packets match a BGP/Service route which
-recurses on the BSID of a local policy.
-
-In order to achieve this behavior, the user first needs to color the SR
-policies. This can be done using the CLI:
-
-::
-
- sr mpls policy te bsid xxxxx endpoint x.x.x.x color 12341234
-
-Notice that an SR policy can have a single endpoint and a single color,
-where the *endpoint* value is an IP46 address and the color a u32.
-
-Then, for any BGP/Service route the user has to use the API to steer
-prefixes:
-
-::
-
- sr steer l3 2001::/64 via next-hop 2001::1 color 1234 co 2
- sr steer l3 2001::/64 via next-hop 2001::1 color 1234 co 2 vpn-label 500
-
-Notice that *co* refers to the CO-bits (values [0|1|2|3]).
-
-Notice also that a given prefix might be steered over several colors
-(same next-hop and same co-bit value). In order to add new colors just
-execute the API several times (or with the del parameter to delete the
-color).
-
-This variant is meant to be used in conjunction with a control plane
-agent that uses the underlying binary API bindings of
-*sr_mpls_steering_policy_add*/*sr_mpls_steering_policy_del* for any BGP
-service route received.
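
The removed document steered policies via CLI; the same operations were
reachable from C through the prototypes in the sr_mpls.h removed below. A
small sketch of the programmatic equivalent of the documented 'sr mpls policy
add' example (label and weight values are illustrative):

  mpls_label_t labels[] = { 16001, 16002, 16003 };
  mpls_label_t *sl = 0;
  int i, rv;

  for (i = 0; i < 3; i++)
    vec_add1 (sl, labels[i]);

  /* bsid 40001, default (non-spray) behavior, weight 5 */
  rv = sr_mpls_policy_add (40001, sl, SR_POLICY_TYPE_DEFAULT, 5);
  vec_free (sl);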
diff --git a/src/vnet/srmpls/sr_mpls.api b/src/vnet/srmpls/sr_mpls.api
deleted file mode 100644
index 742f135d493..00000000000
--- a/src/vnet/srmpls/sr_mpls.api
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2015-2016 Cisco and/or its affiliates. Licensed under the
- * Apache License, Version 2.0 (the "License"); you may not use this file
- * except in compliance with the License. You may obtain a copy of the
- * License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-
-option version = "3.0.0";
-
-import "vnet/interface_types.api";
-import "vnet/ip/ip_types.api";
-import "vnet/srv6/sr_types.api";
-
-/** \brief MPLS SR policy add
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param bsid - is the bindingSID of the SR Policy. MPLS label (20bit)
- @param weight - is the weight of the sid list. optional.
- @param is_spray - is the type of the SR policy. (0.Default // 1.Spray)
- @param segments - vector of labels (20bit) composing the segment list
-*/
-autoreply define sr_mpls_policy_add
-{
- u32 client_index;
- u32 context;
- u32 bsid;
- u32 weight;
- bool is_spray;
- u8 n_segments;
- u32 segments[n_segments];
-};
-
-/** \brief MPLS SR policy modification
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param bsid is the bindingSID of the SR Policy. MPLS label (20bit)
- @param operation is the operation to perform (among the top ones)
- @param segments is a vector of MPLS labels composing the segment list
- @param sl_index is the index of the Segment List to modify/delete
- @param weight is the weight of the sid list. optional.
-*/
-autoreply define sr_mpls_policy_mod
-{
- u32 client_index;
- u32 context;
- u32 bsid;
- vl_api_sr_policy_op_t operation;
- u32 sl_index;
- u32 weight;
- u8 n_segments;
- u32 segments[n_segments];
-};
-
-/** \brief MPLS SR policy deletion
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param bsid is the bindingSID of the SR Policy. MPLS label (20bit)
-*/
-autoreply define sr_mpls_policy_del
-{
- u32 client_index;
- u32 context;
- u32 bsid;
-};
-
-/** \brief MPLS SR steering add/del
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param is_del
- @param bsid - is the bindingSID of the SR Policy (~0 is no bsid)
- @param table_id - is the VRF where to install the FIB entry for the BSID
- @param prefix - is the IPv4/v6 address for L3 traffic type.
- @param mask_width - is the mask for L3 traffic type
- @param next_hop - describes the next_hop (in case no BSID)
- @param color - describes the color
- @param co_bits - are the CO_bits of the steering policy
- @param vpn_label - is an additional last VPN label. (~0 is no label)
-*/
-autoreply define sr_mpls_steering_add_del
-{
- u32 client_index;
- u32 context;
- bool is_del[default = false];
- u32 bsid;
- u32 table_id;
- vl_api_prefix_t prefix;
- u32 mask_width;
- vl_api_address_t next_hop;
- u32 color;
- u8 co_bits;
- u32 vpn_label;
-};
-
-/** \brief MPLS SR steering add/del
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param bsid is the bindingSID of the SR Policy
- @param endpoint is the endpoint of the SR policy
- @param color is the color of the sr policy
-*/
-autoreply define sr_mpls_policy_assign_endpoint_color
-{
- u32 client_index;
- u32 context;
- u32 bsid;
- vl_api_address_t endpoint;
- u32 color;
-};
-
-/*
- * fd.io coding-style-patch-verification: ON Local Variables: eval:
- * (c-set-style "gnu") End:
- */
diff --git a/src/vnet/srmpls/sr_mpls.h b/src/vnet/srmpls/sr_mpls.h
deleted file mode 100644
index a8f9494428f..00000000000
--- a/src/vnet/srmpls/sr_mpls.h
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates. Licensed under the Apache
- * License, Version 2.0 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-
-/**
- * @file
- * @brief Segment Routing MPLS data structures definitions
- *
- */
-
-#ifndef included_vnet_srmpls_h
-#define included_vnet_srmpls_h
-
-#include <vnet/vnet.h>
-#include <vnet/mpls/packet.h>
-#include <vnet/fib/mpls_fib.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ip/lookup.h>
-#include <vnet/dpo/dpo.h>
-#include <vnet/dpo/replicate_dpo.h>
-
-#include <stdlib.h>
-#include <string.h>
-
-/* SR policy types */
-#define SR_POLICY_TYPE_DEFAULT 0
-#define SR_POLICY_TYPE_SPRAY 1
-
-#define SR_SEGMENT_LIST_WEIGHT_DEFAULT 1
-
-#define SR_STEER_IPV4 4
-#define SR_STEER_IPV6 6
-
-#define SR_TE_CO_BITS_00 0
-#define SR_TE_CO_BITS_01 1
-#define SR_TE_CO_BITS_10 2
-#define SR_TE_CO_BITS_11 3
-
-/**
- * @brief SR Segment List (SID list)
- */
-typedef struct
-{
- /* SIDs (key) */
- mpls_label_t *segments;
-
- /* SID list weight (wECMP / UCMP) */
- u32 weight;
-
-} mpls_sr_sl_t;
-
-typedef struct
-{
- u32 *segments_lists; /**< Pool of SID lists indexes */
-
- mpls_label_t bsid; /**< BindingSID (key) */
-
- u8 type; /**< Type (default is 0) */
- /* SR Policy specific DPO */
- /* IF Type = DEFAULT Then Load-Balancer DPO among SID lists */
- /* IF Type = SPRAY then Spray DPO with all SID lists */
-
- ip46_address_t endpoint; /**< Optional NH for SR TE */
- u8 endpoint_type;
- u32 color; /**< Optional color for SR TE */
-} mpls_sr_policy_t;
-
-/**
- * @brief Steering db key
- *
- * L3 is IPv4/IPv6 + mask
- */
-typedef struct
-{
- ip46_address_t prefix; /**< IP address of the prefix */
- u32 mask_width; /**< Mask width of the prefix */
- u32 fib_table; /**< VRF of the prefix */
- u8 traffic_type; /**< Traffic type (IPv4, IPv6, L2) */
- u8 padding[3];
-} sr_mpls_steering_key_t;
-
-typedef struct
-{
- sr_mpls_steering_key_t classify; /**< Traffic classification */
- mpls_label_t bsid; /**< SR Policy index */
- ip46_address_t next_hop; /**< SR TE NH */
- char nh_type;
- u32 *color; /**< Vector of SR TE colors */
- char co_bits; /**< Color-Only bits */
- mpls_label_t vpn_label;
-} mpls_sr_steering_policy_t;
-
-/**
- * @brief Segment Routing main datastructure
- */
-typedef struct
-{
- /* SR SID lists */
- mpls_sr_sl_t *sid_lists;
-
- /* SR MPLS policies */
- mpls_sr_policy_t *sr_policies;
-
- /* Hash table mapping BindingSID to SR MPLS policy */
- uword *sr_policies_index_hash;
-
- /* Pool of SR steer policies instances */
- mpls_sr_steering_policy_t *steer_policies;
-
- /* MHash table mapping steering rules to SR steer instance */
- mhash_t sr_steer_policies_hash;
-
- /** SR TE **/
- /* Hash table mapping (Color->Endpoint->BSID) for SR policies */
- mhash_t sr_policies_c2e2eclabel_hash;
- /* SR TE (internal) fib table (Endpoint, Color) */
- u32 fib_table_EC;
- /* Pool of (Endpoint, Color) hidden labels */
- u32 *ec_labels;
-
- /* convenience */
- vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
-} mpls_sr_main_t;
-
-extern mpls_sr_main_t sr_mpls_main;
-
-extern int
-sr_mpls_policy_add (mpls_label_t bsid, mpls_label_t * segments,
- u8 behavior, u32 weight);
-
-extern int
-sr_mpls_policy_mod (mpls_label_t bsid, u8 operation,
- mpls_label_t * segments, u32 sl_index, u32 weight);
-
-extern int sr_mpls_policy_del (mpls_label_t bsid);
-
-extern int
-sr_mpls_policy_assign_endpoint_color (mpls_label_t bsid,
- ip46_address_t * endpoint,
- u8 endpoint_type, u32 color);
-
-extern int
-sr_mpls_steering_policy_add (mpls_label_t bsid, u32 table_id,
- ip46_address_t * prefix, u32 mask_width,
- u8 traffic_type, ip46_address_t * next_hop,
- u8 nh_type, u32 color, char co_bits,
- mpls_label_t vpn_label);
-
-extern int
-sr_mpls_steering_policy_del (ip46_address_t * prefix,
- u32 mask_width, u8 traffic_type, u32 table_id,
- u32 color);
-
-extern u32 find_or_create_internal_label (ip46_address_t endpoint, u32 color);
-
-extern void internal_label_lock (ip46_address_t endpoint, u32 color);
-
-extern void internal_label_unlock (ip46_address_t endpoint, u32 color);
-
-#endif /* included_vnet_sr_mpls_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables: eval: (c-set-style "gnu") End:
- */
diff --git a/src/vnet/srmpls/sr_mpls_api.c b/src/vnet/srmpls/sr_mpls_api.c
deleted file mode 100644
index 7d42f1ba451..00000000000
--- a/src/vnet/srmpls/sr_mpls_api.c
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * ------------------------------------------------------------------
- * sr_api.c - ipv6 segment routing api
- *
- * Copyright (c) 2016 Cisco and/or its affiliates. Licensed under the Apache
- * License, Version 2.0 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- * ------------------------------------------------------------------
- */
-
-#include <vnet/vnet.h>
-#include <vnet/srmpls/sr_mpls.h>
-#include <vlibmemory/api.h>
-
-#include <vnet/interface.h>
-#include <vnet/api_errno.h>
-#include <vnet/feature/feature.h>
-#include <vnet/ip/ip_types_api.h>
-
-#include <vnet/format_fns.h>
-#include <vnet/srmpls/sr_mpls.api_enum.h>
-#include <vnet/srmpls/sr_mpls.api_types.h>
-
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-
-#define vl_api_version(n, v) static u32 api_version = v;
-#include <vnet/srmpls/sr_mpls.api.h>
-#undef vl_api_version
-
-#define vl_endianfun
-#include <vnet/srmpls/sr_mpls.api.h>
-#undef vl_endianfun
-
-#define vl_printfun
-#include <vnet/srmpls/sr_mpls.api.h>
-#undef vl_printfun
-
-#define vl_msg_name_crc_list
-#include <vnet/srmpls/sr_mpls.api.h>
-#undef vl_msg_name_crc_list
-
-#define REPLY_MSG_ID_BASE msg_id_base
-#include <vlibapi/api_helper_macros.h>
-
-#define foreach_vpe_api_msg \
-_(SR_MPLS_POLICY_DEL, sr_mpls_policy_del) \
-_(SR_MPLS_STEERING_ADD_DEL, sr_mpls_steering_add_del) \
-_(SR_MPLS_POLICY_ASSIGN_ENDPOINT_COLOR, sr_mpls_policy_assign_endpoint_color)
-
-static u16 msg_id_base;
-
-static void
-vl_api_sr_mpls_policy_add_t_handler (vl_api_sr_mpls_policy_add_t * mp)
-{
- vl_api_sr_mpls_policy_add_reply_t *rmp;
-
- mpls_label_t *segments = 0, *seg;
- mpls_label_t this_address = 0;
-
- int i;
- for (i = 0; i < mp->n_segments; i++)
- {
- vec_add2 (segments, seg, 1);
- this_address = ntohl (mp->segments[i]);
- clib_memcpy (seg, &this_address, sizeof (this_address));
- }
-
- int rv = 0;
- rv = sr_mpls_policy_add (ntohl (mp->bsid),
- segments, mp->is_spray, ntohl (mp->weight));
- vec_free (segments);
-
- REPLY_MACRO (VL_API_SR_MPLS_POLICY_ADD_REPLY);
-}
-
-static void
-vl_api_sr_mpls_policy_mod_t_handler (vl_api_sr_mpls_policy_mod_t * mp)
-{
- vl_api_sr_mpls_policy_mod_reply_t *rmp;
-
- mpls_label_t *segments = 0, *seg;
- mpls_label_t this_address = 0;
-
- int i;
- for (i = 0; i < mp->n_segments; i++)
- {
- vec_add2 (segments, seg, 1);
- this_address = ntohl (mp->segments[i]);
- clib_memcpy (seg, &this_address, sizeof (this_address));
- }
-
- int rv = 0;
- rv = sr_mpls_policy_mod (ntohl (mp->bsid),
- ntohl (mp->operation), segments,
- ntohl (mp->sl_index), ntohl (mp->weight));
- vec_free (segments);
-
- REPLY_MACRO (VL_API_SR_MPLS_POLICY_MOD_REPLY);
-}
-
-static void
-vl_api_sr_mpls_policy_del_t_handler (vl_api_sr_mpls_policy_del_t * mp)
-{
- vl_api_sr_mpls_policy_del_reply_t *rmp;
- int rv = 0;
- rv = sr_mpls_policy_del (ntohl (mp->bsid));
-
- REPLY_MACRO (VL_API_SR_MPLS_POLICY_DEL_REPLY);
-}
-
-static void vl_api_sr_mpls_steering_add_del_t_handler
- (vl_api_sr_mpls_steering_add_del_t * mp)
-{
- vl_api_sr_mpls_steering_add_del_reply_t *rmp;
- fib_prefix_t prefix;
- ip46_address_t next_hop;
- clib_memset (&prefix, 0, sizeof (ip46_address_t));
-
- ip_prefix_decode (&mp->prefix, &prefix);
- ip_address_decode (&mp->next_hop, &next_hop);
-
- int rv = 0;
- if (mp->is_del)
- rv = sr_mpls_steering_policy_del (&prefix.fp_addr,
- prefix.fp_len,
- ip46_address_is_ip4 (&prefix.fp_addr) ?
- SR_STEER_IPV4 : SR_STEER_IPV6,
- ntohl (mp->table_id),
- ntohl (mp->color));
- else
- rv = sr_mpls_steering_policy_add (ntohl (mp->bsid),
- ntohl (mp->table_id),
- &prefix.fp_addr,
- prefix.fp_len,
- ip46_address_is_ip4 (&prefix.fp_addr) ?
- SR_STEER_IPV4 : SR_STEER_IPV6,
- &next_hop,
- ip46_address_is_ip4 (&next_hop) ?
- SR_STEER_IPV4 : SR_STEER_IPV6,
- ntohl (mp->color), mp->co_bits,
- ntohl (mp->vpn_label));
-
- REPLY_MACRO (VL_API_SR_MPLS_STEERING_ADD_DEL_REPLY);
-}
-
-static void vl_api_sr_mpls_policy_assign_endpoint_color_t_handler
- (vl_api_sr_mpls_policy_assign_endpoint_color_t * mp)
-{
- vl_api_sr_mpls_policy_assign_endpoint_color_reply_t *rmp;
- int rv = 0;
-
- ip46_address_t endpoint;
- clib_memset (&endpoint, 0, sizeof (ip46_address_t));
- ip_address_decode (&mp->endpoint, &endpoint);
-
- rv = sr_mpls_policy_assign_endpoint_color (ntohl (mp->bsid),
- &endpoint,
- ip46_address_is_ip4 (&endpoint) ?
- SR_STEER_IPV4 : SR_STEER_IPV6,
- ntohl (mp->color));
-
- REPLY_MACRO (VL_API_SR_MPLS_POLICY_ASSIGN_ENDPOINT_COLOR_REPLY);
-}
-
-static void
-setup_message_id_table (api_main_t * am)
-{
-#define _(id, n, crc) \
- vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + REPLY_MSG_ID_BASE);
- foreach_vl_msg_name_crc_sr_mpls;
-#undef _
-}
-
-static clib_error_t *
-sr_mpls_api_hookup (vlib_main_t * vm)
-{
- api_main_t *am = vlibapi_get_main ();
-
- u8 *name = format (0, "sr_mpls_%08x%c", api_version, 0);
- REPLY_MSG_ID_BASE =
- vl_msg_api_get_msg_ids ((char *) name, VL_MSG_SR_MPLS_LAST);
- vec_free (name);
-
-#define _(N, n) \
- vl_msg_api_set_handlers ( \
- REPLY_MSG_ID_BASE + VL_API_##N, #n, vl_api_##n##_t_handler, \
- vl_noop_handler, vl_api_##n##_t_endian, vl_api_##n##_t_print, \
- sizeof (vl_api_##n##_t), 1, vl_api_##n##_t_print_json, \
- vl_api_##n##_t_tojson, vl_api_##n##_t_fromjson);
- foreach_vpe_api_msg;
-#undef _
-
- /*
- * Manually register the sr policy add msg, so we trace enough bytes
- * to capture a typical segment list
- */
- vl_msg_api_set_handlers (
- REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_ADD, "sr_mpls_policy_add",
- vl_api_sr_mpls_policy_add_t_handler, vl_noop_handler,
- vl_api_sr_mpls_policy_add_t_endian, vl_api_sr_mpls_policy_add_t_print, 256,
- 1, vl_api_sr_mpls_policy_add_t_print_json,
- vl_api_sr_mpls_policy_mod_t_tojson, vl_api_sr_mpls_policy_mod_t_fromjson);
-
- /*
- * Manually register the sr policy mod msg, so we trace enough bytes
- * to capture a typical segment list
- */
- vl_msg_api_set_handlers (
- REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_MOD, "sr_mpls_policy_mod",
- vl_api_sr_mpls_policy_mod_t_handler, vl_noop_handler,
- vl_api_sr_mpls_policy_mod_t_endian, vl_api_sr_mpls_policy_mod_t_print, 256,
- 1, vl_api_sr_mpls_policy_mod_t_print_json,
- vl_api_sr_mpls_policy_mod_t_tojson, vl_api_sr_mpls_policy_mod_t_fromjson);
-
- /*
- * Set up the (msg_name, crc, message-id) table
- */
- setup_message_id_table (am);
-
- return 0;
-}
-
-VLIB_API_INIT_FUNCTION (sr_mpls_api_hookup);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables: eval: (c-set-style "gnu") End:
- */
diff --git a/src/vnet/srmpls/sr_mpls_policy.c b/src/vnet/srmpls/sr_mpls_policy.c
deleted file mode 100644
index 8f0804850f1..00000000000
--- a/src/vnet/srmpls/sr_mpls_policy.c
+++ /dev/null
@@ -1,921 +0,0 @@
-/*
- * sr_mpls_policy.c: SR-MPLS policies
- *
- * Copyright (c) 2016 Cisco and/or its affiliates. Licensed under the Apache
- * License, Version 2.0 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-
-/**
- * @file
- * @brief SR MPLS policy creation and application
- *
- * Create an SR policy.
- * An SR policy can be either of 'default' type or 'spray' type.
- * An SR policy has a list of SID lists attached.
- * In case the SR policy is a default one, it will load balance among them.
- * An SR policy has a BindingSID associated with it.
- * In case any packet arrives with MPLS_label == BindingSID, then the SR
- * policy associated with that BindingSID will be applied to that packet.
- * Also, a BSID can be associated with a (Next-Hop, Color) tuple.
- *
- */
-
-#include <vlib/vlib.h>
-#include <vnet/vnet.h>
-#include <vnet/srmpls/sr_mpls.h>
-#include <vnet/fib/mpls_fib.h>
-#include <vnet/dpo/dpo.h>
-#include <vnet/ip/ip.h>
-
-#include <vppinfra/error.h>
-#include <vppinfra/elog.h>
-
-mpls_sr_main_t sr_mpls_main;
-
-/*************************** SR LB helper functions **************************/
-/**
- * @brief Creates a Segment List and adds it to an SR policy
- *
- * Creates a Segment List and adds it to the SR policy. Notice that SLs are
- * not necessarily unique. Hence there might be two Segment Lists within the
- * same SR Policy with exactly the same segments and the same weight.
- *
- * @param sr_policy is the SR policy where the SL will be added
- * @param sl is a vector of MPLS labels composing the Segment List
- * @param weight is the weight of the Segment List (for load-balancing purposes)
- *
- * @return pointer to the just created segment list
- */
-static inline mpls_sr_sl_t *
-create_sl (mpls_sr_policy_t * sr_policy, mpls_label_t * sl, u32 weight)
-{
- mpls_sr_main_t *sm = &sr_mpls_main;
- mpls_sr_sl_t *segment_list;
- u32 ii;
-
- pool_get (sm->sid_lists, segment_list);
- clib_memset (segment_list, 0, sizeof (*segment_list));
-
- vec_add1 (sr_policy->segments_lists, segment_list - sm->sid_lists);
-
- /* Fill in segment list */
- segment_list->weight =
- (weight != (u32) ~ 0 ? weight : SR_SEGMENT_LIST_WEIGHT_DEFAULT);
- segment_list->segments = vec_dup (sl);
-
- mpls_eos_bit_t eos;
- FOR_EACH_MPLS_EOS_BIT (eos)
- {
- fib_route_path_t path = {
- .frp_proto = DPO_PROTO_MPLS,
- .frp_sw_if_index = ~0,
- .frp_fib_index = 0,
- .frp_weight = segment_list->weight,
- .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
- .frp_label_stack = NULL,
- .frp_local_label = sl[0],
- };
-
- if (vec_len (sl) > 1)
- {
- vec_validate (path.frp_label_stack, vec_len (sl) - 2);
- for (ii = 1; ii < vec_len (sl); ii++)
- {
- path.frp_label_stack[ii - 1].fml_value = sl[ii];
- }
- }
- else
- {
- /*
- * add an implicit NULL label to allow non-eos recursion
- */
- fib_mpls_label_t lbl = {
- .fml_value = MPLS_IETF_IMPLICIT_NULL_LABEL,
- };
- vec_add1 (path.frp_label_stack, lbl);
- }
-
- fib_route_path_t *paths = NULL;
- vec_add1 (paths, path);
-
- /* *INDENT-OFF* */
- fib_prefix_t pfx = {
- .fp_len = 21,
- .fp_proto = FIB_PROTOCOL_MPLS,
- .fp_label = sr_policy->bsid,
- .fp_eos = eos,
- .fp_payload_proto = DPO_PROTO_MPLS,
- };
- /* *INDENT-ON* */
-
- fib_table_entry_path_add2 (0,
- &pfx,
- FIB_SOURCE_SR,
- (sr_policy->type == SR_POLICY_TYPE_DEFAULT ?
- FIB_ENTRY_FLAG_NONE :
- FIB_ENTRY_FLAG_MULTICAST), paths);
- vec_free (paths);
- }
-
- return segment_list;
-}
-
-/******************************* SR rewrite API *******************************/
-/*
- * Three functions for handling sr policies: -> sr_mpls_policy_add ->
- * sr_mpls_policy_del -> sr_mpls_policy_mod All of them are API. CLI function
- * on sr_policy_command_fn
- */
-
-/**
- * @brief Create a new SR policy
- *
- * @param bsid is the bindingSID of the SR Policy
- * @param segments is a vector of MPLS labels composing the segment list
- * @param behavior is the behavior of the SR policy (default/spray)
- * @param weight is the weight of this specific SID list
- *
- * @return 0 if correct, else error
- */
-int
-sr_mpls_policy_add (mpls_label_t bsid, mpls_label_t * segments,
- u8 behavior, u32 weight)
-{
- mpls_sr_main_t *sm = &sr_mpls_main;
- mpls_sr_policy_t *sr_policy = 0;
- uword *p;
-
- if (!sm->sr_policies_index_hash)
- sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));
-
- /* MPLS SR policies cannot be created unless the MPLS table is present */
- if (~0 == fib_table_find (FIB_PROTOCOL_MPLS, MPLS_FIB_DEFAULT_TABLE_ID))
- return (VNET_API_ERROR_NO_SUCH_TABLE);
-
- /* Search for existing keys (BSID) */
- p = hash_get (sm->sr_policies_index_hash, bsid);
- if (p)
- {
- /* Add SR policy that already exists; complain */
- return -12;
- }
- /* Add an SR policy object */
- pool_get (sm->sr_policies, sr_policy);
- clib_memset (sr_policy, 0, sizeof (*sr_policy));
-
- /* the first policy needs to lock the MPLS table so it doesn't
- * disappear with policies in it */
- if (1 == pool_elts (sm->sr_policies))
- fib_table_find_or_create_and_lock (FIB_PROTOCOL_MPLS,
- MPLS_FIB_DEFAULT_TABLE_ID,
- FIB_SOURCE_SR);
- sr_policy->bsid = bsid;
- sr_policy->type = behavior;
- sr_policy->endpoint_type = 0;
- ip6_address_set_zero (&sr_policy->endpoint.ip6);
- sr_policy->color = (u32) ~ 0;
-
- /* Copy the key */
- hash_set (sm->sr_policies_index_hash, bsid, sr_policy - sm->sr_policies);
-
- /* Create a segment list and add the index to the SR policy */
- create_sl (sr_policy, segments, weight);
-
- return 0;
-}
-
-/**
- * @brief Delete an SR policy
- *
- * @param bsid is the bindingSID of the SR Policy
- *
- * @return 0 if correct, else error
- */
-int
-sr_mpls_policy_del (mpls_label_t bsid)
-{
- mpls_sr_main_t *sm = &sr_mpls_main;
- mpls_sr_policy_t *sr_policy = 0;
- mpls_sr_sl_t *segment_list;
- mpls_eos_bit_t eos;
- u32 *sl_index;
- uword *p;
-
- if (!sm->sr_policies_index_hash)
- sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));
-
- p = hash_get (sm->sr_policies_index_hash, bsid);
- if (p)
- sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
- else
- return -1;
-
- /* Clean SID Lists */
- vec_foreach (sl_index, sr_policy->segments_lists)
- {
- segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
-
- fib_route_path_t path = {
- .frp_proto = DPO_PROTO_MPLS,
- .frp_sw_if_index = ~0,
- .frp_fib_index = 0,
- .frp_weight = segment_list->weight,
- .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
- .frp_local_label = segment_list->segments[0],
- };
-
- vec_add (path.frp_label_stack, segment_list + 1,
- vec_len (segment_list) - 1);
-
- fib_route_path_t *paths = NULL;
- vec_add1 (paths, path);
-
- /* remove each of the MPLS routes */
- FOR_EACH_MPLS_EOS_BIT (eos)
- {
- /* *INDENT-OFF* */
- fib_prefix_t pfx = {
- .fp_len = 21,
- .fp_proto = FIB_PROTOCOL_MPLS,
- .fp_label = sr_policy->bsid,
- .fp_eos = eos,
- .fp_payload_proto = DPO_PROTO_MPLS,
- };
- /* *INDENT-ON* */
-
- fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
- }
- vec_free (paths);
- vec_free (segment_list->segments);
- pool_put_index (sm->sid_lists, *sl_index);
- }
-
- /* If there are still traces of TE, make sure locks are released */
- if (sr_policy->endpoint_type != 0 && sr_policy->color != (u32) ~ 0)
- {
- sr_mpls_policy_assign_endpoint_color (bsid, NULL, 0, (u32) ~ 0);
- }
-
- /* Remove SR policy entry */
- hash_unset (sm->sr_policies_index_hash, sr_policy->bsid);
- pool_put (sm->sr_policies, sr_policy);
-
- if (0 == pool_elts (sm->sr_policies))
- fib_table_unlock (MPLS_FIB_DEFAULT_TABLE_ID,
- FIB_PROTOCOL_MPLS, FIB_SOURCE_SR);
-
- return 0;
-}
-
-/**
- * @brief Modify an existing SR policy
- *
- * The possible modifications are adding a new Segment List, modifying an
- * existing Segment List (modifying the weight only) and deleting a given
- * Segment List from the SR Policy.
- *
- * @param bsid is the bindingSID of the SR Policy
- * @param operation is the operation to perform (among the top ones)
- * @param segments is a vector of MPLS labels composing the segment list
- * @param sl_index is the index of the Segment List to modify/delete
- * @param weight is the weight of the sid list. optional.
- *
- * @return 0 ok, >0 index of SL, <0 error
- */
-int
-sr_mpls_policy_mod (mpls_label_t bsid, u8 operation,
- mpls_label_t * segments, u32 sl_index, u32 weight)
-{
- mpls_sr_main_t *sm = &sr_mpls_main;
- mpls_sr_policy_t *sr_policy = 0;
- mpls_sr_sl_t *segment_list;
- u32 *sl_index_iterate;
- uword *p;
-
- if (!sm->sr_policies_index_hash)
- sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));
-
- p = hash_get (sm->sr_policies_index_hash, bsid);
- if (p)
- sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
- else
- return -1;
-
- if (operation == 1)
- { /* Add SR List to an existing SR policy */
- /* Create the new SL */
- segment_list = create_sl (sr_policy, segments, weight);
- return segment_list - sm->sid_lists;
- }
- else if (operation == 2)
- { /* Delete SR List from an existing SR
- * policy */
- /* Check that currently there are more than one SID list */
- if (vec_len (sr_policy->segments_lists) == 1)
- return -21;
-
- /*
- * Check that the SR list does exist and is assigned to the
- * sr policy
- */
- vec_foreach (sl_index_iterate, sr_policy->segments_lists)
- if (*sl_index_iterate == sl_index)
- break;
-
- if (*sl_index_iterate != sl_index)
- return -22;
-
- /* Remove the lucky SR list that is being kicked out */
- segment_list = pool_elt_at_index (sm->sid_lists, sl_index);
-
- mpls_eos_bit_t eos;
- fib_route_path_t path = {
- .frp_proto = DPO_PROTO_MPLS,
- .frp_sw_if_index = ~0,
- .frp_fib_index = 0,
- .frp_weight = segment_list->weight,
- .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
- .frp_local_label = segment_list->segments[0],
- };
-
- vec_add (path.frp_label_stack, segment_list + 1,
- vec_len (segment_list) - 1);
-
- fib_route_path_t *paths = NULL;
- vec_add1 (paths, path);
-
- FOR_EACH_MPLS_EOS_BIT (eos)
- {
- /* *INDENT-OFF* */
- fib_prefix_t pfx = {
- .fp_len = 21,
- .fp_proto = FIB_PROTOCOL_MPLS,
- .fp_label = sr_policy->bsid,
- .fp_eos = eos,
- .fp_payload_proto = DPO_PROTO_MPLS,
- };
- /* *INDENT-ON* */
-
- fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
- }
-
- vec_free (paths);
- vec_free (segment_list->segments);
- pool_put_index (sm->sid_lists, sl_index);
- vec_del1 (sr_policy->segments_lists,
- sl_index_iterate - sr_policy->segments_lists);
- }
- else if (operation == 3)
- { /* Modify the weight of an existing
- * SR List */
- /* Find the corresponding SL */
- vec_foreach (sl_index_iterate, sr_policy->segments_lists)
- if (*sl_index_iterate == sl_index)
- break;
-
- if (*sl_index_iterate != sl_index)
- return -32;
-
- /* Change the weight */
- segment_list = pool_elt_at_index (sm->sid_lists, sl_index);
-
- /* Update LB */
- mpls_eos_bit_t eos;
- fib_route_path_t path = {
- .frp_proto = DPO_PROTO_MPLS,
- .frp_sw_if_index = ~0,
- .frp_fib_index = 0,
- .frp_weight = segment_list->weight,
- .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
- .frp_local_label = segment_list->segments[0],
- };
-
- vec_add (path.frp_label_stack, segment_list + 1,
- vec_len (segment_list) - 1);
-
- fib_route_path_t *paths = NULL;
- vec_add1 (paths, path);
-
- FOR_EACH_MPLS_EOS_BIT (eos)
- {
- /* *INDENT-OFF* */
- fib_prefix_t pfx = {
- .fp_len = 21,
- .fp_proto = FIB_PROTOCOL_MPLS,
- .fp_label = sr_policy->bsid,
- .fp_eos = eos,
- .fp_payload_proto = DPO_PROTO_MPLS,
- };
- /* *INDENT-ON* */
-
- fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
- }
-
- segment_list->weight = weight;
-
- path.frp_weight = segment_list->weight;
-
- vec_free (paths);
- paths = NULL;
- vec_add1 (paths, path);
-
- FOR_EACH_MPLS_EOS_BIT (eos)
- {
- /* *INDENT-OFF* */
- fib_prefix_t pfx = {
- .fp_len = 21,
- .fp_proto = FIB_PROTOCOL_MPLS,
- .fp_label = sr_policy->bsid,
- .fp_eos = eos,
- .fp_payload_proto = DPO_PROTO_MPLS,
- };
- /* *INDENT-ON* */
-
- fib_table_entry_path_add2 (0,
- &pfx,
- FIB_SOURCE_SR,
- (sr_policy->type ==
- SR_POLICY_TYPE_DEFAULT ?
- FIB_ENTRY_FLAG_NONE :
- FIB_ENTRY_FLAG_MULTICAST), paths);
- }
- }
- return 0;
-}
-
-/**
- * @brief CLI for 'sr mpls policies' command family
- */
-static clib_error_t *
-sr_mpls_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- int rv = -1;
- char is_del = 0, is_add = 0, is_mod = 0;
- char policy_set = 0;
- mpls_label_t bsid, next_label;
- u32 sl_index = (u32) ~ 0;
- u32 weight = (u32) ~ 0;
- mpls_label_t *segments = 0;
- u8 operation = 0;
- u8 is_spray = 0;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (!is_add && !is_mod && !is_del && unformat (input, "add"))
- is_add = 1;
- else if (!is_add && !is_mod && !is_del && unformat (input, "del"))
- is_del = 1;
- else if (!is_add && !is_mod && !is_del && unformat (input, "mod"))
- is_mod = 1;
- else if (!policy_set
- && unformat (input, "bsid %U", unformat_mpls_unicast_label,
- &bsid))
- policy_set = 1;
- else if (unformat (input, "weight %d", &weight));
- else if (unformat
- (input, "next %U", unformat_mpls_unicast_label, &next_label))
- {
- vec_add (segments, &next_label, 1);
- }
- else if (unformat (input, "add sl"))
- operation = 1;
- else if (unformat (input, "del sl index %d", &sl_index))
- operation = 2;
- else if (unformat (input, "mod sl index %d", &sl_index))
- operation = 3;
- else if (unformat (input, "spray"))
- is_spray = 1;
- else
- break;
- }
-
- if (!is_add && !is_mod && !is_del)
- return clib_error_return (0, "Incorrect CLI");
-
- if (!policy_set)
- return clib_error_return (0, "No SR policy BSID or index specified");
-
- if (is_add)
- {
- if (vec_len (segments) == 0)
- return clib_error_return (0, "No Segment List specified");
-
- rv = sr_mpls_policy_add (bsid, segments,
- (is_spray ? SR_POLICY_TYPE_SPRAY :
- SR_POLICY_TYPE_DEFAULT), weight);
- vec_free (segments);
- }
- else if (is_del)
- rv = sr_mpls_policy_del (bsid);
- else if (is_mod)
- {
- if (!operation)
- return clib_error_return (0, "No SL modification specified");
- if (operation != 1 && sl_index == (u32) ~ 0)
- return clib_error_return (0, "No Segment List index specified");
- if (operation == 1 && vec_len (segments) == 0)
- return clib_error_return (0, "No Segment List specified");
- if (operation == 3 && weight == (u32) ~ 0)
- return clib_error_return (0, "No new weight for the SL specified");
- rv = sr_mpls_policy_mod (bsid, operation, segments, sl_index, weight);
- vec_free (segments);
- }
- switch (rv)
- {
- case 0:
- break;
- case 1:
- return 0;
- case -12:
- return clib_error_return (0,
- "There is already a FIB entry for the BindingSID address.\n"
- "The SR policy could not be created.");
- case -21:
- return clib_error_return (0,
- "The selected SR policy only contains ONE segment list. "
- "Please remove the SR policy instead");
- case -22:
- return clib_error_return (0,
- "Could not delete the segment list. "
- "It is not associated with that SR policy.");
- case -23:
- return clib_error_return (0,
- "Could not delete the segment list. "
- "It is not associated with that SR policy.");
- case -32:
- return clib_error_return (0,
- "Could not modify the segment list. "
- "The given SL is not associated with such SR policy.");
- case VNET_API_ERROR_NO_SUCH_TABLE:
- return clib_error_return (0, "the Default MPLS table is not present");
- default:
- return clib_error_return (0, "BUG: sr policy returns %d", rv);
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND(sr_mpls_policy_command, static)=
-{
- .path = "sr mpls policy",
- .short_help = "sr mpls policy [add||del||mod] bsid 2999 "
- "next 10 next 20 next 30 (weight 1) (spray)",
- .long_help = "TBD.\n",
- .function = sr_mpls_policy_command_fn,
-};
-/* *INDENT-ON* */
-
-/**
- * @brief CLI to display onscreen all the SR MPLS policies
- */
-static clib_error_t *
-show_sr_mpls_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- mpls_sr_main_t *sm = &sr_mpls_main;
- mpls_sr_sl_t *segment_list = 0;
- mpls_sr_policy_t *sr_policy = 0;
- mpls_sr_policy_t **vec_policies = 0;
- mpls_label_t *label;
- u32 *sl_index;
- u8 *s;
- int i = 0;
-
- vlib_cli_output (vm, "SR MPLS policies:");
-
- /* *INDENT-OFF* */
- pool_foreach (sr_policy, sm->sr_policies) {
- vec_add1(vec_policies, sr_policy);
- }
- /* *INDENT-ON* */
-
- vec_foreach_index (i, vec_policies)
- {
- sr_policy = vec_policies[i];
- vlib_cli_output (vm, "[%u].-\tBSID: %U",
- (u32) (sr_policy - sm->sr_policies),
- format_mpls_unicast_label, sr_policy->bsid);
- switch (sr_policy->endpoint_type)
- {
- case SR_STEER_IPV6:
- vlib_cli_output (vm, "\tEndpoint: %U", format_ip6_address,
- &sr_policy->endpoint.ip6);
- vlib_cli_output (vm, "\tColor: %u", sr_policy->color);
- break;
- case SR_STEER_IPV4:
- vlib_cli_output (vm, "\tEndpoint: %U", format_ip4_address,
- &sr_policy->endpoint.ip4);
- vlib_cli_output (vm, "\tColor: %u", sr_policy->color);
- break;
- default:
- vlib_cli_output (vm, "\tTE disabled");
- }
- vlib_cli_output (vm, "\tType: %s",
- (sr_policy->type ==
- SR_POLICY_TYPE_DEFAULT ? "Default" : "Spray"));
- vlib_cli_output (vm, "\tSegment Lists:");
- vec_foreach (sl_index, sr_policy->segments_lists)
- {
- s = NULL;
- segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
- s = format (s, "\t[%u].- ", *sl_index);
- s = format (s, "< ");
- vec_foreach (label, segment_list->segments)
- {
- s = format (s, "%U, ", format_mpls_unicast_label, *label);
- }
- s = format (s, "\b\b > ");
- vlib_cli_output (vm, " %s", s);
- }
- vlib_cli_output (vm, "-----------");
- }
- vec_free (vec_policies);
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND(show_sr_mpls_policies_command, static)=
-{
- .path = "show sr mpls policies",
- .short_help = "show sr mpls policies",
- .function = show_sr_mpls_policies_command_fn,
-};
-/* *INDENT-ON* */
-
-/**
- * @brief Update the Endpoint,Color tuple of an SR policy
- *
- * @param bsid is the bindingSID of the SR Policy
- * @param endpoint represents the IP46 of the endpoint
- * @param color represents the color (u32)
- *
- * To reset the tuple to NULL, pass ~0 in the parameters.
- *
- * @return 0 if correct, else error
- */
-int
-sr_mpls_policy_assign_endpoint_color (mpls_label_t bsid,
- ip46_address_t * endpoint,
- u8 endpoint_type, u32 color)
-{
- mpls_sr_main_t *sm = &sr_mpls_main;
- mpls_sr_policy_t *sr_policy = 0;
- uword *endpoint_table, *p, *old_value;
-
- ip46_address_t any;
- any.as_u64[0] = any.as_u64[1] = (u64) ~ 0;
-
- if (!sm->sr_policies_index_hash)
- sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));
-
- p = hash_get (sm->sr_policies_index_hash, bsid);
- if (p)
- sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
- else
- return -1;
-
- /* If previous Endpoint, color existed, remove (NH,C) and (ANY,C) */
- if (sr_policy->endpoint_type)
- {
- endpoint_table =
- mhash_get (&sm->sr_policies_c2e2eclabel_hash, &sr_policy->color);
- if (!endpoint_table)
- return -2;
- old_value =
- mhash_get ((mhash_t *) endpoint_table, &sr_policy->endpoint);
-
- /* CID 180995 This should never be NULL unless the two hash tables
- * get out of sync */
- ALWAYS_ASSERT (old_value != NULL);
-
- fib_prefix_t pfx = { 0 };
- pfx.fp_proto = FIB_PROTOCOL_MPLS;
- pfx.fp_len = 21;
- pfx.fp_label = (u32) * old_value;
-
- mpls_eos_bit_t eos;
- FOR_EACH_MPLS_EOS_BIT (eos)
- {
- pfx.fp_eos = eos;
- fib_table_entry_path_remove (sm->fib_table_EC,
- &pfx,
- FIB_SOURCE_SR,
- DPO_PROTO_MPLS,
- NULL,
- ~0, 0, 1, FIB_ROUTE_PATH_FLAG_NONE);
- }
-
- old_value = mhash_get ((mhash_t *) endpoint_table, &any);
- pfx.fp_label = (u32) * old_value;
-
- FOR_EACH_MPLS_EOS_BIT (eos)
- {
- pfx.fp_eos = eos;
- fib_table_entry_path_remove (sm->fib_table_EC,
- &pfx,
- FIB_SOURCE_SR,
- DPO_PROTO_MPLS,
- NULL,
- ~0, 0, 1, FIB_ROUTE_PATH_FLAG_NONE);
- }
-
- /* Release the lock on (NH, Color) and (ANY, Color) */
- internal_label_unlock (sr_policy->endpoint, sr_policy->color);
- internal_label_unlock (any, sr_policy->color);
-
- /* Reset the values on the SR policy */
- sr_policy->endpoint_type = 0;
- sr_policy->endpoint.as_u64[0] = sr_policy->endpoint.as_u64[1] =
- (u64) ~ 0;
- sr_policy->color = (u32) ~ 0;
- }
-
- if (endpoint_type)
- {
- sr_policy->endpoint_type = endpoint_type;
- sr_policy->endpoint.as_u64[0] = endpoint->as_u64[0];
- sr_policy->endpoint.as_u64[1] = endpoint->as_u64[1];
- sr_policy->color = color;
-
- u32 label = find_or_create_internal_label (*endpoint, color);
- internal_label_lock (*endpoint, sr_policy->color);
-
- /* If the FIB doesn't exist, create it */
- if (sm->fib_table_EC == (u32) ~ 0)
- {
- sm->fib_table_EC = fib_table_create_and_lock (FIB_PROTOCOL_MPLS,
- FIB_SOURCE_SR,
- "SR-MPLS Traffic Engineering (NextHop,Color)");
-
- fib_table_flush (sm->fib_table_EC, FIB_PROTOCOL_MPLS,
- FIB_SOURCE_SPECIAL);
- }
-
- fib_prefix_t pfx = { 0 };
- pfx.fp_proto = FIB_PROTOCOL_MPLS;
- pfx.fp_len = 21;
-
- fib_route_path_t path = {
- .frp_proto = DPO_PROTO_MPLS,
- .frp_sw_if_index = ~0,
- .frp_fib_index = 0,
- .frp_weight = 1,
- .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
- .frp_label_stack = 0
- };
- path.frp_local_label = sr_policy->bsid;
-
- //Add the entry to ANY,Color
- u32 any_label = find_or_create_internal_label (any, color);
- internal_label_lock (any, sr_policy->color);
-
- pfx.fp_eos = MPLS_EOS;
- path.frp_eos = MPLS_EOS;
-
- fib_route_path_t *paths = NULL;
- vec_add1 (paths, path);
-
- pfx.fp_label = label;
- fib_table_entry_update (sm->fib_table_EC,
- &pfx,
- FIB_SOURCE_SR,
- FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
-
- pfx.fp_label = any_label;
- fib_table_entry_update (sm->fib_table_EC,
- &pfx,
- FIB_SOURCE_SR,
- FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
-
- fib_mpls_label_t fml = {
- .fml_value = MPLS_IETF_IMPLICIT_NULL_LABEL,
- };
-
- vec_add1 (path.frp_label_stack, fml);
- pfx.fp_eos = MPLS_NON_EOS;
- path.frp_eos = MPLS_NON_EOS;
-
- paths = NULL;
- vec_add1 (paths, path);
-
- pfx.fp_label = label;
- fib_table_entry_update (sm->fib_table_EC,
- &pfx,
- FIB_SOURCE_SR,
- FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
-
- pfx.fp_label = any_label;
- fib_table_entry_update (sm->fib_table_EC,
- &pfx,
- FIB_SOURCE_SR,
- FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
- }
- return 0;
-}
-
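A minimal usage sketch of this helper, assuming a policy with BSID 2999 already exists (address and color values are hypothetical):

  ip46_address_t ep = { 0 };
  ep.ip4.as_u32 = clib_host_to_net_u32 (0x01010101); /* 1.1.1.1 */

  /* attach (Endpoint, Color) = (1.1.1.1, 1234) to the policy */
  int rv = sr_mpls_policy_assign_endpoint_color (2999, &ep, SR_STEER_IPV4, 1234);

  /* endpoint_type == 0 resets the tuple: the (NH,C)/(ANY,C) FIB entries
   * and the label locks taken above are removed */
  rv = sr_mpls_policy_assign_endpoint_color (2999, &ep, 0, (u32) ~0);
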
-/**
- * @brief CLI to modify the Endpoint,Color of an SR policy
- */
-static clib_error_t *
-cli_sr_mpls_policy_ec_command_fn (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- ip46_address_t endpoint;
- u32 color = (u32) ~ 0;
- mpls_label_t bsid;
- u8 endpoint_type = 0;
- char clear = 0, color_set = 0, bsid_set = 0;
-
- clib_memset (&endpoint, 0, sizeof (ip46_address_t));
-
- int rv;
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (!endpoint_type
- && unformat (input, "endpoint %U", unformat_ip6_address,
- &endpoint.ip6))
- endpoint_type = SR_STEER_IPV6;
- else if (!endpoint_type
- && unformat (input, "endpoint %U", unformat_ip4_address,
- &endpoint.ip4))
- endpoint_type = SR_STEER_IPV4;
- else if (!color_set && unformat (input, "color %u", &color))
- color_set = 1;
- else if (!bsid_set
- && unformat (input, "bsid %U", unformat_mpls_unicast_label,
- &bsid))
- bsid_set = 1;
- else if (!clear && unformat (input, "clear"))
- clear = 1;
- else
- break;
- }
-
- if (!bsid_set)
- return clib_error_return (0, "No BSID specified");
- if (!endpoint_type && !clear)
- return clib_error_return (0, "No Endpoint specified");
- if (!color_set && !clear)
- return clib_error_return (0, "No Color set");
-
- /* In case it's a cleanup */
- if (clear)
- {
- ip6_address_set_zero (&endpoint.ip6);
- color = (u32) ~ 0;
- }
- rv =
- sr_mpls_policy_assign_endpoint_color (bsid, &endpoint, endpoint_type,
- color);
-
- if (rv)
- return clib_error_return (0, "Error on Endpoint,Color");
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND(cli_sr_mpls_policy_ec_command, static)=
-{
- .path = "sr mpls policy te",
- .short_help = "sr mpls policy te bsid xxxxx endpoint x.x.x.x color 12341234",
- .function = cli_sr_mpls_policy_ec_command_fn,
-};
-/* *INDENT-ON* */
-
-/********************* SR MPLS Policy initialization ***********************/
-/**
- * @brief SR MPLS Policy initialization
- */
-clib_error_t *
-sr_mpls_policy_rewrite_init (vlib_main_t * vm)
-{
- mpls_sr_main_t *sm = &sr_mpls_main;
-
- /* Init memory for sr policy keys (bsid <-> ip6_address_t) */
- sm->sr_policies_index_hash = NULL;
- sm->sr_policies_c2e2eclabel_hash.hash = NULL;
- return 0;
-}
-
-VLIB_INIT_FUNCTION (sr_mpls_policy_rewrite_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables: eval: (c-set-style "gnu") End:
- */
diff --git a/src/vnet/srmpls/sr_mpls_steering.c b/src/vnet/srmpls/sr_mpls_steering.c
deleted file mode 100644
index b12e78d2755..00000000000
--- a/src/vnet/srmpls/sr_mpls_steering.c
+++ /dev/null
@@ -1,905 +0,0 @@
-/*
- * sr_mpls_steering.c: MPLS segment routing steering into SR-MPLS policies
- *
- * Copyright (c) 2016 Cisco and/or its affiliates. Licensed under the Apache
- * License, Version 2.0 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-
-/**
- * @file
- * @brief Packet steering into SR-MPLS Policies
- *
- * This file is in charge of handling the FIB appropriately to steer packets
- * through SR Policies as defined in 'sr_mpls_policy.c'. Notice that here
- * we are only doing steering. SR policy application is done in
- * sr_mpls_policy.c
- *
- * Supports:
- * - Steering of IPv6 traffic Destination Address based through BSID
- * - Steering of IPv4 traffic Destination Address based through BSID
- * - Steering of IPv4 and IPv6 traffic through N,C (SR CP)
- */
-
-#include <vlib/vlib.h>
-#include <vnet/vnet.h>
-#include <vnet/srmpls/sr_mpls.h>
-#include <vnet/ip/ip4_packet.h>
-#include <vnet/ip/ip6_packet.h>
-#include <vnet/fib/mpls_fib.h>
-
-#include <vppinfra/error.h>
-#include <vppinfra/elog.h>
-
-#define SRMPLS_TE_OFFSET 50
-
-/**
- * @brief function to sort the colors in descending order
- */
-int
-sort_color_descent (const u32 * x, u32 * y)
-{
- return *y - *x;
-}
-
-/********************* Internal (NH, C) labels *******************************/
-/**
- * @brief find the corresponding label for (endpoint, color) and lock it
- * endpoint might be NULL or ANY
- * NULL = 0, ANY=~0
- */
-u32
-find_or_create_internal_label (ip46_address_t endpoint, u32 color)
-{
- mpls_sr_main_t *sm = &sr_mpls_main;
- uword *color_table, *result_label;
-
- if (!sm->sr_policies_c2e2eclabel_hash.hash)
- mhash_init (&sm->sr_policies_c2e2eclabel_hash, sizeof (mhash_t),
- sizeof (u32));
-
- color_table = mhash_get (&sm->sr_policies_c2e2eclabel_hash, &color);
- if (!color_table)
- {
- mhash_t color_t;
- clib_memset (&color_t, 0, sizeof (mhash_t));
- mhash_init (&color_t, sizeof (u32), sizeof (ip46_address_t));
- mhash_set_mem (&sm->sr_policies_c2e2eclabel_hash, &color,
- (uword *) & color_t, NULL);
- color_table = mhash_get (&sm->sr_policies_c2e2eclabel_hash, &color);
- }
-
- result_label = mhash_get ((mhash_t *) color_table, &endpoint);
-
- if (result_label)
- return (u32) * result_label;
-
- /* Create and set a new internal label */
- u32 *new_internal_label = 0;
- pool_get (sm->ec_labels, new_internal_label);
- *new_internal_label = 0;
- mhash_set ((mhash_t *) color_table, &endpoint,
- (new_internal_label - sm->ec_labels) + SRMPLS_TE_OFFSET, NULL);
-
- return (new_internal_label - sm->ec_labels) + SRMPLS_TE_OFFSET;
-}
-
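The returned label is the element's pool index offset by SRMPLS_TE_OFFSET, so the lock helpers below recover the reference counter by plain subtraction. A sketch of that invariant (assumes sm and the helpers above are in scope):

  u32 label = find_or_create_internal_label (endpoint, color);
  u32 *lock = pool_elt_at_index (sm->ec_labels, label - SRMPLS_TE_OFFSET);
  /* *lock is the reference count that internal_label_lock/unlock adjust */
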
-always_inline void
-internal_label_lock_co (ip46_address_t endpoint, u32 color, char co_bits)
-{
- ip46_address_t zero, any;
- ip46_address_reset (&zero);
- any.as_u64[0] = any.as_u64[1] = (u64) ~ 0;
- switch (co_bits)
- {
- case SR_TE_CO_BITS_10:
- internal_label_lock (endpoint, color);
- internal_label_lock (zero, color);
- internal_label_lock (any, color);
- break;
- case SR_TE_CO_BITS_01:
- internal_label_lock (endpoint, color);
- internal_label_lock (zero, color);
- break;
- case SR_TE_CO_BITS_00:
- case SR_TE_CO_BITS_11:
- internal_label_lock (endpoint, color);
- break;
- }
-}
-
-/**
- * @brief lock the label for (NH, C)
- * endpoint might be NULL or ANY
- * NULL = 0, ANY=~0
- */
-void
-internal_label_lock (ip46_address_t endpoint, u32 color)
-{
- mpls_sr_main_t *sm = &sr_mpls_main;
- uword *color_table, *result_label;
-
- if (!sm->sr_policies_c2e2eclabel_hash.hash)
- return;
-
- color_table = mhash_get (&sm->sr_policies_c2e2eclabel_hash, &color);
- if (!color_table)
- return;
-
- result_label = mhash_get ((mhash_t *) color_table, &endpoint);
-
- if (!result_label)
- return;
-
- /* Lock it */
- u32 *label_lock =
- pool_elt_at_index (sm->ec_labels, *result_label - SRMPLS_TE_OFFSET);
- (*label_lock)++;
-}
-
-
-always_inline void
-internal_label_unlock_co (ip46_address_t endpoint, u32 color, char co_bits)
-{
- ip46_address_t zero, any;
- ip46_address_reset (&zero);
- any.as_u64[0] = any.as_u64[1] = (u64) ~ 0;
- switch (co_bits)
- {
- case SR_TE_CO_BITS_10:
- internal_label_unlock (endpoint, color);
- internal_label_unlock (zero, color);
- internal_label_unlock (any, color);
- break;
- case SR_TE_CO_BITS_01:
- internal_label_unlock (endpoint, color);
- internal_label_unlock (zero, color);
- break;
- case SR_TE_CO_BITS_00:
- case SR_TE_CO_BITS_11:
- internal_label_unlock (endpoint, color);
- break;
- }
-}
-
-/**
- * @brief Release lock on label for (endpoint, color)
- * endpoint might be NULL or ANY
- * NULL = 0, ANY=~0
- */
-void
-internal_label_unlock (ip46_address_t endpoint, u32 color)
-{
- mpls_sr_main_t *sm = &sr_mpls_main;
- uword *color_table, *result_label;
-
- if (!sm->sr_policies_c2e2eclabel_hash.hash)
- return;
-
- color_table = mhash_get (&sm->sr_policies_c2e2eclabel_hash, &color);
- if (!color_table)
- return;
-
- result_label = mhash_get ((mhash_t *) color_table, &endpoint);
-
- if (!result_label)
- return;
-
- u32 *label_lock =
- pool_elt_at_index (sm->ec_labels, *result_label - SRMPLS_TE_OFFSET);
- (*label_lock)--;
-
- if (*label_lock == 0)
- {
- pool_put (sm->ec_labels, label_lock);
- mhash_unset ((mhash_t *) color_table, &endpoint, NULL);
- if (mhash_elts ((mhash_t *) color_table) == 0)
- {
- mhash_free ((mhash_t *) color_table);
- mhash_unset (&sm->sr_policies_c2e2eclabel_hash, &color, NULL);
- if (mhash_elts (&sm->sr_policies_c2e2eclabel_hash) == 0)
- {
- mhash_free (&sm->sr_policies_c2e2eclabel_hash);
- sm->sr_policies_c2e2eclabel_hash.hash = NULL;
- fib_table_unlock (sm->fib_table_EC, FIB_PROTOCOL_MPLS,
- FIB_SOURCE_SR);
- sm->fib_table_EC = (u32) ~ 0;
- }
- }
- }
-}
-
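Because the last unlock frees the label, then the per-color table, and finally the hidden (NH,C) FIB table, every lock must be balanced by exactly one unlock for the same (endpoint, color) and CO bits. A sketch of the discipline the steering code follows:

  /* taken when a color is added to a steering policy ... */
  internal_label_lock_co (steer_pl->next_hop, color, steer_pl->co_bits);
  /* ... and released with the same CO bits when that color is removed */
  internal_label_unlock_co (steer_pl->next_hop, color, steer_pl->co_bits);
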
-/********************* steering computation *********************************/
-/**
- * @brief function to update the FIB
- */
-void
-compute_sr_te_automated_steering_fib_entry (mpls_sr_steering_policy_t *
- steer_pl)
-{
- mpls_sr_main_t *sm = &sr_mpls_main;
- fib_prefix_t pfx = { 0 };
-
- u32 *internal_labels = 0;
- ip46_address_t zero, any;
- ip46_address_reset (&zero);
- any.as_u64[0] = any.as_u64[1] = (u64) ~ 0;
-
- u32 *color_i = NULL;
- vec_foreach (color_i, steer_pl->color)
- {
- switch (steer_pl->co_bits)
- {
- case SR_TE_CO_BITS_10:
- vec_add1 (internal_labels,
- find_or_create_internal_label (steer_pl->next_hop,
- *color_i));
- vec_add1 (internal_labels,
- find_or_create_internal_label (zero, *color_i));
- vec_add1 (internal_labels,
- find_or_create_internal_label (any, *color_i));
- break;
- case SR_TE_CO_BITS_01:
- vec_add1 (internal_labels,
- find_or_create_internal_label (steer_pl->next_hop,
- *color_i));
- vec_add1 (internal_labels,
- find_or_create_internal_label (zero, *color_i));
- break;
- case SR_TE_CO_BITS_00:
- case SR_TE_CO_BITS_11:
- vec_add1 (internal_labels,
- find_or_create_internal_label (steer_pl->next_hop,
- *color_i));
- break;
- }
- }
-
- /* Does hidden FIB already exist? */
- if (sm->fib_table_EC == (u32) ~ 0)
- {
- sm->fib_table_EC = fib_table_create_and_lock (FIB_PROTOCOL_MPLS,
- FIB_SOURCE_SR,
- "SR-MPLS Traffic Engineering (NextHop,Color)");
-
- fib_table_flush (sm->fib_table_EC, FIB_PROTOCOL_MPLS,
- FIB_SOURCE_SPECIAL);
- }
-
- /* Add the corresponding FIB entries */
- fib_route_path_t path = {
- .frp_proto = DPO_PROTO_MPLS,
- .frp_eos = MPLS_EOS,
- .frp_sw_if_index = ~0,
- .frp_fib_index = sm->fib_table_EC,
- .frp_weight = 1,
- .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
- .frp_label_stack = 0
- };
- fib_route_path_t *paths = NULL;
-
- if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
- {
- pfx.fp_proto = FIB_PROTOCOL_IP6;
- pfx.fp_len = steer_pl->classify.mask_width;
- pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6;
- }
- else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
- {
- pfx.fp_proto = FIB_PROTOCOL_IP4;
- pfx.fp_len = steer_pl->classify.mask_width;
- pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4;
- }
-
- if (steer_pl->vpn_label != (u32) ~ 0)
- {
- fib_mpls_label_t fml = {
- .fml_value = steer_pl->vpn_label,
- };
- vec_add1 (path.frp_label_stack, fml);
- path.frp_eos = MPLS_NON_EOS;
- }
-
- u32 label_i;
- vec_foreach_index (label_i, internal_labels)
- {
- path.frp_local_label = internal_labels[label_i];
- path.frp_preference = label_i;
- vec_add1 (paths, path);
- }
-
- /* Finally we must add to FIB IGP to N */
- clib_memcpy (&path.frp_addr, &steer_pl->next_hop,
- sizeof (steer_pl->next_hop));
- path.frp_preference = vec_len (internal_labels);
- path.frp_label_stack = NULL;
-
- if (steer_pl->nh_type == SR_STEER_IPV6)
- {
- path.frp_proto = DPO_PROTO_IP6;
- path.frp_fib_index =
- fib_table_find (FIB_PROTOCOL_IP6,
- (steer_pl->classify.fib_table !=
- (u32) ~ 0 ? steer_pl->classify.fib_table : 0));
- }
- else if (steer_pl->nh_type == SR_STEER_IPV4)
- {
- path.frp_proto = DPO_PROTO_IP4;
- path.frp_fib_index =
- fib_table_find (FIB_PROTOCOL_IP4,
- (steer_pl->classify.fib_table !=
- (u32) ~ 0 ? steer_pl->classify.fib_table : 0));
- }
-
- vec_add1 (paths, path);
- if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
- fib_table_entry_update (fib_table_find
- (FIB_PROTOCOL_IP6,
- (steer_pl->classify.fib_table !=
- (u32) ~ 0 ? steer_pl->classify.fib_table : 0)),
- &pfx, FIB_SOURCE_SR,
- FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
- else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
- fib_table_entry_update (fib_table_find
- (FIB_PROTOCOL_IP4,
- (steer_pl->classify.fib_table !=
- (u32) ~ 0 ? steer_pl->classify.fib_table : 0)),
- &pfx, FIB_SOURCE_SR,
- FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
-
- vec_free (paths);
- paths = NULL;
-}
-
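The CO bits thus decide how many candidate labels precede the IGP fallback; since frp_preference follows vector order, the resulting preference ladder for a single color is (illustrative, CO_BITS_10 case):

  /* preference 0: label (NH,  C)  - most specific
   * preference 1: label (zero,C)
   * preference 2: label (ANY, C)  - installed only for CO_BITS_10
   * preference 3: IGP route to NH - final fallback */
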
-/**
- * @brief Steer L3 traffic through a given SR-MPLS policy
- *
- * @param bsid is the bindingSID of the SR Policy (~0 for automated steering)
- * @param table_id is the VRF where to install the FIB entry for the BSID
- * @param prefix is the IPv4/v6 address for L3 traffic type
- * @param mask_width is the mask for L3 traffic type
- * @param traffic_type describes the type of traffic
- * @param next_hop SR TE Next-Hop
- * @param nh_type is the AF of Next-Hop
- * @param color SR TE color
- * @param co_bits SR TE color-only bits
- * @param vpn_label is the VPN label to push (~0 if none)
- *
- * @return 0 if correct, else error
- */
-int
-sr_mpls_steering_policy_add (mpls_label_t bsid, u32 table_id,
- ip46_address_t * prefix, u32 mask_width,
- u8 traffic_type, ip46_address_t * next_hop,
- u8 nh_type, u32 color, char co_bits,
- mpls_label_t vpn_label)
-{
- mpls_sr_main_t *sm = &sr_mpls_main;
- sr_mpls_steering_key_t key;
- mpls_sr_steering_policy_t *steer_pl;
- fib_prefix_t pfx = { 0 };
-
- mpls_sr_policy_t *sr_policy = 0;
- uword *p = 0;
-
- clib_memset (&key, 0, sizeof (sr_mpls_steering_key_t));
-
- if (traffic_type != SR_STEER_IPV4 && traffic_type != SR_STEER_IPV6)
- return -1;
-
- /* Compute the steer policy key */
- key.prefix.as_u64[0] = prefix->as_u64[0];
- key.prefix.as_u64[1] = prefix->as_u64[1];
- key.mask_width = mask_width;
- key.fib_table = (table_id != (u32) ~ 0 ? table_id : 0);
- key.traffic_type = traffic_type;
-
- /*
- * Search for the steering policy. If it already exists, we are adding a
- * new color.
- */
- if (!sm->sr_steer_policies_hash.hash)
- mhash_init (&sm->sr_steer_policies_hash, sizeof (uword),
- sizeof (sr_mpls_steering_key_t));
-
- p = mhash_get (&sm->sr_steer_policies_hash, &key);
- if (p)
- {
- steer_pl = pool_elt_at_index (sm->steer_policies, p[0]);
- if (steer_pl->bsid != (u32) ~ 0)
- return -1; //Means we are rewriting the steering. Not allowed.
-
- /* Means we are adding a color. Check that the NH matches. */
- if (ip46_address_cmp (&steer_pl->next_hop, next_hop))
- return -2;
- if (vec_search (steer_pl->color, color) != ~0)
- return -3;
- if (steer_pl->co_bits != co_bits)
- return -4; /* CO colors should be the same */
- if (steer_pl->vpn_label != vpn_label)
- return -5; /* VPN label should be the same */
-
- /* Add the new color and redo the steering */
- vec_add1 (steer_pl->color, color);
- vec_sort_with_function (steer_pl->color, sort_color_descent);
- compute_sr_te_automated_steering_fib_entry (steer_pl);
- internal_label_lock_co (steer_pl->next_hop, color, steer_pl->co_bits);
- return 0;
- }
-
- /* Create a new steering policy */
- pool_get (sm->steer_policies, steer_pl);
- clib_memset (steer_pl, 0, sizeof (*steer_pl));
- clib_memcpy (&steer_pl->classify.prefix, prefix, sizeof (ip46_address_t));
- clib_memcpy (&steer_pl->next_hop, next_hop, sizeof (ip46_address_t));
- steer_pl->nh_type = nh_type;
- steer_pl->co_bits = co_bits;
- steer_pl->classify.mask_width = mask_width;
- steer_pl->classify.fib_table = (table_id != (u32) ~ 0 ? table_id : 0);
- steer_pl->classify.traffic_type = traffic_type;
- steer_pl->color = NULL;
- steer_pl->vpn_label = vpn_label;
-
- /* Create and store key */
- mhash_set (&sm->sr_steer_policies_hash, &key, steer_pl - sm->steer_policies,
- NULL);
-
- /* Local steering */
- if (bsid != (u32) ~ 0)
- {
- if (!sm->sr_policies_index_hash)
- sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));
- steer_pl->bsid = bsid;
- p = hash_get (sm->sr_policies_index_hash, bsid);
- if (!p)
- return -1;
- sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
-
- fib_route_path_t path = {
- .frp_proto = DPO_PROTO_MPLS,
- .frp_local_label = sr_policy->bsid,
- .frp_eos = MPLS_EOS,
- .frp_sw_if_index = ~0,
- .frp_fib_index = 0,
- .frp_weight = 1,
- .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
- .frp_label_stack = 0
- };
- fib_route_path_t *paths = NULL;
-
- if (steer_pl->vpn_label != (u32) ~ 0)
- {
- fib_mpls_label_t fml = {
- .fml_value = steer_pl->vpn_label,
- };
- vec_add1 (path.frp_label_stack, fml);
- }
-
- /* FIB API calls - Recursive route through the BindingSID */
- if (traffic_type == SR_STEER_IPV6)
- {
- pfx.fp_proto = FIB_PROTOCOL_IP6;
- pfx.fp_len = steer_pl->classify.mask_width;
- pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6;
- path.frp_fib_index = 0;
- path.frp_preference = 0;
- vec_add1 (paths, path);
- fib_table_entry_path_add2 (fib_table_find
- (FIB_PROTOCOL_IP6,
- (table_id != (u32) ~ 0 ? table_id : 0)),
- &pfx, FIB_SOURCE_SR,
- FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
- vec_free (paths);
- }
- else if (traffic_type == SR_STEER_IPV4)
- {
- pfx.fp_proto = FIB_PROTOCOL_IP4;
- pfx.fp_len = steer_pl->classify.mask_width;
- pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4;
- path.frp_fib_index = 0;
- path.frp_preference = 0;
- vec_add1 (paths, path);
- fib_table_entry_path_add2 (fib_table_find
- (FIB_PROTOCOL_IP4,
- (table_id != (u32) ~ 0 ? table_id : 0)),
- &pfx, FIB_SOURCE_SR,
- FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
- vec_free (paths);
- }
- }
- /* Automated steering */
- else
- {
- steer_pl->bsid = (u32) ~ 0;
- vec_add1 (steer_pl->color, color);
- compute_sr_te_automated_steering_fib_entry (steer_pl);
- internal_label_lock_co (steer_pl->next_hop, color, steer_pl->co_bits);
- }
- return 0;
-}
-
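As a usage sketch, automated (next-hop, color) steering of an IPv4 prefix through this API might look as follows (all values hypothetical; bsid == ~0 selects automated steering):

  ip46_address_t pfx = { 0 }, nh = { 0 };
  pfx.ip4.as_u32 = clib_host_to_net_u32 (0x0a000000); /* 10.0.0.0/8 */
  nh.ip4.as_u32 = clib_host_to_net_u32 (0x01010101);  /* 1.1.1.1 */

  int rv = sr_mpls_steering_policy_add ((u32) ~0 /* bsid */,
                                        (u32) ~0 /* table_id */,
                                        &pfx, 8, SR_STEER_IPV4, &nh,
                                        SR_STEER_IPV4, 1234 /* color */,
                                        SR_TE_CO_BITS_00,
                                        (u32) ~0 /* no vpn label */);
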
-/**
- * @brief Delete steering rule for an SR-MPLS policy
- *
- * @param prefix is the IPv4/v6 address for L3 traffic type
- * @param mask_width is the mask for L3 traffic type
- * @param traffic_type describes the type of traffic
- * @param table_id is the VRF of the steering FIB entry
- * @param color SR TE color
- *
- * @return 0 if correct, else error
- */
-int
-sr_mpls_steering_policy_del (ip46_address_t * prefix, u32 mask_width,
- u8 traffic_type, u32 table_id, u32 color)
-{
- mpls_sr_main_t *sm = &sr_mpls_main;
- sr_mpls_steering_key_t key;
- mpls_sr_steering_policy_t *steer_pl;
- fib_prefix_t pfx = { 0 };
- uword *p = 0;
-
- clib_memset (&key, 0, sizeof (sr_mpls_steering_key_t));
-
- /* Compute the steer policy key */
- if (traffic_type != SR_STEER_IPV4 && traffic_type != SR_STEER_IPV6)
- return -1;
-
- key.prefix.as_u64[0] = prefix->as_u64[0];
- key.prefix.as_u64[1] = prefix->as_u64[1];
- key.mask_width = mask_width;
- key.fib_table = (table_id != (u32) ~ 0 ? table_id : 0);
- key.traffic_type = traffic_type;
-
- if (!sm->sr_steer_policies_hash.hash)
- mhash_init (&sm->sr_steer_policies_hash, sizeof (uword),
- sizeof (sr_mpls_steering_key_t));
-
- /* Search for the item */
- p = mhash_get (&sm->sr_steer_policies_hash, &key);
-
- if (!p)
- return -1;
-
- /* Retrieve the steering policy */
- steer_pl = pool_elt_at_index (sm->steer_policies, p[0]);
-
- if (steer_pl->bsid == (u32) ~ 0)
- {
- /* Remove the color from the color vector */
- vec_del1 (steer_pl->color, vec_search (steer_pl->color, color));
-
- if (vec_len (steer_pl->color))
- {
- /* Reorder Colors */
- vec_sort_with_function (steer_pl->color, sort_color_descent);
- compute_sr_te_automated_steering_fib_entry (steer_pl);
- /* Release the label locks taken for this color */
- internal_label_unlock_co (steer_pl->next_hop, color,
- steer_pl->co_bits);
- return 0;
- }
- else
- {
- vec_free (steer_pl->color);
- /* Remove FIB entry */
- if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
- {
- pfx.fp_proto = FIB_PROTOCOL_IP6;
- pfx.fp_len = steer_pl->classify.mask_width;
- pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6;
- fib_table_entry_delete (fib_table_find
- (FIB_PROTOCOL_IP6,
- steer_pl->classify.fib_table), &pfx,
- FIB_SOURCE_SR);
- }
- else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
- {
- pfx.fp_proto = FIB_PROTOCOL_IP4;
- pfx.fp_len = steer_pl->classify.mask_width;
- pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4;
- fib_table_entry_delete (fib_table_find
- (FIB_PROTOCOL_IP4,
- steer_pl->classify.fib_table), &pfx,
- FIB_SOURCE_SR);
- }
- /* Release the label locks taken for this color */
- internal_label_unlock_co (steer_pl->next_hop, color,
- steer_pl->co_bits);
- }
- }
- else //Remove by BSID
- {
- if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
- {
- pfx.fp_proto = FIB_PROTOCOL_IP6;
- pfx.fp_len = steer_pl->classify.mask_width;
- pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6;
- fib_table_entry_delete (fib_table_find
- (FIB_PROTOCOL_IP6,
- steer_pl->classify.fib_table), &pfx,
- FIB_SOURCE_SR);
- }
- else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
- {
- pfx.fp_proto = FIB_PROTOCOL_IP4;
- pfx.fp_len = steer_pl->classify.mask_width;
- pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4;
- fib_table_entry_delete (fib_table_find
- (FIB_PROTOCOL_IP4,
- steer_pl->classify.fib_table), &pfx,
- FIB_SOURCE_SR);
- }
- }
- /* Delete SR steering policy entry */
- pool_put (sm->steer_policies, steer_pl);
- mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
- if (mhash_elts (&sm->sr_steer_policies_hash) == 0)
- {
- mhash_free (&sm->sr_steer_policies_hash);
- sm->sr_steer_policies_hash.hash = NULL;
- }
- return 0;
-}
-
-static clib_error_t *
-sr_mpls_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- int is_del = 0;
-
- ip46_address_t prefix, nh;
- u32 dst_mask_width = 0;
- u8 traffic_type = 0;
- u8 nh_type = 0;
- u32 fib_table = (u32) ~ 0, color = (u32) ~ 0;
- u32 co_bits = 0;
-
- mpls_label_t bsid, vpn_label = (u32) ~ 0;
-
- u8 sr_policy_set = 0;
-
- clib_memset (&prefix, 0, sizeof (ip46_address_t));
- clib_memset (&nh, 0, sizeof (ip46_address_t));
-
- int rv;
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "del"))
- is_del = 1;
- else if (!traffic_type
- && unformat (input, "l3 %U/%d", unformat_ip6_address,
- &prefix.ip6, &dst_mask_width))
- traffic_type = SR_STEER_IPV6;
- else if (!traffic_type
- && unformat (input, "l3 %U/%d", unformat_ip4_address,
- &prefix.ip4, &dst_mask_width))
- traffic_type = SR_STEER_IPV4;
- else if (!sr_policy_set
- && unformat (input, "via sr policy bsid %U",
- unformat_mpls_unicast_label, &bsid))
- sr_policy_set = 1;
- else if (!sr_policy_set
- && unformat (input, "via next-hop %U color %d co %d",
- unformat_ip4_address, &nh.ip4, &color, &co_bits))
- {
- sr_policy_set = 1;
- nh_type = SR_STEER_IPV4;
- }
- else if (!sr_policy_set
- && unformat (input, "via next-hop %U color %d co %d",
- unformat_ip6_address, &nh.ip6, &color, &co_bits))
- {
- sr_policy_set = 1;
- nh_type = SR_STEER_IPV6;
- }
- else if (fib_table == (u32) ~ 0
- && unformat (input, "fib-table %d", &fib_table));
- else if (unformat (input, "vpn-label %U",
- unformat_mpls_unicast_label, &vpn_label));
- else
- break;
- }
-
- if (!traffic_type)
- return clib_error_return (0, "No L3 traffic specified");
- if (!sr_policy_set)
- return clib_error_return (0, "No SR policy specified");
-
- /* Make sure that the prefixes are clean */
- if (traffic_type == SR_STEER_IPV4)
- {
- u32 mask =
- (dst_mask_width ? (0xFFFFFFFFu >> (32 - dst_mask_width)) : 0);
- prefix.ip4.as_u32 &= mask;
- }
- else if (traffic_type == SR_STEER_IPV6)
- {
- ip6_address_t mask;
- ip6_address_mask_from_width (&mask, dst_mask_width);
- ip6_address_mask (&prefix.ip6, &mask);
- }
-
- if (nh_type)
- bsid = (u32) ~ 0;
-
- if (is_del)
- rv =
- sr_mpls_steering_policy_del (&prefix, dst_mask_width,
- traffic_type, fib_table, color);
-
- else
- rv =
- sr_mpls_steering_policy_add (bsid, fib_table, &prefix, dst_mask_width,
- traffic_type, &nh, nh_type, color, co_bits,
- vpn_label);
-
- switch (rv)
- {
- case 0:
- break;
- case 1:
- return 0;
- case -1:
- return clib_error_return (0, "Incorrect API usage.");
- case -2:
- return clib_error_return (0, "The Next-Hop does not match.");
- case -3:
- return clib_error_return (0, "The color already exists.");
- case -4:
- return clib_error_return (0, "The co-bits do not match.");
- case -5:
- return clib_error_return (0, "The VPN-labels do not match.");
- default:
- return clib_error_return (0, "BUG: sr steer policy returns %d", rv);
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND(sr_mpls_steer_policy_command, static)=
-{
- .path = "sr mpls steer",
- .short_help = "sr mpls steer (del) l3 <ip_addr/mask> "
- "via [sr policy bsid <mpls_label> || next-hop <ip46_addr> color <u32> co <0|1|2|3> ](fib-table <fib_table_index>)(vpn-label 500)",
- .long_help =
- "\tSteer L3 traffic through an existing SR policy.\n"
- "\tExamples:\n"
- "\t\tsr steer l3 2001::/64 via sr_policy bsid 29999\n"
- "\t\tsr steer del l3 2001::/64 via sr_policy bsid 29999\n"
- "\t\tsr steer l3 2001::/64 via next-hop 1.1.1.1 color 1234 co 0\n"
- "\t\tsr steer l3 2001::/64 via next-hop 2001::1 color 1234 co 2 vpn-label 500\n",
- .function = sr_mpls_steer_policy_command_fn,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-show_sr_mpls_steering_policies_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- mpls_sr_main_t *sm = &sr_mpls_main;
- mpls_sr_steering_policy_t **steer_policies = 0;
- mpls_sr_steering_policy_t *steer_pl;
-
- int i;
-
- vlib_cli_output (vm, "SR MPLS steering policies:");
- /* *INDENT-OFF* */
- pool_foreach (steer_pl, sm->steer_policies) {
- vec_add1(steer_policies, steer_pl);
- }
- /* *INDENT-ON* */
- for (i = 0; i < vec_len (steer_policies); i++)
- {
- vlib_cli_output (vm, "==========================");
- steer_pl = steer_policies[i];
- if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
- {
- vlib_cli_output (vm, "Prefix: %U/%d via:",
- format_ip4_address,
- &steer_pl->classify.prefix.ip4,
- steer_pl->classify.mask_width);
- }
- else if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
- {
- vlib_cli_output (vm, "Prefix: %U/%d via:",
- format_ip6_address,
- &steer_pl->classify.prefix.ip6,
- steer_pl->classify.mask_width);
- }
-
- if (steer_pl->bsid != (u32) ~ 0)
- {
- vlib_cli_output (vm, "· BSID %U",
- format_mpls_unicast_label, steer_pl->bsid);
- }
- else
- {
- if (steer_pl->nh_type == SR_STEER_IPV4)
- {
- vlib_cli_output (vm, "· Next-hop %U",
- format_ip4_address, &steer_pl->next_hop.ip4);
- }
- else if (steer_pl->nh_type == SR_STEER_IPV6)
- {
- vlib_cli_output (vm, "· Next-hop %U",
- format_ip6_address, &steer_pl->next_hop.ip6);
- }
-
- u32 *color_i = 0;
- u8 *s = NULL;
- s = format (s, "[ ");
- vec_foreach (color_i, steer_pl->color)
- {
- s = format (s, "%d, ", *color_i);
- }
- s = format (s, "\b\b ]");
- vlib_cli_output (vm, "· Color %s", s);
-
- switch (steer_pl->co_bits)
- {
- case SR_TE_CO_BITS_00:
- vlib_cli_output (vm, "· CO-bits: 00");
- break;
- case SR_TE_CO_BITS_01:
- vlib_cli_output (vm, "· CO-bits: 01");
- break;
- case SR_TE_CO_BITS_10:
- vlib_cli_output (vm, "· CO-bits: 10");
- break;
- case SR_TE_CO_BITS_11:
- vlib_cli_output (vm, "· CO-bits: 11");
- break;
- }
- }
- }
- vec_free (steer_policies);
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND(show_sr_mpls_steering_policies_command, static)=
-{
- .path = "show sr mpls steering policies",
- .short_help = "show sr mpls steering policies",
- .function = show_sr_mpls_steering_policies_command_fn,
-};
-/* *INDENT-ON* */
-
-clib_error_t *
-sr_mpls_steering_init (vlib_main_t * vm)
-{
- mpls_sr_main_t *sm = &sr_mpls_main;
-
- /* Init memory for function keys */
- sm->sr_steer_policies_hash.hash = NULL;
-
- sm->fib_table_EC = (u32) ~ 0;
- sm->ec_labels = 0;
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_INIT_FUNCTION(sr_mpls_steering_init);
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables: eval: (c-set-style "gnu") End:
- */
diff --git a/src/vnet/srmpls/sr_mpls_test.c b/src/vnet/srmpls/sr_mpls_test.c
deleted file mode 100644
index e5d68462443..00000000000
--- a/src/vnet/srmpls/sr_mpls_test.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- *------------------------------------------------------------------
- * Copyright (c) 2021 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-#include <vat/vat.h>
-#include <vlibapi/api.h>
-#include <vlibmemory/api.h>
-#include <vppinfra/error.h>
-#include <vpp/api/types.h>
-
-#define __plugin_msg_base sr_mpls_test_main.msg_id_base
-#include <vlibapi/vat_helper_macros.h>
-
-/* Declare message IDs */
-#include <vnet/format_fns.h>
-#include <vnet/srmpls/sr_mpls.api_enum.h>
-#include <vnet/srmpls/sr_mpls.api_types.h>
-
-#define vl_endianfun /* define message structures */
-#include <vnet/srmpls/sr_mpls.api.h>
-#undef vl_endianfun
-
-typedef struct
-{
- /* API message ID base */
- u16 msg_id_base;
- u32 ping_id;
- vat_main_t *vat_main;
-} sr_mpls_test_main_t;
-
-static sr_mpls_test_main_t sr_mpls_test_main;
-
-static int
-api_sr_mpls_policy_mod (vat_main_t *vam)
-{
- return -1;
-}
-
-static int
-api_sr_mpls_steering_add_del (vat_main_t *vam)
-{
- return -1;
-}
-
-static int
-api_sr_mpls_policy_assign_endpoint_color (vat_main_t *vam)
-{
- return -1;
-}
-
-static int
-api_sr_mpls_policy_add (vat_main_t *vam)
-{
- unformat_input_t *i = vam->input;
- vl_api_sr_mpls_policy_add_t *mp;
- u32 bsid = 0;
- u32 weight = 1;
- u8 type = 0;
- u8 n_segments = 0;
- u32 sid;
- u32 *segments = NULL;
- int ret;
-
- /* Parse args required to build the message */
- while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (i, "bsid %d", &bsid))
- ;
- else if (unformat (i, "weight %d", &weight))
- ;
- else if (unformat (i, "spray"))
- type = 1;
- else if (unformat (i, "next %d", &sid))
- {
- n_segments += 1;
- vec_add1 (segments, htonl (sid));
- }
- else
- {
- clib_warning ("parse error '%U'", format_unformat_error, i);
- return -99;
- }
- }
-
- if (bsid == 0)
- {
- errmsg ("bsid not set");
- return -99;
- }
-
- if (n_segments == 0)
- {
- errmsg ("no sid in segment stack");
- return -99;
- }
-
- /* Construct the API message */
- M2 (SR_MPLS_POLICY_ADD, mp, sizeof (u32) * n_segments);
-
- mp->bsid = htonl (bsid);
- mp->weight = htonl (weight);
- mp->is_spray = type;
- mp->n_segments = n_segments;
- memcpy (mp->segments, segments, sizeof (u32) * n_segments);
- vec_free (segments);
-
- /* send it... */
- S (mp);
-
- /* Wait for a reply... */
- W (ret);
- return ret;
-}
-
-static int
-api_sr_mpls_policy_del (vat_main_t *vam)
-{
- unformat_input_t *i = vam->input;
- vl_api_sr_mpls_policy_del_t *mp;
- u32 bsid = 0;
- int ret;
-
- /* Parse args required to build the message */
- while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (i, "bsid %d", &bsid))
- ;
- else
- {
- clib_warning ("parse error '%U'", format_unformat_error, i);
- return -99;
- }
- }
-
- if (bsid == 0)
- {
- errmsg ("bsid not set");
- return -99;
- }
-
- /* Construct the API message */
- M (SR_MPLS_POLICY_DEL, mp);
-
- mp->bsid = htonl (bsid);
-
- /* send it... */
- S (mp);
-
- /* Wait for a reply... */
- W (ret);
- return ret;
-}
-
-#include <vnet/srmpls/sr_mpls.api_test.c>
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/srp/node.c b/src/vnet/srp/node.c
index 12c14012b61..26c3f0b8c1f 100644
--- a/src/vnet/srp/node.c
+++ b/src/vnet/srp/node.c
@@ -878,9 +878,11 @@ static clib_error_t * srp_init (vlib_main_t * vm)
sm->default_data_ttl = 255;
sm->vlib_main = vm;
- vlib_register_node (vm, &srp_ips_process_node);
- vlib_register_node (vm, &srp_input_node);
- vlib_register_node (vm, &srp_control_input_node);
+ vlib_register_node (vm, &srp_ips_process_node, "%s",
+ srp_ips_process_node.name);
+ vlib_register_node (vm, &srp_input_node, "%s", srp_input_node.name);
+ vlib_register_node (vm, &srp_control_input_node, "%s",
+ srp_control_input_node.name);
srp_setup_node (vm, srp_input_node.index);
return 0;
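The extra arguments reflect vlib_register_node() now taking a printf-style node name; callers pass the static name explicitly. A sketch with a hypothetical node:

  /* my_node stands in for any statically defined vlib node */
  vlib_register_node (vm, &my_node, "%s", my_node.name);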
diff --git a/src/vnet/srp/packet.h b/src/vnet/srp/packet.h
index 96dab648b32..38296ac6ec8 100644
--- a/src/vnet/srp/packet.h
+++ b/src/vnet/srp/packet.h
@@ -40,8 +40,7 @@
#ifndef included_srp_packet_h
#define included_srp_packet_h
-#include <vppinfra/byte_order.h>
-#include <vppinfra/bitops.h>
+#include <vppinfra/clib.h>
#include <vnet/ethernet/packet.h>
/* SRP version 2. */
diff --git a/src/vnet/srv6/dir.dox b/src/vnet/srv6/dir.dox
index 3f539a58ef1..3f539a58ef1 100755..100644
--- a/src/vnet/srv6/dir.dox
+++ b/src/vnet/srv6/dir.dox
diff --git a/src/vnet/srv6/sr.api b/src/vnet/srv6/sr.api
index 6190a8c7ff5..4766ce3ba11 100644
--- a/src/vnet/srv6/sr.api
+++ b/src/vnet/srv6/sr.api
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-option version = "2.0.0";
+option version = "2.1.0";
import "vnet/interface_types.api";
import "vnet/ip/ip_types.api";
@@ -109,6 +109,65 @@ autoreply define sr_policy_mod
vl_api_srv6_sid_list_t sids;
};
+enum sr_policy_type : u8
+{
+ SR_API_POLICY_TYPE_DEFAULT = 0,
+ SR_API_POLICY_TYPE_SPRAY = 1,
+ SR_API_POLICY_TYPE_TEF = 2,
+};
+
+/** \brief IPv6 SR policy add
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid is the bindingSID of the SR Policy
+ @param weight is the weight of the sid list. optional.
+ @param is_encap is the behavior of the SR policy. (0.SRH insert // 1.Encapsulation)
+ @param type is the type of the SR policy. (0.Default // 1.Spray // 2.Tef)
+ @param fib_table is the VRF where to install the FIB entry for the BSID
+ @param sids is a srv6_sid_list object
+ @param encap_src is an encapsulation IPv6 source address. optional.
+*/
+autoreply define sr_policy_add_v2
+{
+ u32 client_index;
+ u32 context;
+ vl_api_ip6_address_t bsid_addr;
+ u32 weight;
+ bool is_encap;
+ vl_api_sr_policy_type_t type [default=0x0];
+ u32 fib_table;
+ vl_api_srv6_sid_list_t sids;
+ vl_api_ip6_address_t encap_src;
+ option status="in_progress";
+};
+
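A hypothetical client-side sketch of filling this message (allocation, message-id setup and send macros vary by client; bsid here is an assumed ip6_address_t). A zero encap_src makes the handler fall back to the global encaps source, per the sr_api.c hunk further below:

  vl_api_sr_policy_add_v2_t *mp = vl_msg_api_alloc (sizeof (*mp));
  clib_memset (mp, 0, sizeof (*mp));
  ip6_address_encode (&bsid, mp->bsid_addr);
  mp->weight = htonl (1);
  mp->is_encap = 1;
  mp->type = SR_API_POLICY_TYPE_DEFAULT;
  mp->fib_table = htonl (0);
  /* mp->sids (num_sids, weight, sids[]) filled as for sr_policy_add;
   * encap_src left as zero => server uses sr_get_encaps_source () */
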
+/** \brief IPv6 SR policy modification
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid is the bindingSID of the SR Policy
+ @param sr_policy_index is the index of the SR policy
+ @param fib_table is the VRF where to install the FIB entry for the BSID
+ @param operation is the operation to perform (one of the defined policy operations)
+ @param sl_index is the index of the Segment List to modify/delete
+ @param weight is the weight of the sid list. optional.
+ @param sids is a srv6_sid_list object
+ @param encap_src is an encapsulation IPv6 source address. optional.
+*/
+autoreply define sr_policy_mod_v2
+{
+ u32 client_index;
+ u32 context;
+ vl_api_ip6_address_t bsid_addr;
+ u32 sr_policy_index;
+ u32 fib_table;
+ vl_api_sr_policy_op_t operation;
+ u32 sl_index;
+ u32 weight;
+ vl_api_srv6_sid_list_t sids;
+ vl_api_ip6_address_t encap_src;
+ option status="in_progress";
+};
+
/** \brief IPv6 SR policy deletion
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -195,12 +254,45 @@ define sr_localsids_details
u32 xconnect_iface_or_vrf_table;
};
+
+/** \brief Dump the list of SR LocalSIDs along with packet statistics
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define sr_localsids_with_packet_stats_dump
+{
+ u32 client_index;
+ u32 context;
+ option status="in_progress";
+};
+
+define sr_localsids_with_packet_stats_details
+{
+ u32 context;
+ vl_api_ip6_address_t addr;
+ bool end_psp;
+ vl_api_sr_behavior_t behavior;
+ u32 fib_table;
+ u32 vlan_index;
+ vl_api_address_t xconnect_nh_addr;
+ u32 xconnect_iface_or_vrf_table;
+ u64 good_traffic_bytes;
+ u64 good_traffic_pkt_count;
+ u64 bad_traffic_bytes;
+ u64 bad_traffic_pkt_count;
+ option status="in_progress";
+};
+
+
+
/** \brief Dump the list of SR policies
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
*/
define sr_policies_dump
{
+ option deprecated;
+
u32 client_index;
u32 context;
};
@@ -217,6 +309,28 @@ define sr_policies_details
vl_api_srv6_sid_list_t sid_lists[num_sid_lists];
};
+/** \brief Dump the list of SR policies v2
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define sr_policies_v2_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+define sr_policies_v2_details
+{
+ u32 context;
+ vl_api_ip6_address_t bsid;
+ vl_api_ip6_address_t encap_src;
+ vl_api_sr_policy_type_t type;
+ bool is_encap;
+ u32 fib_table;
+ u8 num_sid_lists;
+ vl_api_srv6_sid_list_t sid_lists[num_sid_lists];
+};
+
/** \brief Dump the list of SR policies along with actual segment list index on VPP
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/vnet/srv6/sr.h b/src/vnet/srv6/sr.h
index ea9ff709feb..c2867eb7508 100644
--- a/src/vnet/srv6/sr.h
+++ b/src/vnet/srv6/sr.h
@@ -56,13 +56,11 @@
#define SR_SEGMENT_LIST_WEIGHT_DEFAULT 1
-/* *INDENT-OFF* */
typedef struct
{
ip6_header_t ip;
ip6_sr_header_t sr;
} __attribute__ ((packed)) ip6srv_combo_header_t;
-/* *INDENT-ON* */
/**
* @brief SR Segment List (SID list)
@@ -90,6 +88,7 @@ typedef struct
/* SR policy types */
#define SR_POLICY_TYPE_DEFAULT 0
#define SR_POLICY_TYPE_SPRAY 1
+#define SR_POLICY_TYPE_TEF 2
/**
* @brief SR Policy
*/
@@ -111,6 +110,8 @@ typedef struct
u8 is_encap; /**< Mode (0 is SRH insert, 1 Encaps) */
+ ip6_address_t encap_src;
+
u16 plugin;
void *plugin_mem;
} ip6_sr_policy_t;
@@ -128,7 +129,7 @@ typedef struct
char end_psp; /**< Combined with End.PSP? */
- u16 behavior; /**< Behavior associated to this localsid */
+ u8 behavior; /**< Behavior associated to this localsid */
union
{
@@ -344,11 +345,12 @@ sr_policy_register_function (vlib_main_t * vm, u8 * fn_name,
sr_p_plugin_callback_t * removal_fn);
extern int sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments,
- u32 weight, u8 type, u32 fib_table, u8 is_encap,
- u16 plugin, void *plugin_mem);
-extern int sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table,
- u8 operation, ip6_address_t * segments,
- u32 sl_index, u32 weight);
+ ip6_address_t *encap_src, u32 weight, u8 type,
+ u32 fib_table, u8 is_encap, u16 plugin,
+ void *plugin_mem);
+extern int sr_policy_mod (ip6_address_t *bsid, u32 index, u32 fib_table,
+ u8 operation, ip6_address_t *segments,
+ ip6_address_t *encap_src, u32 sl_index, u32 weight);
extern int sr_policy_del (ip6_address_t * bsid, u32 index);
extern int
diff --git a/src/vnet/srv6/sr_api.c b/src/vnet/srv6/sr_api.c
index c68b355922b..a44c3098112 100644
--- a/src/vnet/srv6/sr_api.c
+++ b/src/vnet/srv6/sr_api.c
@@ -82,17 +82,16 @@ vl_api_sr_policy_add_t_handler (vl_api_sr_policy_add_t * mp)
ip6_address_decode (mp->bsid_addr, &bsid_addr);
-/*
- * sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments,
- * u32 weight, u8 behavior, u32 fib_table, u8 is_encap,
- * u16 behavior, void *plugin_mem)
- */
+ /*
+ * sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments,
+ * ip6_address_t *encap_src,
+ * u32 weight, u8 behavior, u32 fib_table, u8 is_encap,
+ * u16 behavior, void *plugin_mem)
+ */
int rv = 0;
- rv = sr_policy_add (&bsid_addr,
- segments,
- ntohl (mp->sids.weight),
- mp->is_spray, ntohl (mp->fib_table), mp->is_encap, 0,
- NULL);
+ rv =
+ sr_policy_add (&bsid_addr, segments, NULL, ntohl (mp->sids.weight),
+ mp->is_spray, ntohl (mp->fib_table), mp->is_encap, 0, NULL);
vec_free (segments);
REPLY_MACRO (VL_API_SR_POLICY_ADD_REPLY);
@@ -115,18 +114,93 @@ vl_api_sr_policy_mod_t_handler (vl_api_sr_policy_mod_t * mp)
ip6_address_decode (mp->bsid_addr, &bsid_addr);
int rv = 0;
-/*
- * int
- * sr_policy_mod(ip6_address_t *bsid, u32 index, u32 fib_table,
- * u8 operation, ip6_address_t *segments, u32 sl_index,
- * u32 weight, u8 is_encap)
- */
- rv = sr_policy_mod (&bsid_addr,
- ntohl (mp->sr_policy_index),
- ntohl (mp->fib_table),
- mp->operation,
- segments, ntohl (mp->sl_index),
- ntohl (mp->sids.weight));
+ /*
+ * int
+ * sr_policy_mod(ip6_address_t *bsid, u32 index, u32 fib_table,
+ * u8 operation, ip6_address_t *segments,
+ * ip6_address_t *encap_src, u32 sl_index,
+ * u32 weight, u8 is_encap)
+ */
+ rv = sr_policy_mod (&bsid_addr, ntohl (mp->sr_policy_index),
+ ntohl (mp->fib_table), mp->operation, segments, NULL,
+ ntohl (mp->sl_index), ntohl (mp->sids.weight));
+ vec_free (segments);
+
+ REPLY_MACRO (VL_API_SR_POLICY_MOD_REPLY);
+}
+
+static void
+vl_api_sr_policy_add_v2_t_handler (vl_api_sr_policy_add_v2_t *mp)
+{
+ vl_api_sr_policy_add_v2_reply_t *rmp;
+ ip6_address_t *segments = 0, *seg;
+ ip6_address_t bsid_addr;
+ ip6_address_t encap_src;
+
+ int i;
+ for (i = 0; i < mp->sids.num_sids; i++)
+ {
+ vec_add2 (segments, seg, 1);
+ ip6_address_decode (mp->sids.sids[i], seg);
+ }
+
+ ip6_address_decode (mp->bsid_addr, &bsid_addr);
+ ip6_address_decode (mp->encap_src, &encap_src);
+
+ if (ip6_address_is_zero (&encap_src))
+ {
+ encap_src = *sr_get_encaps_source ();
+ }
+ /*
+ * sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments,
+ * ip6_address_t *encap_src,
+ * u32 weight, u8 behavior, u32 fib_table, u8 is_encap,
+ * u16 behavior, void *plugin_mem)
+ */
+ int rv = 0;
+ rv =
+ sr_policy_add (&bsid_addr, segments, &encap_src, ntohl (mp->sids.weight),
+ mp->type, ntohl (mp->fib_table), mp->is_encap, 0, NULL);
+ vec_free (segments);
+
+ REPLY_MACRO (VL_API_SR_POLICY_ADD_V2_REPLY);
+}
+
+static void
+vl_api_sr_policy_mod_v2_t_handler (vl_api_sr_policy_mod_v2_t *mp)
+{
+ vl_api_sr_policy_mod_v2_reply_t *rmp;
+ ip6_address_t *segments = 0, *seg;
+ ip6_address_t bsid_addr;
+ ip6_address_t encap_src;
+
+ int i;
+ for (i = 0; i < mp->sids.num_sids; i++)
+ {
+ vec_add2 (segments, seg, 1);
+ ip6_address_decode (mp->sids.sids[i], seg);
+ }
+
+ ip6_address_decode (mp->bsid_addr, &bsid_addr);
+ ip6_address_decode (mp->encap_src, &encap_src);
+
+ if (ip6_address_is_zero (&encap_src))
+ {
+ encap_src = *sr_get_encaps_source ();
+ }
+
+ int rv = 0;
+ /*
+ * int
+ * sr_policy_mod(ip6_address_t *bsid, u32 index, u32 fib_table,
+ * u8 operation, ip6_address_t *segments,
+ * ip6_address_t *encap_src, u32 sl_index,
+ * u32 weight, u8 is_encap)
+ */
+ rv =
+ sr_policy_mod (&bsid_addr, ntohl (mp->sr_policy_index),
+ ntohl (mp->fib_table), mp->operation, segments, &encap_src,
+ ntohl (mp->sl_index), ntohl (mp->sids.weight));
vec_free (segments);
REPLY_MACRO (VL_API_SR_POLICY_MOD_REPLY);
@@ -217,7 +291,7 @@ static void send_sr_localsid_details
rmp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_SR_LOCALSIDS_DETAILS);
ip6_address_encode (&t->localsid, rmp->addr);
rmp->end_psp = t->end_psp;
- rmp->behavior = htons (t->behavior);
+ rmp->behavior = t->behavior;
rmp->fib_table = htonl (t->fib_table);
rmp->vlan_index = htonl (t->vlan_index);
ip_address_encode (&t->next_hop, IP46_TYPE_ANY, &rmp->xconnect_nh_addr);
@@ -247,12 +321,77 @@ static void vl_api_sr_localsids_dump_t_handler
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (t, sm->localsids)
{
send_sr_localsid_details(t, reg, mp->context);
}
- /* *INDENT-ON* */
+}
+
+static void
+send_sr_localsid_with_packet_stats_details (int local_sid_index,
+ ip6_sr_localsid_t *t,
+ vl_api_registration_t *reg,
+ u32 context)
+{
+ vl_api_sr_localsids_with_packet_stats_details_t *rmp;
+ vlib_counter_t good_traffic, bad_traffic;
+ ip6_sr_main_t *sm = &sr_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ clib_memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (REPLY_MSG_ID_BASE + VL_API_SR_LOCALSIDS_WITH_PACKET_STATS_DETAILS);
+ ip6_address_encode (&t->localsid, rmp->addr);
+ rmp->end_psp = t->end_psp;
+ rmp->behavior = t->behavior;
+ rmp->fib_table = htonl (t->fib_table);
+ rmp->vlan_index = htonl (t->vlan_index);
+ ip_address_encode (&t->next_hop, IP46_TYPE_ANY, &rmp->xconnect_nh_addr);
+
+ if (t->behavior == SR_BEHAVIOR_T || t->behavior == SR_BEHAVIOR_DT6)
+ rmp->xconnect_iface_or_vrf_table =
+ htonl (fib_table_get_table_id (t->sw_if_index, FIB_PROTOCOL_IP6));
+ else if (t->behavior == SR_BEHAVIOR_DT4)
+ rmp->xconnect_iface_or_vrf_table =
+ htonl (fib_table_get_table_id (t->sw_if_index, FIB_PROTOCOL_IP4));
+ else
+ rmp->xconnect_iface_or_vrf_table = htonl (t->sw_if_index);
+
+ rmp->context = context;
+ vlib_get_combined_counter (&(sm->sr_ls_valid_counters), local_sid_index,
+ &good_traffic);
+ vlib_get_combined_counter (&(sm->sr_ls_invalid_counters), local_sid_index,
+ &bad_traffic);
+ rmp->good_traffic_bytes = clib_host_to_net_u64 (good_traffic.bytes);
+ rmp->good_traffic_pkt_count = clib_host_to_net_u64 (good_traffic.packets);
+ rmp->bad_traffic_bytes = clib_host_to_net_u64 (bad_traffic.bytes);
+ rmp->bad_traffic_pkt_count = clib_host_to_net_u64 (bad_traffic.packets);
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
+vl_api_sr_localsids_with_packet_stats_dump_t_handler (
+ vl_api_sr_localsids_with_packet_stats_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_localsid_t **localsid_list = 0;
+ ip6_sr_localsid_t *t;
+ int i;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ pool_foreach (t, sm->localsids)
+ {
+ vec_add1 (localsid_list, t);
+ }
+ for (i = 0; i < vec_len (localsid_list); i++)
+ {
+ t = localsid_list[i];
+ send_sr_localsid_with_packet_stats_details (i, t, reg, mp->context);
+ }
+ vec_free (localsid_list);
}
static void send_sr_policies_details
@@ -312,15 +451,74 @@ vl_api_sr_policies_dump_t_handler (vl_api_sr_policies_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (t, sm->sr_policies)
{
send_sr_policies_details(t, reg, mp->context);
}
- /* *INDENT-ON* */
}
+static void
+send_sr_policies_v2_details (ip6_sr_policy_t *t, vl_api_registration_t *reg,
+ u32 context)
+{
+ vl_api_sr_policies_v2_details_t *rmp;
+ ip6_sr_main_t *sm = &sr_main;
+
+ u32 *sl_index, slidx = 0;
+ ip6_sr_sl_t *segment_list = 0;
+ ip6_address_t *segment;
+ vl_api_srv6_sid_list_t *api_sid_list;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp) + vec_len (t->segments_lists) *
+ sizeof (vl_api_srv6_sid_list_t));
+ clib_memset (rmp, 0,
+ (sizeof (*rmp) + vec_len (t->segments_lists) *
+ sizeof (vl_api_srv6_sid_list_t)));
+
+ rmp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_SR_POLICIES_V2_DETAILS);
+ ip6_address_encode (&t->bsid, rmp->bsid);
+ ip6_address_encode (&t->encap_src, rmp->encap_src);
+ rmp->is_encap = t->is_encap;
+ rmp->type = t->type;
+ rmp->fib_table = htonl (t->fib_table);
+ rmp->num_sid_lists = vec_len (t->segments_lists);
+
+ /* Fill in all the segments lists */
+ vec_foreach (sl_index, t->segments_lists)
+ {
+ segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+
+ api_sid_list = &rmp->sid_lists[sl_index - t->segments_lists];
+
+ api_sid_list->num_sids = vec_len (segment_list->segments);
+ api_sid_list->weight = htonl (segment_list->weight);
+ slidx = 0;
+ vec_foreach (segment, segment_list->segments)
+ {
+ ip6_address_encode (segment, api_sid_list->sids[slidx++]);
+ }
+ }
+
+ rmp->context = context;
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
+vl_api_sr_policies_v2_dump_t_handler (vl_api_sr_policies_v2_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_policy_t *t;
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ pool_foreach (t, sm->sr_policies)
+ {
+ send_sr_policies_v2_details (t, reg, mp->context);
+ }
+}
static void send_sr_policies_details_with_sl_index
(ip6_sr_policy_t * t, vl_api_registration_t * reg, u32 context)
@@ -381,12 +579,10 @@ static void
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (t, sm->sr_policies)
{
send_sr_policies_details_with_sl_index(t, reg, mp->context);
}
- /* *INDENT-ON* */
}
static void send_sr_steering_pol_details
@@ -428,12 +624,10 @@ static void vl_api_sr_steering_pol_dump_t_handler
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (t, sm->steer_policies)
{
send_sr_steering_pol_details(t, reg, mp->context);
}
- /* *INDENT-ON* */
}
#include <vnet/srv6/sr.api.c>
diff --git a/src/vnet/srv6/sr_localsid.c b/src/vnet/srv6/sr_localsid.c
index a055c923be9..12349bb95e8 100644
--- a/src/vnet/srv6/sr_localsid.c
+++ b/src/vnet/srv6/sr_localsid.c
@@ -396,12 +396,10 @@ sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
sr_localsid_fn_registration_t **plugin_it = 0;
/* Create a vector out of the plugin pool as recommended */
- /* *INDENT-OFF* */
pool_foreach (plugin, sm->plugin_functions)
{
vec_add1 (vec_plugins, plugin);
}
- /* *INDENT-ON* */
vec_foreach (plugin_it, vec_plugins)
{
@@ -506,7 +504,6 @@ sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (sr_localsid_command, static) = {
.path = "sr localsid",
.short_help = "sr localsid (del) address XX:XX::YY:YY"
@@ -534,7 +531,6 @@ VLIB_CLI_COMMAND (sr_localsid_command, static) = {
"\t\tParameters: '<ip4_fib_table>'\n",
.function = sr_cli_localsid_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief CLI function to 'show' all SR LocalSIDs on console.
@@ -551,9 +547,7 @@ show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "SRv6 - My LocalSID Table:");
vlib_cli_output (vm, "=========================");
- /* *INDENT-OFF* */
pool_foreach (ls, sm->localsids) { vec_add1 (localsid_list, ls); }
- /* *INDENT-ON* */
for (i = 0; i < vec_len (localsid_list); i++)
{
ls = localsid_list[i];
@@ -676,13 +670,11 @@ show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_localsid_command, static) = {
.path = "show sr localsids",
.short_help = "show sr localsids",
.function = show_sr_localsid_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief Function to 'clear' ALL SR localsid counters
@@ -700,13 +692,11 @@ clear_sr_localsid_counters_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_sr_localsid_counters_command, static) = {
.path = "clear sr localsid-counters",
.short_help = "clear sr localsid-counters",
.function = clear_sr_localsid_counters_command_fn,
};
-/* *INDENT-ON* */
/************************ SR LocalSID graphs node ****************************/
/**
@@ -1438,7 +1428,6 @@ sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_localsid_d_node) = {
.function = sr_localsid_d_fn,
.name = "sr-localsid-d",
@@ -1454,7 +1443,6 @@ VLIB_REGISTER_NODE (sr_localsid_d_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/**
* @brief SR LocalSID graph node. Supports all default SR Endpoint without decaps
@@ -1748,7 +1736,6 @@ sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_localsid_node) = {
.function = sr_localsid_fn,
.name = "sr-localsid",
@@ -1764,7 +1751,6 @@ VLIB_REGISTER_NODE (sr_localsid_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/**
* @brief SR LocalSID uN graph node. Supports all default SR Endpoint without decaps
@@ -2058,7 +2044,6 @@ sr_localsid_un_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_localsid_un_node) = {
.function = sr_localsid_un_fn,
.name = "sr-localsid-un",
@@ -2074,7 +2059,6 @@ VLIB_REGISTER_NODE (sr_localsid_un_node) = {
#undef _
},
};
-/* *INDENT-ON* */
static uword
sr_localsid_un_perf_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
@@ -2270,7 +2254,6 @@ sr_localsid_un_perf_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_localsid_un_perf_node) = {
.function = sr_localsid_un_perf_fn,
.name = "sr-localsid-un-perf",
@@ -2286,7 +2269,6 @@ VLIB_REGISTER_NODE (sr_localsid_un_perf_node) = {
#undef _
},
};
-/* *INDENT-ON* */
static u8 *
format_sr_dpo (u8 * s, va_list * args)
@@ -2406,10 +2388,8 @@ show_sr_localsid_behaviors_command_fn (vlib_main_t * vm,
vlib_cli_output (vm,
"SR LocalSIDs behaviors:\n-----------------------\n\n");
- /* *INDENT-OFF* */
pool_foreach (plugin, sm->plugin_functions)
{ vec_add1 (plugins_vec, plugin); }
- /* *INDENT-ON* */
/* Print static behaviors */
vlib_cli_output (vm, "Default behaviors:\n"
@@ -2439,13 +2419,11 @@ show_sr_localsid_behaviors_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_localsid_behaviors_command, static) = {
.path = "show sr localsids behaviors",
.short_help = "show sr localsids behaviors",
.function = show_sr_localsid_behaviors_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief SR LocalSID initialization
diff --git a/src/vnet/srv6/sr_packet.h b/src/vnet/srv6/sr_packet.h
index dda776b4037..cf9fcb70bcc 100644
--- a/src/vnet/srv6/sr_packet.h
+++ b/src/vnet/srv6/sr_packet.h
@@ -116,6 +116,9 @@
#define ROUTING_HEADER_TYPE_SR 4
+#define IP6_SRH_PT_TLV_TYPE 128
+#define IP6_SRH_PT_TLV_LEN 14
+
typedef struct
{
/* Protocol for next header. */
@@ -156,6 +159,21 @@ typedef struct
u8 value[0];
} __attribute__ ((packed)) ip6_sr_tlv_t;
+typedef struct
+{
+ u32 sec;
+ u32 nsec;
+} __attribute__ ((packed)) timestamp_64_t;
+
+typedef struct
+{
+ u8 type;
+ u8 length;
+ u16 id_ld;
+ timestamp_64_t t64;
+ u16 session_id;
+ u16 seq_num;
+} __attribute__ ((packed)) ip6_sr_pt_tlv_t;
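For reference, the two constants above encode the usual TLV convention that the length field excludes the type/length octets; a minimal sanity-check sketch (not part of the patch, assuming vppinfra's STATIC_ASSERT):

/* Sketch: sizeof (ip6_sr_pt_tlv_t) is 16 bytes, of which the TLV
 * 'length' field counts only the 14 payload bytes. */
STATIC_ASSERT (sizeof (ip6_sr_pt_tlv_t) == IP6_SRH_PT_TLV_LEN + 2,
	       "PT TLV length excludes the 2-byte type/length header");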
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/srv6/sr_policy_rewrite.c b/src/vnet/srv6/sr_policy_rewrite.c
index 500772e8065..0aa88cc273e 100644
--- a/src/vnet/srv6/sr_policy_rewrite.c
+++ b/src/vnet/srv6/sr_policy_rewrite.c
@@ -47,7 +47,9 @@
#include <vnet/fib/ip6_fib.h>
#include <vnet/dpo/dpo.h>
#include <vnet/dpo/replicate_dpo.h>
+#include <vnet/srv6/sr_pt.h>
+#include <vppinfra/byte_order.h>
#include <vppinfra/error.h>
#include <vppinfra/elog.h>
@@ -140,13 +142,11 @@ set_sr_src_command_fn (vlib_main_t * vm, unformat_input_t * input,
return clib_error_return (0, "No address specified");
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_sr_src_command, static) = {
.path = "set sr encaps source",
.short_help = "set sr encaps source addr <ip6_addr>",
.function = set_sr_src_command_fn,
};
-/* *INDENT-ON* */
/******************** SR rewrite set encaps IPv6 hop-limit ********************/
@@ -178,34 +178,40 @@ set_sr_hop_limit_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_sr_hop_limit_command, static) = {
.path = "set sr encaps hop-limit",
.short_help = "set sr encaps hop-limit <value>",
.function = set_sr_hop_limit_command_fn,
};
-/* *INDENT-ON* */
/*********************** SR rewrite string computation ************************/
/**
* @brief SR rewrite string computation for IPv6 encapsulation (inline)
*
* @param sl is a vector of IPv6 addresses composing the Segment List
+ * @param src_v6addr is the encapsulation IPv6 source address
*
* @return precomputed rewrite string for encapsulation
*/
static inline u8 *
-compute_rewrite_encaps (ip6_address_t *sl, u8 type)
+compute_rewrite_encaps (ip6_address_t *sl, ip6_address_t *src_v6addr, u8 type)
{
ip6_header_t *iph;
ip6_sr_header_t *srh;
+ ip6_sr_pt_tlv_t *srh_pt_tlv;
ip6_address_t *addrp, *this_address;
u32 header_length = 0;
u8 *rs = NULL;
header_length = 0;
header_length += IPv6_DEFAULT_HEADER_LENGTH;
- if (vec_len (sl) > 1)
+ if (type == SR_POLICY_TYPE_TEF)
+ {
+ header_length += sizeof (ip6_sr_header_t);
+ header_length += vec_len (sl) * sizeof (ip6_address_t);
+ header_length += sizeof (ip6_sr_pt_tlv_t);
+ }
+ else if (vec_len (sl) > 1)
{
header_length += sizeof (ip6_sr_header_t);
header_length += vec_len (sl) * sizeof (ip6_address_t);
@@ -216,13 +222,39 @@ compute_rewrite_encaps (ip6_address_t *sl, u8 type)
iph = (ip6_header_t *) rs;
iph->ip_version_traffic_class_and_flow_label =
clib_host_to_net_u32 (0 | ((6 & 0xF) << 28));
- iph->src_address.as_u64[0] = sr_pr_encaps_src.as_u64[0];
- iph->src_address.as_u64[1] = sr_pr_encaps_src.as_u64[1];
+ iph->src_address.as_u64[0] = src_v6addr->as_u64[0];
+ iph->src_address.as_u64[1] = src_v6addr->as_u64[1];
iph->payload_length = header_length - IPv6_DEFAULT_HEADER_LENGTH;
iph->protocol = IP_PROTOCOL_IPV6;
iph->hop_limit = sr_pr_encaps_hop_limit;
- if (vec_len (sl) > 1)
+ if (type == SR_POLICY_TYPE_TEF)
+ {
+ srh = (ip6_sr_header_t *) (iph + 1);
+ iph->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ srh->protocol = IP_PROTOCOL_IPV6;
+ srh->type = ROUTING_HEADER_TYPE_SR;
+ srh->flags = 0x00;
+ srh->tag = 0x0000;
+ srh->segments_left = vec_len (sl) - 1;
+ srh->last_entry = vec_len (sl) - 1;
+ srh->length =
+ ((sizeof (ip6_sr_header_t) + (vec_len (sl) * sizeof (ip6_address_t)) +
+ sizeof (ip6_sr_pt_tlv_t)) /
+ 8) -
+ 1;
+ addrp = srh->segments + vec_len (sl) - 1;
+ vec_foreach (this_address, sl)
+ {
+ clib_memcpy_fast (addrp->as_u8, this_address->as_u8,
+ sizeof (ip6_address_t));
+ addrp--;
+ }
+ srh_pt_tlv = (ip6_sr_pt_tlv_t *) (srh->segments + vec_len (sl));
+ srh_pt_tlv->type = IP6_SRH_PT_TLV_TYPE;
+ srh_pt_tlv->length = IP6_SRH_PT_TLV_LEN;
+ }
+ else if (vec_len (sl) > 1)
{
srh = (ip6_sr_header_t *) (iph + 1);
iph->protocol = IP_PROTOCOL_IPV6_ROUTE;
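A worked instance of the TEF length computation above (the 3-segment count is illustrative): sizeof (ip6_sr_header_t) is 8, each SID is 16 and the PT TLV is 16 bytes, so

/* srh->length = ((8 + 3 * 16 + 16) / 8) - 1 = 8, i.e. the SRH size
 * in 8-octet units, not counting the first 8 octets (RFC 8754). */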
@@ -335,18 +367,20 @@ compute_rewrite_bsid (ip6_address_t * sl)
*
* @param sr_policy is the SR policy where the SL will be added
* @param sl is a vector of IPv6 addresses composing the Segment List
+ * @param encap_src is an encapsulation IPv6 source address. optional.
* @param weight is the weight of the SegmentList (for load-balancing purposes)
* @param is_encap represents the mode (SRH insertion vs Encapsulation)
*
* @return pointer to the just created segment list
*/
static inline ip6_sr_sl_t *
-create_sl (ip6_sr_policy_t * sr_policy, ip6_address_t * sl, u32 weight,
- u8 is_encap)
+create_sl (ip6_sr_policy_t *sr_policy, ip6_address_t *sl,
+ ip6_address_t *encap_src, u32 weight, u8 is_encap)
{
ip6_sr_main_t *sm = &sr_main;
ip6_sr_sl_t *segment_list;
sr_policy_fn_registration_t *plugin = 0;
+ ip6_address_t encap_srcv6 = sr_pr_encaps_src;
pool_get (sm->sid_lists, segment_list);
clib_memset (segment_list, 0, sizeof (*segment_list));
@@ -365,8 +399,14 @@ create_sl (ip6_sr_policy_t * sr_policy, ip6_address_t * sl, u32 weight,
if (is_encap)
{
- segment_list->rewrite = compute_rewrite_encaps (sl, sr_policy->type);
+ if (encap_src)
+ {
+ clib_memcpy_fast (&encap_srcv6, encap_src, sizeof (ip6_address_t));
+ }
+ segment_list->rewrite =
+ compute_rewrite_encaps (sl, &encap_srcv6, sr_policy->type);
segment_list->rewrite_bsid = segment_list->rewrite;
+ sr_policy->encap_src = encap_srcv6;
}
else
{
@@ -625,17 +665,19 @@ update_replicate (ip6_sr_policy_t * sr_policy)
*
* @param bsid is the bindingSID of the SR Policy
* @param segments is a vector of IPv6 address composing the segment list
+ * @param encap_src is an encapsulation IPv6 source address. optional.
* @param weight is the weight of the sid list. optional.
* @param behavior is the behavior of the SR policy. (default//spray)
* @param fib_table is the VRF where to install the FIB entry for the BSID
- * @param is_encap (bool) whether SR policy should behave as Encap/SRH Insertion
+ * @param is_encap (bool) whether SR policy should behave as Encap/SRH
+ * Insertion
*
* @return 0 if correct, else error
*/
int
-sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments, u32 weight,
- u8 type, u32 fib_table, u8 is_encap, u16 plugin,
- void *ls_plugin_mem)
+sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments,
+ ip6_address_t *encap_src, u32 weight, u8 type, u32 fib_table,
+ u8 is_encap, u16 plugin, void *ls_plugin_mem)
{
ip6_sr_main_t *sm = &sr_main;
ip6_sr_policy_t *sr_policy = 0;
@@ -691,7 +733,7 @@ sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments, u32 weight,
NULL);
/* Create a segment list and add the index to the SR policy */
- create_sl (sr_policy, segments, weight, is_encap);
+ create_sl (sr_policy, segments, encap_src, weight, is_encap);
/* If FIB doesn't exist, create it */
if (sm->fib_table_ip6 == (u32) ~ 0)
@@ -705,7 +747,8 @@ sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments, u32 weight,
}
/* Create IPv6 FIB for the BindingSID attached to the DPO of the only SL */
- if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
+ if (sr_policy->type == SR_POLICY_TYPE_DEFAULT ||
+ sr_policy->type == SR_POLICY_TYPE_TEF)
update_lb (sr_policy);
else if (sr_policy->type == SR_POLICY_TYPE_SPRAY)
update_replicate (sr_policy);
@@ -740,8 +783,6 @@ sr_policy_del (ip6_address_t * bsid, u32 index)
else
{
sr_policy = pool_elt_at_index (sm->sr_policies, index);
- if (!sr_policy)
- return -1;
}
/* Remove BindingSID FIB entry */
@@ -822,6 +863,7 @@ sr_policy_del (ip6_address_t * bsid, u32 index)
* @param fib_table is the VRF where to install the FIB entry for the BSID
* @param operation is the operation to perform (among the top ones)
* @param segments is a vector of IPv6 address composing the segment list
+ * @param encap_src is an encapsulation IPv6 source address. optional.
* @param sl_index is the index of the Segment List to modify/delete
* @param weight is the weight of the sid list. optional.
* @param is_encap Mode. Encapsulation or SRH insertion.
@@ -829,8 +871,8 @@ sr_policy_del (ip6_address_t * bsid, u32 index)
* @return 0 if correct, else error
*/
int
-sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table,
- u8 operation, ip6_address_t * segments, u32 sl_index,
+sr_policy_mod (ip6_address_t *bsid, u32 index, u32 fib_table, u8 operation,
+ ip6_address_t *segments, ip6_address_t *encap_src, u32 sl_index,
u32 weight)
{
ip6_sr_main_t *sm = &sr_main;
@@ -850,15 +892,13 @@ sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table,
else
{
sr_policy = pool_elt_at_index (sm->sr_policies, index);
- if (!sr_policy)
- return -1;
}
if (operation == 1) /* Add SR List to an existing SR policy */
{
/* Create the new SL */
- segment_list =
- create_sl (sr_policy, segments, weight, sr_policy->is_encap);
+ segment_list = create_sl (sr_policy, segments, encap_src, weight,
+ sr_policy->is_encap);
/* Create a new LB DPO */
if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
@@ -931,7 +971,7 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
int rv = -1;
char is_del = 0, is_add = 0, is_mod = 0;
char policy_set = 0;
- ip6_address_t bsid, next_address;
+ ip6_address_t bsid, next_address, src_v6addr;
u32 sr_policy_index = (u32) ~ 0, sl_index = (u32) ~ 0;
u32 weight = (u32) ~ 0, fib_table = (u32) ~ 0;
ip6_address_t *segments = 0, *this_seg;
@@ -940,6 +980,7 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
u8 type = SR_POLICY_TYPE_DEFAULT;
u16 behavior = 0;
void *ls_plugin_mem = 0;
+ ip6_address_t *encap_src = 0;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
@@ -963,6 +1004,10 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
clib_memcpy_fast (this_seg->as_u8, next_address.as_u8,
sizeof (*this_seg));
}
+ else if (unformat (input, "v6src %U", unformat_ip6_address, &src_v6addr))
+ {
+ encap_src = &src_v6addr;
+ }
else if (unformat (input, "add sl"))
operation = 1;
else if (unformat (input, "del sl index %d", &sl_index))
@@ -977,17 +1022,17 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
is_encap = 0;
else if (unformat (input, "spray"))
type = SR_POLICY_TYPE_SPRAY;
+ else if (unformat (input, "tef"))
+ type = SR_POLICY_TYPE_TEF;
else if (!behavior && unformat (input, "behavior"))
{
sr_policy_fn_registration_t *plugin = 0, **vec_plugins = 0;
sr_policy_fn_registration_t **plugin_it = 0;
- /* *INDENT-OFF* */
pool_foreach (plugin, sm->policy_plugin_functions)
{
vec_add1 (vec_plugins, plugin);
}
- /* *INDENT-ON* */
vec_foreach (plugin_it, vec_plugins)
{
@@ -1025,8 +1070,8 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (vec_len (segments) == 0)
return clib_error_return (0, "No Segment List specified");
- rv = sr_policy_add (&bsid, segments, weight, type, fib_table, is_encap,
- behavior, ls_plugin_mem);
+ rv = sr_policy_add (&bsid, segments, encap_src, weight, type, fib_table,
+ is_encap, behavior, ls_plugin_mem);
vec_free (segments);
}
@@ -1044,9 +1089,9 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (operation == 3 && weight == (u32) ~ 0)
return clib_error_return (0, "No new weight for the SL specified");
- rv = sr_policy_mod ((sr_policy_index != (u32) ~ 0 ? NULL : &bsid),
+ rv = sr_policy_mod ((sr_policy_index != (u32) ~0 ? NULL : &bsid),
sr_policy_index, fib_table, operation, segments,
- sl_index, weight);
+ encap_src, sl_index, weight);
if (segments)
vec_free (segments);
@@ -1082,7 +1127,6 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (sr_policy_command, static) = {
.path = "sr policy",
.short_help = "sr policy [add||del||mod] [bsid 2001::1||index 5] "
@@ -1102,7 +1146,6 @@ VLIB_CLI_COMMAND (sr_policy_command, static) = {
"SID lists.\n",
.function = sr_policy_command_fn,
};
-/* *INDENT-ON* */
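Assuming the keywords parsed by sr_policy_command_fn above, an encapsulating TEF policy with an explicit IPv6 source could be created along these lines (addresses illustrative):

    vpp# sr policy add bsid 2001::1 next a1::1 next a2::2 encap v6src 2001::2 tef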
/**
* @brief CLI to display onscreen all the SR policies
@@ -1122,10 +1165,8 @@ show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "SR policies:");
- /* *INDENT-OFF* */
pool_foreach (sr_policy, sm->sr_policies)
{vec_add1 (vec_policies, sr_policy); }
- /* *INDENT-ON* */
vec_foreach_index (i, vec_policies)
{
@@ -1136,11 +1177,20 @@ show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "\tBehavior: %s",
(sr_policy->is_encap ? "Encapsulation" :
"SRH insertion"));
+ if (sr_policy->is_encap)
+ {
+ vlib_cli_output (vm, "\tEncapSrcIP: %U", format_ip6_address,
+ &sr_policy->encap_src);
+ }
switch (sr_policy->type)
{
case SR_POLICY_TYPE_SPRAY:
vlib_cli_output (vm, "\tType: %s", "Spray");
break;
+ case SR_POLICY_TYPE_TEF:
+ vlib_cli_output (vm, "\tType: %s",
+ "TEF (Timestamp, Encapsulate, and Forward)");
+ break;
default:
vlib_cli_output (vm, "\tType: %s", "Default");
break;
@@ -1168,13 +1218,11 @@ show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_policies_command, static) = {
.path = "show sr policies",
.short_help = "show sr policies",
.function = show_sr_policies_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief CLI to display onscreen the SR encaps source addr
@@ -1189,13 +1237,11 @@ show_sr_encaps_source_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_encaps_source_command, static) = {
.path = "show sr encaps source addr",
.short_help = "show sr encaps source addr",
.function = show_sr_encaps_source_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief CLI to display onscreen the hop-limit value used for SRv6 encapsulation
@@ -1210,13 +1256,11 @@ show_sr_encaps_hop_limit_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_encaps_hop_limit_command, static) = {
.path = "show sr encaps hop-limit",
.short_help = "show sr encaps hop-limit",
.function = show_sr_encaps_hop_limit_command_fn,
};
-/* *INDENT-ON* */
/*************************** SR rewrite graph node ****************************/
/**
@@ -1236,6 +1280,36 @@ format_sr_policy_rewrite_trace (u8 * s, va_list * args)
return s;
}
+/**
+ * @brief SRv6 TEF (Timestamp, Encapsulate, and Forward) behavior
+ */
+static_always_inline void
+srv6_tef_behavior (vlib_node_runtime_t *node, vlib_buffer_t *b0,
+ ip6_header_t *ip0)
+{
+ ip6_sr_header_t *srh;
+ ip6_sr_pt_tlv_t *srh_pt_tlv;
+ timestamp_64_t ts;
+ sr_pt_iface_t *ls = 0;
+ u16 id_ld = 0;
+ srh = (ip6_sr_header_t *) (ip0 + 1);
+
+ srh_pt_tlv =
+ (ip6_sr_pt_tlv_t *) ((u8 *) ip0 + sizeof (ip6_header_t) +
+ sizeof (ip6_sr_header_t) +
+ sizeof (ip6_address_t) * (srh->last_entry + 1));
+
+ unix_time_now_nsec_fraction (&ts.sec, &ts.nsec);
+ srh_pt_tlv->t64.sec = clib_host_to_net_u32 (ts.sec);
+ srh_pt_tlv->t64.nsec = clib_host_to_net_u32 (ts.nsec);
+ ls = sr_pt_find_iface (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+ if (ls)
+ {
+ id_ld = ls->id << 4;
+ id_ld |= ls->ingress_load;
+ srh_pt_tlv->id_ld = clib_host_to_net_u16 (id_ld);
+ }
+}
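The id_ld packing above matches the limits declared in sr_pt.h (SR_PT_ID_MAX 4095, SR_PT_LOAD_MAX 15): a 12-bit interface id over a 4-bit load. A sketch with illustrative values:

/* id 0x123, ingress load 0x5:
 * id_ld = (0x123 << 4) | 0x5 = 0x1235, emitted big-endian by
 * clib_host_to_net_u16 (). */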
/**
* @brief IPv6 encapsulation processing as per RFC2473
@@ -1261,6 +1335,8 @@ encaps_processing_v6 (vlib_node_runtime_t *node, vlib_buffer_t *b0,
ip0_encap->ip_version_traffic_class_and_flow_label) &
0xfff00000) |
(flow_label & 0x0000ffff));
+ if (policy_type == SR_POLICY_TYPE_TEF)
+ srv6_tef_behavior (node, b0, ip0);
}
/**
@@ -1502,7 +1578,6 @@ sr_policy_rewrite_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_node) = {
.function = sr_policy_rewrite_encaps,
.name = "sr-pl-rewrite-encaps",
@@ -1518,7 +1593,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/**
* @brief IPv4 encapsulation processing as per RFC2473
@@ -1795,7 +1869,6 @@ sr_policy_rewrite_encaps_v4 (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_v4_node) = {
.function = sr_policy_rewrite_encaps_v4,
.name = "sr-pl-rewrite-encaps-v4",
@@ -1811,7 +1884,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_v4_node) = {
#undef _
},
};
-/* *INDENT-ON* */
always_inline u32
ip_flow_hash (void *data)
@@ -2237,7 +2309,6 @@ sr_policy_rewrite_encaps_l2 (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_l2_node) = {
.function = sr_policy_rewrite_encaps_l2,
.name = "sr-pl-rewrite-encaps-l2",
@@ -2253,7 +2324,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_l2_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/**
* @brief Graph node for applying a SR policy into a packet. SRH insertion.
@@ -2659,7 +2729,6 @@ sr_policy_rewrite_insert (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_policy_rewrite_insert_node) = {
.function = sr_policy_rewrite_insert,
.name = "sr-pl-rewrite-insert",
@@ -2675,7 +2744,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_insert_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/**
* @brief Graph node for applying a SR policy into a packet. BSID - SRH insertion.
@@ -3070,7 +3138,6 @@ sr_policy_rewrite_b_insert (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_policy_rewrite_b_insert_node) = {
.function = sr_policy_rewrite_b_insert,
.name = "sr-pl-rewrite-b-insert",
@@ -3086,7 +3153,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_b_insert_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/**
* @brief Function BSID encapsulation
@@ -3112,6 +3178,8 @@ end_bsid_encaps_srh_processing (vlib_node_runtime_t *node, vlib_buffer_t *b0,
ip0->dst_address.as_u64[1] = new_dst0->as_u64[1];
return;
}
+ else if (sr0->segments_left == 0 && policy_type == SR_POLICY_TYPE_TEF)
+ return;
}
error_bsid_encaps:
@@ -3379,7 +3447,6 @@ sr_policy_rewrite_b_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_policy_rewrite_b_encaps_node) = {
.function = sr_policy_rewrite_b_encaps,
.name = "sr-pl-rewrite-b-encaps",
@@ -3395,7 +3462,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_b_encaps_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/*************************** SR Policy plugins ******************************/
/**
@@ -3463,10 +3529,8 @@ show_sr_policy_behaviors_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "SR Policy behaviors:\n-----------------------\n\n");
- /* *INDENT-OFF* */
pool_foreach (plugin, sm->policy_plugin_functions)
{ vec_add1 (plugins_vec, plugin); }
- /* *INDENT-ON* */
vlib_cli_output (vm, "Plugin behaviors:\n");
for (i = 0; i < vec_len (plugins_vec); i++)
@@ -3479,13 +3543,11 @@ show_sr_policy_behaviors_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_policy_behaviors_command, static) = {
.path = "show sr policy behaviors",
.short_help = "show sr policy behaviors",
.function = show_sr_policy_behaviors_command_fn,
};
-/* *INDENT-ON* */
/*************************** SR Segment Lists DPOs ****************************/
static u8 *
diff --git a/src/vnet/srv6/sr_pt.api b/src/vnet/srv6/sr_pt.api
new file mode 100644
index 00000000000..e86359b421f
--- /dev/null
+++ b/src/vnet/srv6/sr_pt.api
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+option version = "1.0.0";
+
+import "vnet/interface_types.api";
+
+/** \brief SR PT iface dump request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define sr_pt_iface_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
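+/** \brief SR PT iface details response (one per registered interface)
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - interface index
+    @param id - SR PT interface id
+    @param ingress_load - incoming interface load
+    @param egress_load - outgoing interface load
+    @param tts_template - truncated timestamp template in use
+*/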
+define sr_pt_iface_details
+{
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u16 id;
+ u8 ingress_load;
+ u8 egress_load;
+ u8 tts_template;
+};
+
+/** \brief SR PT iface add request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - index of the interface to add to SR PT
+ @param id - SR PT interface id
+ @param ingress_load - incoming interface load
+ @param egress_load - outgoing interface load
+ @param tts_template - truncated timestamp template to use
+*/
+autoreply define sr_pt_iface_add
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u16 id;
+ u8 ingress_load;
+ u8 egress_load;
+ u8 tts_template;
+};
+
+/** \brief SR PT iface del request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - index of the interface to delete from SR PT
+*/
+autoreply define sr_pt_iface_del
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+};
\ No newline at end of file
diff --git a/src/vnet/srv6/sr_pt.c b/src/vnet/srv6/sr_pt.c
new file mode 100644
index 00000000000..6299faa84ab
--- /dev/null
+++ b/src/vnet/srv6/sr_pt.c
@@ -0,0 +1,281 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+/**
+ * @file
+ * @brief SR Path Tracing (PT)
+ *
+ * SR PT CLI
+ *
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/ip/ip.h>
+#include <vnet/srv6/sr_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/adj/adj.h>
+#include <vnet/srv6/sr_pt.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+sr_pt_main_t sr_pt_main;
+
+void *
+sr_pt_find_iface (u32 iface)
+{
+ sr_pt_main_t *sr_pt = &sr_pt_main;
+ uword *p;
+
+ /* Search for the item */
+ p = mhash_get (&sr_pt->sr_pt_iface_index_hash, &iface);
+ if (p)
+ {
+ /* Retrieve sr_pt_iface */
+ return pool_elt_at_index (sr_pt->sr_pt_iface, p[0]);
+ }
+ return NULL;
+}
+
+int
+sr_pt_add_iface (u32 iface, u16 id, u8 ingress_load, u8 egress_load,
+ u8 tts_template)
+{
+ sr_pt_main_t *sr_pt = &sr_pt_main;
+ uword *p;
+
+ sr_pt_iface_t *ls = 0;
+
+ if (iface == (u32) ~0)
+ return SR_PT_ERR_IFACE_INVALID;
+
+ /* Search for the item */
+ p = mhash_get (&sr_pt->sr_pt_iface_index_hash, &iface);
+
+ if (p)
+ return SR_PT_ERR_EXIST;
+
+ if (id > SR_PT_ID_MAX)
+ return SR_PT_ERR_ID_INVALID;
+
+ if (ingress_load > SR_PT_LOAD_MAX || egress_load > SR_PT_LOAD_MAX)
+ return SR_PT_ERR_LOAD_INVALID;
+
+ if (tts_template > SR_PT_TTS_TEMPLATE_MAX)
+ return SR_PT_ERR_TTS_TEMPLATE_INVALID;
+
+ vnet_feature_enable_disable ("ip6-output", "pt", iface, 1, 0, 0);
+
+ /* Create a new sr_pt_iface */
+ pool_get_zero (sr_pt->sr_pt_iface, ls);
+ ls->iface = iface;
+ ls->id = id;
+ ls->ingress_load = ingress_load;
+ ls->egress_load = egress_load;
+ ls->tts_template = tts_template;
+
+ /* Set hash key for searching sr_pt_iface by iface */
+ mhash_set (&sr_pt->sr_pt_iface_index_hash, &iface, ls - sr_pt->sr_pt_iface,
+ NULL);
+ return 0;
+}
+
+int
+sr_pt_del_iface (u32 iface)
+{
+ sr_pt_main_t *sr_pt = &sr_pt_main;
+ uword *p;
+
+ sr_pt_iface_t *ls = 0;
+
+ if (iface == (u32) ~0)
+ return SR_PT_ERR_IFACE_INVALID;
+
+ /* Search for the item */
+ p = mhash_get (&sr_pt->sr_pt_iface_index_hash, &iface);
+
+ if (p)
+ {
+ /* Retrieve sr_pt_iface */
+ ls = pool_elt_at_index (sr_pt->sr_pt_iface, p[0]);
+ vnet_feature_enable_disable ("ip6-output", "pt", iface, 0, 0, 0);
+ /* Delete sr_pt_iface */
+ pool_put (sr_pt->sr_pt_iface, ls);
+ mhash_unset (&sr_pt->sr_pt_iface_index_hash, &iface, NULL);
+ }
+ else
+ {
+ return SR_PT_ERR_NOENT;
+ }
+ return 0;
+}
+
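A minimal caller sketch for sr_pt_add_iface above (not part of the patch; the interface index and parameter values are illustrative):

/* Register sw_if_index 1 with PT id 100, ingress/egress load 3 and
 * the default TTS template. */
int rv = sr_pt_add_iface (1, 100, 3, 3, SR_PT_TTS_TEMPLATE_DEFAULT);
if (rv == SR_PT_ERR_EXIST)
  ; /* interface already registered for PT */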
+/**
+ * @brief "sr pt add iface" CLI function.
+ *
+ * @see sr_pt_add_iface
+ */
+static clib_error_t *
+sr_pt_add_iface_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 iface = (u32) ~0;
+ u32 id = (u32) ~0;
+ u32 ingress_load = 0;
+ u32 egress_load = 0;
+ u32 tts_template = SR_PT_TTS_TEMPLATE_DEFAULT;
+
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &iface))
+ ;
+ else if (unformat (input, "id %u", &id))
+ ;
+ else if (unformat (input, "ingress-load %u", &ingress_load))
+ ;
+ else if (unformat (input, "egress-load %u", &egress_load))
+ ;
+ else if (unformat (input, "tts-template %u", &tts_template))
+ ;
+ else
+ break;
+ }
+
+ rv = sr_pt_add_iface (iface, id, ingress_load, egress_load, tts_template);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case SR_PT_ERR_EXIST:
+ return clib_error_return (0, "Error: Identical iface already exists.");
+ case SR_PT_ERR_IFACE_INVALID:
+ return clib_error_return (0, "Error: The iface name invalid.");
+ case SR_PT_ERR_ID_INVALID:
+ return clib_error_return (0, "Error: The iface id value invalid.");
+ case SR_PT_ERR_LOAD_INVALID:
+ return clib_error_return (
+ 0, "Error: The iface ingress or egress load value invalid.");
+ case SR_PT_ERR_TTS_TEMPLATE_INVALID:
+ return clib_error_return (
+ 0, "Error: The iface TTS Template value invalid.");
+ default:
+ return clib_error_return (0, "Error: unknown error.");
+ }
+ return 0;
+}
+
+/**
+ * @brief "sr pt del iface" CLI function.
+ *
+ * @see sr_pt_del_iface
+ */
+static clib_error_t *
+sr_pt_del_iface_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 iface = (u32) ~0;
+
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &iface))
+ ;
+ else
+ break;
+ }
+
+ rv = sr_pt_del_iface (iface);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case SR_PT_ERR_NOENT:
+ return clib_error_return (0, "Error: No such iface.");
+ case SR_PT_ERR_IFACE_INVALID:
+ return clib_error_return (0, "Error: The iface name is not valid.");
+ default:
+ return clib_error_return (0, "Error: unknown error.");
+ }
+ return 0;
+}
+
+/**
+ * @brief CLI function to show all SR PT interfaces
+ */
+static clib_error_t *
+sr_pt_show_iface_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ sr_pt_main_t *sr_pt = &sr_pt_main;
+ sr_pt_iface_t **sr_pt_iface_list = 0;
+ sr_pt_iface_t *ls;
+ int i;
+
+ vlib_cli_output (vm, "SR PT Interfaces");
+ vlib_cli_output (vm, "==================================");
+
+ pool_foreach (ls, sr_pt->sr_pt_iface)
+ {
+ vec_add1 (sr_pt_iface_list, ls);
+ };
+
+ for (i = 0; i < vec_len (sr_pt_iface_list); i++)
+ {
+ ls = sr_pt_iface_list[i];
+ vlib_cli_output (
+ vm,
+ "\tiface : \t%U\n\tid : \t%d\n\tingress-load: "
+ "\t%d\n\tegress-load : \t%d\n\ttts-template: \t%d ",
+ format_vnet_sw_if_index_name, vnm, ls->iface, ls->id, ls->ingress_load,
+ ls->egress_load, ls->tts_template);
+ vlib_cli_output (vm, "--------------------------------");
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (sr_pt_add_iface_command, static) = {
+ .path = "sr pt add iface",
+ .short_help = "sr pt add iface <iface-name> id <pt-iface-id> ingress-load "
+ "<ingress-load-value> egress-load <egress-load-value> "
+ "tts-template <tts-template-value>",
+ .function = sr_pt_add_iface_command_fn,
+};
+
+VLIB_CLI_COMMAND (sr_pt_del_iface_command, static) = {
+ .path = "sr pt del iface",
+ .short_help = "sr pt del iface <iface-name>",
+ .function = sr_pt_del_iface_command_fn,
+};
+
+VLIB_CLI_COMMAND (sr_pt_show_iface_command, static) = {
+ .path = "sr pt show iface",
+ .short_help = "sr pt show iface",
+ .function = sr_pt_show_iface_command_fn,
+};
+
+/**
+ * @brief SR PT initialization
+ */
+clib_error_t *
+sr_pt_init (vlib_main_t *vm)
+{
+ sr_pt_main_t *pt = &sr_pt_main;
+ mhash_init (&pt->sr_pt_iface_index_hash, sizeof (uword), sizeof (u32));
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (sr_pt_init);
\ No newline at end of file
diff --git a/src/vnet/srv6/sr_pt.h b/src/vnet/srv6/sr_pt.h
new file mode 100644
index 00000000000..53001e10ac7
--- /dev/null
+++ b/src/vnet/srv6/sr_pt.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+/**
+ * @file
+ * @brief SR Path Tracing data structure definitions
+ *
+ */
+
+#ifndef included_vnet_sr_pt_h
+#define included_vnet_sr_pt_h
+
+#define IP6_HBH_PT_TYPE 50
+
+/* SR PT error codes */
+#define SR_PT_ERR_NOENT -1 /* No such entry */
+#define SR_PT_ERR_EXIST -2 /* Entry exists */
+#define SR_PT_ERR_IFACE_INVALID -3 /* IFACE invalid */
+#define SR_PT_ERR_ID_INVALID -4 /* ID invalid */
+#define SR_PT_ERR_LOAD_INVALID -5 /* LOAD invalid */
+#define SR_PT_ERR_TTS_TEMPLATE_INVALID -6 /* TTS Template invalid */
+
+/* SR PT parameters max values */
+#define SR_PT_ID_MAX 4095
+#define SR_PT_LOAD_MAX 15
+#define SR_PT_TTS_TEMPLATE_MAX 3
+
+/* SR PT TTS Templates */
+#define SR_PT_TTS_TEMPLATE_0 0
+#define SR_PT_TTS_TEMPLATE_1 1
+#define SR_PT_TTS_TEMPLATE_2 2
+#define SR_PT_TTS_TEMPLATE_3 3
+#define SR_PT_TTS_TEMPLATE_DEFAULT 2
+
+/* SR PT TTS Template shift values */
+#define SR_PT_TTS_SHIFT_TEMPLATE_0 8
+#define SR_PT_TTS_SHIFT_TEMPLATE_1 12
+#define SR_PT_TTS_SHIFT_TEMPLATE_2 16
+#define SR_PT_TTS_SHIFT_TEMPLATE_3 20
+
+/* PT node behaviors */
+#define PT_BEHAVIOR_SRC 0
+#define PT_BEHAVIOR_MID 1
+#define PT_BEHAVIOR_SNK 2
+
+typedef struct
+{
+ u32 iface; /**< Interface */
+ u16 id; /**< Interface ID */
+ u8 ingress_load; /**< Interface Ingress Load */
+ u8 egress_load; /**< Interface Egress Load */
+ u8 tts_template; /**< Interface TTS Template */
+} sr_pt_iface_t;
+
+typedef struct
+{
+ u16 oif_oil;
+ u8 tts;
+} __clib_packed sr_pt_cmd_t;
+
+typedef struct
+{
+ sr_pt_cmd_t cmd_stack[12];
+} __clib_packed ip6_hop_by_hop_option_pt_t;
+
+/**
+ * @brief SR Path Tracing main data structure
+ */
+typedef struct
+{
+ /* Pool of sr_pt_iface instances */
+ sr_pt_iface_t *sr_pt_iface;
+
+ /* Hash table for sr_pt_iface parameters */
+ mhash_t sr_pt_iface_index_hash;
+
+ /* convenience */
+ u16 msg_id_base;
+} sr_pt_main_t;
+
+extern sr_pt_main_t sr_pt_main;
+extern vlib_node_registration_t sr_pt_node;
+extern int sr_pt_add_iface (u32 iface, u16 id, u8 ingress_load, u8 egress_load,
+ u8 tts_template);
+extern int sr_pt_del_iface (u32 iface);
+extern void *sr_pt_find_iface (u32 iface);
+
+#endif /* included_vnet_sr_pt_h */
\ No newline at end of file
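The 12-entry command stack above is what the midpoint node (sr_pt_node.c below) shifts in place; a sketch of the arithmetic, assuming the packed layout:

/* sizeof (sr_pt_cmd_t) == 3, so the stack is 36 bytes; moving
 * entries [0..10] to [1..11] copies 11 * 3 == 33 bytes and drops
 * the oldest entry. */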
diff --git a/src/vnet/srv6/sr_pt_api.c b/src/vnet/srv6/sr_pt_api.c
new file mode 100644
index 00000000000..b0b67a210fb
--- /dev/null
+++ b/src/vnet/srv6/sr_pt_api.c
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+#include <vnet/srv6/sr_pt.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+
+#include <vnet/srv6/sr_pt.api_enum.h>
+#include <vnet/srv6/sr_pt.api_types.h>
+
+#define REPLY_MSG_ID_BASE sr_pt_main.msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+static void
+send_sr_pt_iface_details (sr_pt_iface_t *t, vl_api_registration_t *reg,
+ u32 context)
+{
+ vl_api_sr_pt_iface_details_t *rmp;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ clib_memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_SR_PT_IFACE_DETAILS);
+
+ rmp->sw_if_index = ntohl (t->iface);
+ rmp->id = ntohs (t->id);
+ rmp->ingress_load = t->ingress_load;
+ rmp->egress_load = t->egress_load;
+ rmp->tts_template = t->tts_template;
+
+ rmp->context = context;
+
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
+vl_api_sr_pt_iface_dump_t_handler (vl_api_sr_pt_iface_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ sr_pt_main_t *pt = &sr_pt_main;
+ sr_pt_iface_t *t;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ pool_foreach (t, pt->sr_pt_iface)
+ {
+ send_sr_pt_iface_details (t, reg, mp->context);
+ }
+}
+
+static void
+vl_api_sr_pt_iface_add_t_handler (vl_api_sr_pt_iface_add_t *mp)
+{
+ vl_api_sr_pt_iface_add_reply_t *rmp;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = sr_pt_add_iface (ntohl (mp->sw_if_index), ntohs (mp->id),
+ mp->ingress_load, mp->egress_load, mp->tts_template);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_SR_PT_IFACE_ADD_REPLY);
+}
+
+static void
+vl_api_sr_pt_iface_del_t_handler (vl_api_sr_pt_iface_del_t *mp)
+{
+ vl_api_sr_pt_iface_del_reply_t *rmp;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = sr_pt_del_iface (ntohl (mp->sw_if_index));
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_SR_PT_IFACE_DEL_REPLY);
+}
+
+#include <vnet/srv6/sr_pt.api.c>
+static clib_error_t *
+sr_pt_api_hookup (vlib_main_t *vm)
+{
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ REPLY_MSG_ID_BASE = setup_message_id_table ();
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (sr_pt_api_hookup);
\ No newline at end of file
diff --git a/src/vnet/srv6/sr_pt_node.c b/src/vnet/srv6/sr_pt_node.c
new file mode 100644
index 00000000000..fa8b1f69b57
--- /dev/null
+++ b/src/vnet/srv6/sr_pt_node.c
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/srv6/sr_pt.h>
+
+/**
+ * @brief PT node trace
+ */
+typedef struct
+{
+ u32 iface;
+ u16 id;
+ u8 load;
+ timestamp_64_t t64;
+ u8 tts_template;
+ u8 tts;
+ u8 behavior;
+} pt_trace_t;
+
+static u8 *
+format_pt_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ pt_trace_t *t = va_arg (*args, pt_trace_t *);
+ switch (t->behavior)
+ {
+ case PT_BEHAVIOR_MID:
+ s = format (
+ s,
+ "Behavior Midpoint, outgoing interface %U, outgoing interface id %u, "
+ "outgoing interface load %u, t64_sec %u, t64_nsec %u, tts_template "
+ "%u, tts %u",
+ format_vnet_sw_if_index_name, vnet_get_main (), t->iface, t->id,
+ t->load, clib_host_to_net_u32 (t->t64.sec),
+ clib_host_to_net_u32 (t->t64.nsec), t->tts_template, t->tts);
+ break;
+ default:
+ break;
+ }
+ return s;
+}
+
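With the format string above, a traced midpoint packet renders roughly as follows (all values illustrative):

/* Behavior Midpoint, outgoing interface GigabitEthernet0/8/0,
 * outgoing interface id 100, outgoing interface load 3,
 * t64_sec ..., t64_nsec ..., tts_template 2, tts 47 */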
+static_always_inline void
+pt_midpoint_processing (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t *b0, ip6_header_t *ip0,
+ sr_pt_iface_t *ls, timestamp_64_t t64)
+{
+ ip6_hop_by_hop_header_t *hbh;
+ ip6_hop_by_hop_option_t *hbh_opt;
+ ip6_hop_by_hop_option_pt_t *hbh_opt_pt;
+
+ if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ {
+ hbh = (void *) (ip0 + 1);
+ hbh_opt = (void *) (hbh + 1);
+ if (hbh_opt->type == IP6_HBH_PT_TYPE)
+ {
+ hbh_opt_pt = (void *) (hbh_opt + 1);
+ clib_memcpy_fast (&hbh_opt_pt->cmd_stack[1],
+ &hbh_opt_pt->cmd_stack[0], 33);
+ hbh_opt_pt->cmd_stack[0].oif_oil =
+ clib_host_to_net_u16 (ls->id << 4);
+ hbh_opt_pt->cmd_stack[0].oif_oil |= ls->egress_load;
+ switch (ls->tts_template)
+ {
+ case SR_PT_TTS_TEMPLATE_0:
+ hbh_opt_pt->cmd_stack[0].tts =
+ t64.nsec >> SR_PT_TTS_SHIFT_TEMPLATE_0;
+ break;
+ case SR_PT_TTS_TEMPLATE_1:
+ hbh_opt_pt->cmd_stack[0].tts =
+ t64.nsec >> SR_PT_TTS_SHIFT_TEMPLATE_1;
+ break;
+ case SR_PT_TTS_TEMPLATE_2:
+ hbh_opt_pt->cmd_stack[0].tts =
+ t64.nsec >> SR_PT_TTS_SHIFT_TEMPLATE_2;
+ break;
+ case SR_PT_TTS_TEMPLATE_3:
+ hbh_opt_pt->cmd_stack[0].tts =
+ t64.nsec >> SR_PT_TTS_SHIFT_TEMPLATE_3;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ return;
+}
+
+VLIB_NODE_FN (sr_pt_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+ u8 pt_behavior = ~(u8) 0;
+ sr_pt_iface_t *ls = 0;
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ // Getting the timestamp (one for each batch of packets)
+ timestamp_64_t t64 = {};
+ unix_time_now_nsec_fraction (&t64.sec, &t64.nsec);
+
+ // Single loop for potentially the last three packets
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ u32 iface;
+ vlib_buffer_t *b0;
+ u32 next0 = 0;
+ ethernet_header_t *en0;
+ ip6_header_t *ip0 = 0;
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ iface = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ ls = sr_pt_find_iface (iface);
+ if (ls)
+ {
+ en0 = vlib_buffer_get_current (b0);
+ ip0 = (void *) (en0 + 1);
+ pt_midpoint_processing (vm, node, b0, ip0, ls, t64);
+ pt_behavior = PT_BEHAVIOR_MID;
+ }
+ if (ls && PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ pt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->iface = iface;
+ tr->id = ls->id;
+ tr->load = ls->egress_load;
+ tr->tts_template = ls->tts_template;
+ tr->t64.sec = t64.sec;
+ tr->t64.nsec = t64.nsec;
+ tr->tts = t64.nsec >> 20;
+ tr->behavior = pt_behavior;
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (sr_pt_node) = {
+ .name = "pt",
+ .vector_size = sizeof (u32),
+ .format_trace = format_pt_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = 0,
+ .n_next_nodes = 1,
+ .next_nodes = { [0] = "interface-output" },
+};
+
+VNET_FEATURE_INIT (sr_pt_node, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "pt",
+};
\ No newline at end of file
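A worked instance of the TTS truncation used above (shift values from sr_pt.h):

/* With SR_PT_TTS_TEMPLATE_2 the 32-bit nanosecond fraction is
 * shifted right by 16, so one tts unit is 65536 ns (~65.5 us) and
 * the 8-bit field wraps every ~16.8 ms. */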
diff --git a/src/vnet/srv6/sr_steering.c b/src/vnet/srv6/sr_steering.c
index cb1d81742eb..94c3d67a27a 100644
--- a/src/vnet/srv6/sr_steering.c
+++ b/src/vnet/srv6/sr_steering.c
@@ -184,9 +184,6 @@ sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index,
else
sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index);
- if (!sr_policy)
- return -2;
-
steer_pl->sr_policy = sr_policy - sm->sr_policies;
/* Remove old FIB/hw redirection and create a new one */
@@ -459,7 +456,6 @@ sr_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (sr_steer_policy_command, static) = {
.path = "sr steer",
.short_help = "sr steer (del) [l3 <ip_addr/mask>|l2 <sf_if>] "
@@ -474,7 +470,6 @@ VLIB_CLI_COMMAND (sr_steer_policy_command, static) = {
"\t\tsr steer del l3 2001::/64 via sr_policy index 5\n",
.function = sr_steer_policy_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_sr_steering_policies_command_fn (vlib_main_t * vm,
@@ -491,9 +486,7 @@ show_sr_steering_policies_command_fn (vlib_main_t * vm,
int i;
vlib_cli_output (vm, "SR steering policies:");
- /* *INDENT-OFF* */
pool_foreach (steer_pl, sm->steer_policies) {vec_add1(steer_policies, steer_pl);}
- /* *INDENT-ON* */
vlib_cli_output (vm, "Traffic\t\tSR policy BSID");
for (i = 0; i < vec_len (steer_policies); i++)
{
@@ -526,13 +519,11 @@ show_sr_steering_policies_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_steering_policies_command, static) = {
.path = "show sr steering-policies",
.short_help = "show sr steering-policies",
.function = show_sr_steering_policies_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
sr_steering_init (vlib_main_t * vm)
@@ -550,18 +541,14 @@ sr_steering_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (sr_steering_init);
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (sr_pl_rewrite_encaps_l2, static) =
{
.arc_name = "device-input",
.node_name = "sr-pl-rewrite-encaps-l2",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/srv6/sr_test.c b/src/vnet/srv6/sr_test.c
index 85f64e1e230..be898599e96 100644
--- a/src/vnet/srv6/sr_test.c
+++ b/src/vnet/srv6/sr_test.c
@@ -80,6 +80,18 @@ api_sr_policy_add (vat_main_t *vam)
}
static int
+api_sr_policy_mod_v2 (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_policy_add_v2 (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
api_sr_localsids_dump (vat_main_t *vam)
{
return -1;
@@ -92,6 +104,12 @@ api_sr_policies_dump (vat_main_t *vam)
}
static int
+api_sr_policies_v2_dump (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
api_sr_policies_with_sl_index_dump (vat_main_t *vam)
{
return -1;
@@ -109,6 +127,11 @@ vl_api_sr_policies_details_t_handler (vl_api_sr_policies_details_t *mp)
}
static void
+vl_api_sr_policies_v2_details_t_handler (vl_api_sr_policies_v2_details_t *mp)
+{
+}
+
+static void
vl_api_sr_localsids_details_t_handler (vl_api_sr_localsids_details_t *mp)
{
}
diff --git a/src/vnet/syslog/syslog.c b/src/vnet/syslog/syslog.c
index 8f3313950e8..caa55830eb3 100644
--- a/src/vnet/syslog/syslog.c
+++ b/src/vnet/syslog/syslog.c
@@ -506,7 +506,6 @@ show_syslog_filter_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
/*?
* Set syslog sender configuration.
*
@@ -599,7 +598,6 @@ VLIB_CLI_COMMAND (show_syslog_filter_command, static) = {
.short_help = "show syslog filter",
.function = show_syslog_filter_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
syslog_init (vlib_main_t * vm)
diff --git a/src/vnet/syslog/syslog_api.c b/src/vnet/syslog/syslog_api.c
index 21e79c6e2bd..195a6e52eef 100644
--- a/src/vnet/syslog/syslog_api.c
+++ b/src/vnet/syslog/syslog_api.c
@@ -128,7 +128,6 @@ vl_api_syslog_get_sender_t_handler (vl_api_syslog_get_sender_t * mp)
syslog_main_t *sm = &syslog_main;
u32 vrf_id;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SYSLOG_GET_SENDER_REPLY,
({
clib_memcpy (&rmp->collector_address, &(sm->collector),
@@ -143,7 +142,6 @@ vl_api_syslog_get_sender_t_handler (vl_api_syslog_get_sender_t * mp)
rmp->vrf_id = vrf_id;
rmp->max_msg_size = htonl (sm->max_msg_size);
}))
- /* *INDENT-ON* */
}
static void
@@ -171,12 +169,10 @@ vl_api_syslog_get_filter_t_handler (vl_api_syslog_get_filter_t * mp)
vl_api_syslog_get_filter_reply_t *rmp;
syslog_main_t *sm = &syslog_main;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SYSLOG_GET_FILTER_REPLY,
({
rv = syslog_severity_encode (sm->severity_filter, &rmp->severity);
}))
- /* *INDENT-ON* */
}
#include <vnet/syslog/syslog.api.c>
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index d4b1d77f556..efc72a227e8 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -25,6 +25,8 @@
#include <vnet/dpo/load_balance.h>
#include <math.h>
+#include <vlib/stats/stats.h>
+
tcp_main_t tcp_main;
typedef struct
@@ -71,6 +73,10 @@ tcp_add_del_adjacency (tcp_connection_t * tc, u8 is_add)
static void
tcp_cc_init (tcp_connection_t * tc)
{
+ /* As per RFC 6582 initialize "recover" to iss */
+ if (tcp_opts_sack_permitted (&tc->rcv_opts))
+ tc->snd_congestion = tc->iss;
+
tc->cc_algo->init (tc);
}
@@ -108,7 +114,7 @@ tcp_cc_algo_new_type (const tcp_cc_algorithm_t * vft)
}
static u32
-tcp_connection_bind (u32 session_index, transport_endpoint_t * lcl)
+tcp_connection_bind (u32 session_index, transport_endpoint_cfg_t *lcl)
{
tcp_main_t *tm = &tcp_main;
tcp_connection_t *listener;
@@ -143,7 +149,7 @@ tcp_connection_bind (u32 session_index, transport_endpoint_t * lcl)
}
static u32
-tcp_session_bind (u32 session_index, transport_endpoint_t * tep)
+tcp_session_bind (u32 session_index, transport_endpoint_cfg_t *tep)
{
return tcp_connection_bind (session_index, tep);
}
@@ -184,8 +190,7 @@ tcp_session_get_listener (u32 listener_index)
static tcp_connection_t *
tcp_half_open_connection_alloc (void)
{
- ASSERT (vlib_get_thread_index () == 0);
- return tcp_connection_alloc (0);
+ return tcp_connection_alloc (transport_cl_thread ());
}
/**
@@ -195,7 +200,8 @@ tcp_half_open_connection_alloc (void)
static void
tcp_half_open_connection_free (tcp_connection_t * tc)
{
- ASSERT (vlib_get_thread_index () == 0);
+ ASSERT (vlib_get_thread_index () == tc->c_thread_index ||
+ vlib_thread_is_main_w_barrier ());
return tcp_connection_free (tc);
}
@@ -236,8 +242,8 @@ tcp_connection_cleanup (tcp_connection_t * tc)
/* Cleanup local endpoint if this was an active connect */
if (!(tc->cfg_flags & TCP_CFG_F_NO_ENDPOINT))
- transport_endpoint_cleanup (TRANSPORT_PROTO_TCP, &tc->c_lcl_ip,
- tc->c_lcl_port);
+ transport_release_local_endpoint (TRANSPORT_PROTO_TCP, &tc->c_lcl_ip,
+ tc->c_lcl_port);
/* Check if connection is not yet fully established */
if (tc->state == TCP_STATE_SYN_SENT)
@@ -289,7 +295,7 @@ tcp_connection_alloc (u8 thread_index)
tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
tcp_connection_t *tc;
- pool_get (wrk->connections, tc);
+ pool_get_aligned_safe (wrk->connections, tc, CLIB_CACHE_LINE_BYTES);
clib_memset (tc, 0, sizeof (*tc));
tc->c_c_index = tc - wrk->connections;
tc->c_thread_index = thread_index;
@@ -306,12 +312,12 @@ tcp_connection_alloc_w_base (u8 thread_index, tcp_connection_t **base)
if ((*base)->c_thread_index == thread_index)
{
u32 base_index = (*base)->c_c_index;
- pool_get (wrk->connections, tc);
+ pool_get_aligned_safe (wrk->connections, tc, CLIB_CACHE_LINE_BYTES);
*base = tcp_connection_get (base_index, thread_index);
}
else
{
- pool_get (wrk->connections, tc);
+ pool_get_aligned_safe (wrk->connections, tc, CLIB_CACHE_LINE_BYTES);
}
clib_memcpy_fast (tc, *base, sizeof (*tc));
tc->c_c_index = tc - wrk->connections;
@@ -404,8 +410,8 @@ tcp_connection_close (tcp_connection_t * tc)
case TCP_STATE_CLOSE_WAIT:
if (!transport_max_tx_dequeue (&tc->connection))
{
- tcp_send_fin (tc);
tcp_connection_timers_reset (tc);
+ tcp_send_fin (tc);
tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
tcp_cfg.lastack_time);
@@ -485,6 +491,14 @@ tcp_session_reset (u32 conn_index, u32 thread_index)
{
tcp_connection_t *tc;
tc = tcp_connection_get (conn_index, thread_index);
+
+ /* For half-opens just cleanup */
+ if (tc->state == TCP_STATE_SYN_SENT)
+ {
+ tcp_connection_cleanup (tc);
+ return;
+ }
+
tcp_send_reset (tc);
tcp_connection_timers_reset (tc);
tcp_cong_recovery_off (tc);
@@ -760,15 +774,18 @@ tcp_connection_init_vars (tcp_connection_t * tc)
}
static int
-tcp_alloc_custom_local_endpoint (tcp_main_t * tm, ip46_address_t * lcl_addr,
- u16 * lcl_port, u8 is_ip4)
+tcp_alloc_custom_local_endpoint (ip46_address_t *lcl_addr, u16 *lcl_port,
+ transport_endpoint_cfg_t *rmt)
{
+ tcp_main_t *tm = vnet_get_tcp_main ();
int index, port;
- if (is_ip4)
+
+ if (rmt->is_ip4)
{
index = tm->last_v4_addr_rotor++;
if (tm->last_v4_addr_rotor >= vec_len (tcp_cfg.ip4_src_addrs))
tm->last_v4_addr_rotor = 0;
+ clib_memset (lcl_addr, 0, sizeof (*lcl_addr));
lcl_addr->ip4.as_u32 = tcp_cfg.ip4_src_addrs[index].as_u32;
}
else
@@ -779,7 +796,7 @@ tcp_alloc_custom_local_endpoint (tcp_main_t * tm, ip46_address_t * lcl_addr,
clib_memcpy_fast (&lcl_addr->ip6, &tcp_cfg.ip6_src_addrs[index],
sizeof (ip6_address_t));
}
- port = transport_alloc_local_port (TRANSPORT_PROTO_TCP, lcl_addr);
+ port = transport_alloc_local_port (TRANSPORT_PROTO_TCP, lcl_addr, rmt);
if (port < 1)
return SESSION_E_NOPORT;
*lcl_port = port;
@@ -789,7 +806,6 @@ tcp_alloc_custom_local_endpoint (tcp_main_t * tm, ip46_address_t * lcl_addr,
static int
tcp_session_open (transport_endpoint_cfg_t * rmt)
{
- tcp_main_t *tm = vnet_get_tcp_main ();
tcp_connection_t *tc;
ip46_address_t lcl_addr;
u16 lcl_port;
@@ -800,27 +816,13 @@ tcp_session_open (transport_endpoint_cfg_t * rmt)
*/
if ((rmt->is_ip4 && vec_len (tcp_cfg.ip4_src_addrs))
|| (!rmt->is_ip4 && vec_len (tcp_cfg.ip6_src_addrs)))
- rv = tcp_alloc_custom_local_endpoint (tm, &lcl_addr, &lcl_port,
- rmt->is_ip4);
+ rv = tcp_alloc_custom_local_endpoint (&lcl_addr, &lcl_port, rmt);
else
- rv = transport_alloc_local_endpoint (TRANSPORT_PROTO_TCP,
- rmt, &lcl_addr, &lcl_port);
+ rv = transport_alloc_local_endpoint (TRANSPORT_PROTO_TCP, rmt, &lcl_addr,
+ &lcl_port);
if (rv)
- {
- if (rv != SESSION_E_PORTINUSE)
- return rv;
-
- if (session_lookup_connection (rmt->fib_index, &lcl_addr, &rmt->ip,
- lcl_port, rmt->port, TRANSPORT_PROTO_TCP,
- rmt->is_ip4))
- return SESSION_E_PORTINUSE;
-
- /* 5-tuple is available so increase lcl endpoint refcount and proceed
- * with connection allocation */
- transport_share_local_endpoint (TRANSPORT_PROTO_TCP, &lcl_addr,
- lcl_port);
- }
+ return rv;
/*
* Create connection and send SYN
@@ -829,7 +831,7 @@ tcp_session_open (transport_endpoint_cfg_t * rmt)
ip_copy (&tc->c_rmt_ip, &rmt->ip, rmt->is_ip4);
ip_copy (&tc->c_lcl_ip, &lcl_addr, rmt->is_ip4);
tc->c_rmt_port = rmt->port;
- tc->c_lcl_port = clib_host_to_net_u16 (lcl_port);
+ tc->c_lcl_port = lcl_port;
tc->c_is_ip4 = rmt->is_ip4;
tc->c_proto = TRANSPORT_PROTO_TCP;
tc->c_fib_index = rmt->fib_index;
@@ -1221,7 +1223,6 @@ tcp_timer_waitclose_handler (tcp_connection_t * tc)
}
}
-/* *INDENT-OFF* */
static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] =
{
tcp_timer_retransmit_handler,
@@ -1229,7 +1230,6 @@ static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] =
tcp_timer_waitclose_handler,
tcp_timer_retransmit_syn_handler,
};
-/* *INDENT-ON* */
static void
tcp_dispatch_pending_timers (tcp_worker_ctx_t * wrk)
@@ -1337,7 +1337,6 @@ tcp_session_app_rx_evt (transport_connection_t *conn)
return 0;
}
-/* *INDENT-OFF* */
const static transport_proto_vft_t tcp_proto = {
.enable = vnet_tcp_enable_disable,
.start_listen = tcp_session_bind,
@@ -1368,7 +1367,6 @@ const static transport_proto_vft_t tcp_proto = {
.service_type = TRANSPORT_SERVICE_VC,
},
};
-/* *INDENT-ON* */
void
tcp_connection_tx_pacer_update (tcp_connection_t * tc)
@@ -1437,7 +1435,8 @@ tcp_expired_timers_dispatch (u32 * expired_timers)
clib_fifo_add (wrk->pending_timers, expired_timers, n_expired);
- max_loops = clib_max (1, 0.5 * TCP_TIMER_TICK * wrk->vm->loops_per_second);
+ max_loops =
+ clib_max ((u32) (0.5 * TCP_TIMER_TICK * wrk->vm->loops_per_second), 1);
max_per_loop = clib_max ((n_left + n_expired) / max_loops, 10);
max_per_loop = clib_min (max_per_loop, VLIB_FRAME_SIZE);
wrk->max_timers_per_loop = clib_max (n_left ? wrk->max_timers_per_loop : 0,
@@ -1457,6 +1456,51 @@ tcp_initialize_iss_seed (tcp_main_t * tm)
tm->iss_seed.second = random_u64 (&time_now);
}
+static void
+tcp_stats_collector_fn (vlib_stats_collector_data_t *d)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ counter_t **counters = d->entry->data;
+ counter_t *cb = counters[0];
+ tcp_wrk_stats_t acc = {};
+ tcp_worker_ctx_t *wrk;
+
+ vec_foreach (wrk, tm->wrk_ctx)
+ {
+#define _(name, type, str) acc.name += wrk->stats.name;
+ foreach_tcp_wrk_stat
+#undef _
+ }
+
+#define _(name, type, str) cb[TCP_STAT_##name] = acc.name;
+ foreach_tcp_wrk_stat
+#undef _
+}
+
+static void
+tcp_counters_init (tcp_main_t *tm)
+{
+ vlib_stats_collector_reg_t r = {};
+ u32 idx;
+
+ if (tm->counters_init)
+ return;
+
+ r.entry_index = idx = vlib_stats_add_counter_vector ("/sys/tcp");
+ r.collect_fn = tcp_stats_collector_fn;
+ vlib_stats_validate (idx, 0, TCP_STAT_no_buffer);
+
+#define _(name, type, str) \
+ vlib_stats_add_symlink (idx, TCP_STAT_##name, "/sys/tcp/%s", \
+ CLIB_STRING_MACRO (name));
+ foreach_tcp_wrk_stat
+#undef _
+
+ vlib_stats_register_collector_fn (&r);
+
+ tm->counters_init = 1;
+}
+
static clib_error_t *
tcp_main_enable (vlib_main_t * vm)
{
@@ -1533,10 +1577,8 @@ tcp_main_enable (vlib_main_t * vm)
tm->bytes_per_buffer = vlib_buffer_get_default_data_size (vm);
tm->cc_last_type = TCP_CC_LAST;
- tm->ipl_next_node[0] = vlib_node_get_next (vm, session_queue_node.index,
- ip4_lookup_node.index);
- tm->ipl_next_node[1] = vlib_node_get_next (vm, session_queue_node.index,
- ip6_lookup_node.index);
+ tcp_counters_init (tm);
+
return error;
}
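The collector above is driven entirely by the foreach_tcp_wrk_stat X-macro; a sketch of the expansion for one hypothetical counter name:

/* For a hypothetical entry _(to_enqueue, u64, "to enqueue"):
 *   enum:    TCP_STAT_to_enqueue,
 *   gather:  acc.to_enqueue += wrk->stats.to_enqueue;
 *   publish: cb[TCP_STAT_to_enqueue] = acc.to_enqueue;
 *   symlink: "/sys/tcp/to_enqueue" pointing at that counter slot */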
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 3ddd324873b..2362a8bb857 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -66,6 +66,13 @@ typedef struct tcp_wrk_stats_
#undef _
} tcp_wrk_stats_t;
+typedef enum
+{
+#define _(name, type, str) TCP_STAT_##name,
+ foreach_tcp_wrk_stat
+#undef _
+} tcp_wrk_stats_e;
+
typedef struct tcp_free_req_
{
clib_time_type_t free_time;
@@ -215,9 +222,6 @@ typedef struct _tcp_main
/** vlib buffer size */
u32 bytes_per_buffer;
- /** Session layer edge indices to ip lookup (syns, rst) */
- u32 ipl_next_node[2];
-
/** Dispatch table by state and flags */
tcp_lookup_dispatch_t dispatch_table[TCP_N_STATES][64];
@@ -236,6 +240,9 @@ typedef struct _tcp_main
/** Flag that indicates if stack is on or off */
u8 is_enabled;
+ /** Set if counters on stats segment initialized */
+ u8 counters_init;
+
/** Flag that indicates if v4 punting is enabled */
u8 punt_unknown4;
@@ -268,6 +275,10 @@ extern vlib_node_registration_t tcp4_rcv_process_node;
extern vlib_node_registration_t tcp6_rcv_process_node;
extern vlib_node_registration_t tcp4_listen_node;
extern vlib_node_registration_t tcp6_listen_node;
+extern vlib_node_registration_t tcp4_input_nolookup_node;
+extern vlib_node_registration_t tcp6_input_nolookup_node;
+extern vlib_node_registration_t tcp4_drop_node;
+extern vlib_node_registration_t tcp6_drop_node;
#define tcp_cfg tcp_main.cfg
#define tcp_node_index(node_id, is_ip4) \
@@ -313,8 +324,8 @@ u32 tcp_snd_space (tcp_connection_t * tc);
int tcp_fastrecovery_prr_snd_space (tcp_connection_t * tc);
void tcp_reschedule (tcp_connection_t * tc);
fib_node_index_t tcp_lookup_rmt_in_fib (tcp_connection_t * tc);
-u32 tcp_session_push_header (transport_connection_t * tconn,
- vlib_buffer_t * b);
+u32 tcp_session_push_header (transport_connection_t *tconn, vlib_buffer_t **b,
+ u32 n_bufs);
int tcp_session_custom_tx (void *conn, transport_send_params_t * sp);
void tcp_connection_timers_init (tcp_connection_t * tc);
@@ -327,6 +338,7 @@ void tcp_connection_tx_pacer_reset (tcp_connection_t * tc, u32 window,
void tcp_program_cleanup (tcp_worker_ctx_t * wrk, tcp_connection_t * tc);
void tcp_check_gso (tcp_connection_t *tc);
+int tcp_buffer_make_reset (vlib_main_t *vm, vlib_buffer_t *b, u8 is_ip4);
void tcp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
int tcp_configure_v4_source_address_range (vlib_main_t * vm,
ip4_address_t * start,
diff --git a/src/vnet/tcp/tcp_bt.c b/src/vnet/tcp/tcp_bt.c
index 67e9a14ceda..3cb57a550de 100644
--- a/src/vnet/tcp/tcp_bt.c
+++ b/src/vnet/tcp/tcp_bt.c
@@ -638,11 +638,9 @@ tcp_bt_flush_samples (tcp_connection_t * tc)
vec_validate (samples, pool_elts (bt->samples) - 1);
vec_reset_length (samples);
- /* *INDENT-OFF* */
pool_foreach (bts, bt->samples) {
vec_add1 (samples, bts - bt->samples);
}
- /* *INDENT-ON* */
vec_foreach (si, samples)
{
diff --git a/src/vnet/tcp/tcp_cli.c b/src/vnet/tcp/tcp_cli.c
index c7b5c0b441e..b04c0bdc0cf 100644
--- a/src/vnet/tcp/tcp_cli.c
+++ b/src/vnet/tcp/tcp_cli.c
@@ -411,6 +411,8 @@ tcp_configure_v4_source_address_range (vlib_main_t * vm,
return VNET_API_ERROR_NEXT_HOP_NOT_IN_FIB;
sw_if_index = fib_entry_get_resolving_interface (fei);
+ if (sw_if_index == (u32) ~0)
+ return VNET_API_ERROR_NO_MATCHING_INTERFACE;
/* Configure proxy arp across the range */
rv = ip4_neighbor_proxy_add (fib_index, start, end);
@@ -431,7 +433,7 @@ tcp_configure_v4_source_address_range (vlib_main_t * vm,
/* Add local adjacencies for the range */
- receive_dpo_add_or_lock (DPO_PROTO_IP4, ~0 /* sw_if_index */ ,
+ receive_dpo_add_or_lock (DPO_PROTO_IP4, sw_if_index /* sw_if_index */,
NULL, &dpo);
prefix.fp_len = 32;
prefix.fp_proto = FIB_PROTOCOL_IP4;
@@ -506,7 +508,7 @@ tcp_configure_v6_source_address_range (vlib_main_t * vm,
ip6_neighbor_proxy_add (sw_if_index, start);
/* Add a receive adjacency for this address */
- receive_dpo_add_or_lock (DPO_PROTO_IP6, ~0 /* sw_if_index */ ,
+ receive_dpo_add_or_lock (DPO_PROTO_IP6, sw_if_index /* sw_if_index */,
NULL, &dpo);
fib_table_entry_special_dpo_update (fib_index,
@@ -611,14 +613,12 @@ tcp_src_address_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tcp_src_address_command, static) =
{
.path = "tcp src-address",
.short_help = "tcp src-address <ip-addr> [- <ip-addr>] add src address range",
.function = tcp_src_address_fn,
};
-/* *INDENT-ON* */
static u8 *
tcp_scoreboard_dump_trace (u8 * s, sack_scoreboard_t * sb)
@@ -674,14 +674,12 @@ tcp_show_scoreboard_trace_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tcp_show_scoreboard_trace_command, static) =
{
.path = "show tcp scoreboard trace",
.short_help = "show tcp scoreboard trace <connection>",
.function = tcp_show_scoreboard_trace_fn,
};
-/* *INDENT-ON* */
u8 *
tcp_scoreboard_replay (u8 * s, tcp_connection_t * tc, u8 verbose)
@@ -799,14 +797,12 @@ tcp_scoreboard_trace_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tcp_replay_scoreboard_command, static) =
{
.path = "tcp replay scoreboard",
.short_help = "tcp replay scoreboard <connection>",
.function = tcp_scoreboard_trace_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_tcp_punt_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -822,14 +818,12 @@ show_tcp_punt_fn (vlib_main_t * vm, unformat_input_t * input,
tm->punt_unknown6 ? "enabled" : "disabled");
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_tcp_punt_command, static) =
{
.path = "show tcp punt",
.short_help = "show tcp punt",
.function = show_tcp_punt_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -861,14 +855,12 @@ show_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_tcp_stats_command, static) =
{
.path = "show tcp stats",
.short_help = "show tcp stats",
.function = show_tcp_stats_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
clear_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -891,14 +883,12 @@ clear_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_tcp_stats_command, static) =
{
.path = "clear tcp stats",
.short_help = "clear tcp stats",
.function = clear_tcp_stats_fn,
};
-/* *INDENT-ON* */
uword
unformat_tcp_cc_algo (unformat_input_t * input, va_list * va)
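
The added check in tcp_configure_v4_source_address_range rejects the (u32) ~0 sentinel that fib_entry_get_resolving_interface returns when no interface resolves, before the value is reused for the receive DPO. A small sketch of the guard (illustrative names only):

    #include <stdint.h>
    #include <stdio.h>

    #define IFACE_INVALID ((uint32_t) ~0)

    static int
    configure_range (uint32_t sw_if_index)
    {
      if (sw_if_index == IFACE_INVALID)
        return -1; /* maps to VNET_API_ERROR_NO_MATCHING_INTERFACE */
      /* ... bind the receive adjacency to sw_if_index, not ~0 ... */
      return 0;
    }

    int
    main (void)
    {
      printf ("%d\n", configure_range (IFACE_INVALID)); /* -1 */
      printf ("%d\n", configure_range (4));             /* 0 */
      return 0;
    }
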
diff --git a/src/vnet/tcp/tcp_cubic.c b/src/vnet/tcp/tcp_cubic.c
index cc2ffeae9f0..cf2b9a17d18 100644
--- a/src/vnet/tcp/tcp_cubic.c
+++ b/src/vnet/tcp/tcp_cubic.c
@@ -141,7 +141,7 @@ cubic_cwnd_accumulate (tcp_connection_t * tc, u32 thresh, u32 bytes_acked)
tc->cwnd_acc_bytes = 0;
}
- tcp_cwnd_accumulate (tc, thresh, tc->bytes_acked);
+ tcp_cwnd_accumulate (tc, thresh, bytes_acked);
}
static void
@@ -158,7 +158,7 @@ cubic_rcv_ack (tcp_connection_t * tc, tcp_rate_sample_t * rs)
if (tcp_in_slowstart (tc))
{
- tc->cwnd += tc->bytes_acked;
+ tc->cwnd += rs->delivered;
return;
}
@@ -169,7 +169,7 @@ cubic_rcv_ack (tcp_connection_t * tc, tcp_rate_sample_t * rs)
w_aimd = (u64) W_est (cd, t, rtt_sec) * tc->snd_mss;
if (w_cubic < w_aimd)
{
- cubic_cwnd_accumulate (tc, tc->cwnd, tc->bytes_acked);
+ cubic_cwnd_accumulate (tc, tc->cwnd, rs->delivered);
}
else
{
@@ -195,7 +195,7 @@ cubic_rcv_ack (tcp_connection_t * tc, tcp_rate_sample_t * rs)
/* Practically we can't increment so just inflate threshold */
thresh = 50 * tc->cwnd;
}
- cubic_cwnd_accumulate (tc, thresh, tc->bytes_acked);
+ cubic_cwnd_accumulate (tc, thresh, rs->delivered);
}
}
@@ -232,6 +232,23 @@ cubic_unformat_config (unformat_input_t * input)
return 1;
}
+void
+cubic_event (tcp_connection_t *tc, tcp_cc_event_t evt)
+{
+ cubic_data_t *cd;
+ f64 now;
+
+ if (evt != TCP_CC_EVT_START_TX)
+ return;
+
+ /* App was idle, so update t_start to avoid artificially
+ * inflating cwnd when nothing was recently sent and acked */
+ cd = (cubic_data_t *) tcp_cc_data (tc);
+ now = cubic_time (tc->c_thread_index);
+ if (now > tc->mrtt_us + 1)
+ cd->t_start = now;
+}
+
const static tcp_cc_algorithm_t tcp_cubic = {
.name = "cubic",
.unformat_cfg = cubic_unformat_config,
@@ -240,6 +257,7 @@ const static tcp_cc_algorithm_t tcp_cubic = {
.recovered = cubic_recovered,
.rcv_ack = cubic_rcv_ack,
.rcv_cong_ack = newreno_rcv_cong_ack,
+ .event = cubic_event,
.init = cubic_conn_init,
};
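
The new cubic_event hook resets t_start when transmission resumes because CUBIC's target window grows with the cube of the time since the last congestion event, so idle time would otherwise be credited as growth. A self-contained illustration with the RFC 8312 constants (C = 0.4, beta = 0.7); the numbers are only indicative:

    #include <math.h>
    #include <stdio.h>

    #define CUBIC_C    0.4
    #define CUBIC_BETA 0.7

    /* RFC 8312: W_cubic(t) = C * (t - K)^3 + W_max, in segments */
    static double
    w_cubic (double t, double K, double w_max)
    {
      return CUBIC_C * pow (t - K, 3) + w_max;
    }

    int
    main (void)
    {
      double w_max = 100.0;
      double K = cbrt (w_max * (1.0 - CUBIC_BETA) / CUBIC_C);

      printf ("t = 1s:  %.0f segments\n", w_cubic (1.0, K, w_max));  /* ~87 */
      printf ("t = 10s: %.0f segments\n", w_cubic (10.0, K, w_max)); /* ~177 */
      /* nine idle seconds nearly double the target; resetting t_start on
         TCP_CC_EVT_START_TX avoids crediting idle time as growth */
      return 0;
    }
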
diff --git a/src/vnet/tcp/tcp_debug.c b/src/vnet/tcp/tcp_debug.c
index e3d7452b591..ab466f30efb 100644
--- a/src/vnet/tcp/tcp_debug.c
+++ b/src/vnet/tcp/tcp_debug.c
@@ -26,7 +26,7 @@ tcp_evt_track_register (elog_track_t * et)
if (fl_len)
{
track_index = tdm->free_track_indices[fl_len - 1];
- _vec_len (tdm->free_track_indices) -= 1;
+ vec_dec_len (tdm->free_track_indices, 1);
et->track_index_plus_one = track_index + 1;
}
else
@@ -134,14 +134,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tcp_debug_command, static) =
{
.path = "tcp debug",
.short_help = "tcp [show] [debug group <N> level <N>]",
.function = tcp_debug_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
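
The vec_dec_len change above pops the tail of the free-track-index vector through the public API instead of poking _vec_len directly. A plain-C sketch of the same free-list stack discipline (arrays stand in for vppinfra vectors):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t free_indices[8];
    static uint32_t n_free;

    static uint32_t
    track_alloc (uint32_t *next_fresh)
    {
      if (n_free)
        return free_indices[--n_free]; /* pop tail: vec_dec_len (v, 1) */
      return (*next_fresh)++;
    }

    static void
    track_free (uint32_t i)
    {
      free_indices[n_free++] = i; /* push: vec_add1 */
    }

    int
    main (void)
    {
      uint32_t fresh = 0;
      uint32_t a = track_alloc (&fresh);
      track_free (a);
      printf ("%u %u\n", a, track_alloc (&fresh)); /* 0 0: index reused */
      return 0;
    }
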
diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h
index 1202f7f44d3..04e921cd601 100644
--- a/src/vnet/tcp/tcp_debug.h
+++ b/src/vnet/tcp/tcp_debug.h
@@ -17,13 +17,18 @@
#define SRC_VNET_TCP_TCP_DEBUG_H_
#include <vlib/vlib.h>
+#include <vpp/vnet/config.h>
/**
* Build debugging infra unconditionally. Debug components controlled via
* debug configuration. Comes with some overhead so it's not recommended for
* production/performance scenarios. Takes priority over TCP_DEBUG_ENABLE.
*/
+#ifdef VPP_TCP_DEBUG_ALWAYS
+#define TCP_DEBUG_ALWAYS (1)
+#else
#define TCP_DEBUG_ALWAYS (0)
+#endif
/**
* Build debugging infra only if enabled. Debug components controlled via
* macros that follow.
@@ -867,11 +872,12 @@ if (TCP_DEBUG_CC > 1) \
*/
#if TCP_DEBUG_CS || TCP_DEBUG_ALWAYS
-#define STATS_INTERVAL 1
+#define STATS_INTERVAL 0.001
-#define tcp_cc_time_to_print_stats(_tc) \
- _tc->c_cc_stat_tstamp + STATS_INTERVAL < tcp_time_now() \
- || tcp_in_fastrecovery (_tc) \
+#define tcp_cc_time_to_print_stats(_tc) \
+ _tc->c_cc_stat_tstamp + STATS_INTERVAL < \
+ tcp_time_now_us (_tc->c_thread_index) || \
+ tcp_in_fastrecovery (_tc)
#define TCP_EVT_CC_RTO_STAT_PRINT(_tc) \
{ \
@@ -887,14 +893,14 @@ if (TCP_DEBUG_CC > 1) \
ed->data[3] = _tc->rttvar; \
}
-#define TCP_EVT_CC_RTO_STAT_HANDLER(_tc, ...) \
-{ \
-if (tcp_cc_time_to_print_stats (_tc)) \
-{ \
- TCP_EVT_CC_RTO_STAT_PRINT (_tc); \
- _tc->c_cc_stat_tstamp = tcp_time_now (); \
-} \
-}
+#define TCP_EVT_CC_RTO_STAT_HANDLER(_tc, ...) \
+ { \
+ if (tcp_cc_time_to_print_stats (_tc)) \
+ { \
+ TCP_EVT_CC_RTO_STAT_PRINT (_tc); \
+ _tc->c_cc_stat_tstamp = tcp_time_now_us (_tc->c_thread_index); \
+ } \
+ }
#define TCP_EVT_CC_SND_STAT_PRINT(_tc) \
{ \
@@ -911,14 +917,14 @@ if (tcp_cc_time_to_print_stats (_tc)) \
ed->data[3] = _tc->snd_rxt_bytes; \
}
-#define TCP_EVT_CC_SND_STAT_HANDLER(_tc, ...) \
-{ \
-if (tcp_cc_time_to_print_stats (_tc)) \
-{ \
- TCP_EVT_CC_SND_STAT_PRINT(_tc); \
- _tc->c_cc_stat_tstamp = tcp_time_now (); \
-} \
-}
+#define TCP_EVT_CC_SND_STAT_HANDLER(_tc, ...) \
+ { \
+ if (tcp_cc_time_to_print_stats (_tc)) \
+ { \
+ TCP_EVT_CC_SND_STAT_PRINT (_tc); \
+ _tc->c_cc_stat_tstamp = tcp_time_now_us (_tc->c_thread_index); \
+ } \
+ }
#define TCP_EVT_CC_STAT_PRINT(_tc) \
{ \
@@ -937,14 +943,14 @@ if (tcp_cc_time_to_print_stats (_tc)) \
TCP_EVT_CC_SND_STAT_PRINT (_tc); \
}
-#define TCP_EVT_CC_STAT_HANDLER(_tc, ...) \
-{ \
-if (tcp_cc_time_to_print_stats (_tc)) \
-{ \
- TCP_EVT_CC_STAT_PRINT (_tc); \
- _tc->c_cc_stat_tstamp = tcp_time_now(); \
-} \
-}
+#define TCP_EVT_CC_STAT_HANDLER(_tc, ...) \
+ { \
+ if (tcp_cc_time_to_print_stats (_tc)) \
+ { \
+ TCP_EVT_CC_STAT_PRINT (_tc); \
+ _tc->c_cc_stat_tstamp = tcp_time_now_us (_tc->c_thread_index); \
+ } \
+ }
#else
#define TCP_EVT_CC_STAT_HANDLER(_tc, ...)
#define TCP_EVT_CC_STAT_PRINT(_tc)
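
The reworked handlers above gate event emission on a per-connection timestamp, now kept in microsecond time via tcp_time_now_us, so stats print at most once per STATS_INTERVAL unless the connection is in fast recovery. A standalone sketch of that throttle (times as doubles in seconds; names illustrative):

    #include <stdio.h>

    #define STATS_INTERVAL 0.001 /* seconds, matching the new value */

    static double last_print;

    static void
    maybe_print_stats (double now, int in_fastrecovery)
    {
      if (last_print + STATS_INTERVAL < now || in_fastrecovery)
        {
          printf ("stats at %.4f\n", now);
          last_print = now;
        }
    }

    int
    main (void)
    {
      maybe_print_stats (0.0005, 0); /* suppressed: interval not elapsed */
      maybe_print_stats (0.0020, 0); /* printed */
      maybe_print_stats (0.0021, 0); /* suppressed */
      maybe_print_stats (0.0022, 1); /* printed: recovery bypasses the gate */
      return 0;
    }
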
diff --git a/src/vnet/tcp/tcp_error.def b/src/vnet/tcp/tcp_error.def
index a6f0ce4b35f..87fdcc02615 100644
--- a/src/vnet/tcp/tcp_error.def
+++ b/src/vnet/tcp/tcp_error.def
@@ -49,3 +49,4 @@ tcp_error (RCV_WND, rcv_wnd, WARN, "Segment not in receive window")
tcp_error (FIN_RCVD, fin_rcvd, INFO, "FINs received")
tcp_error (LINK_LOCAL_RW, link_local_rw, ERROR, "No rewrite for link local connection")
tcp_error (ZERO_RWND, zero_rwnd, WARN, "Zero receive window")
+tcp_error (CONN_ACCEPTED, conn_accepted, INFO, "Connections accepted")
\ No newline at end of file
diff --git a/src/vnet/tcp/tcp_format.c b/src/vnet/tcp/tcp_format.c
index a3245f2046a..4674f2cbaed 100644
--- a/src/vnet/tcp/tcp_format.c
+++ b/src/vnet/tcp/tcp_format.c
@@ -52,12 +52,68 @@ format_tcp_flags (u8 * s, va_list * args)
return s;
}
+u8 *
+format_tcp_options (u8 *s, va_list *args)
+{
+ tcp_options_t *opts = va_arg (*args, tcp_options_t *);
+ u32 indent, n_opts = 0;
+ int i;
+
+ if (!opts->flags)
+ return s;
+
+ indent = format_get_indent (s);
+ indent += 2;
+
+ s = format (s, "options:\n%U", format_white_space, indent);
+
+ if (tcp_opts_mss (opts))
+ {
+ s = format (s, "mss %d", opts->mss);
+ n_opts++;
+ }
+ if (tcp_opts_wscale (opts))
+ {
+ s = format (s, "%swindow scale %d", n_opts > 0 ? ", " : "",
+ format_white_space, indent, opts->wscale);
+ n_opts++;
+ }
+ if (tcp_opts_tstamp (opts))
+ {
+ s = format (s, "%stimestamp %d, echo/reflected timestamp",
+ n_opts > 0 ? ", " : "", format_white_space, indent,
+ opts->tsval, opts->tsecr);
+ n_opts++;
+ }
+ if (tcp_opts_sack_permitted (opts))
+ {
+ s = format (s, "%ssack permitted", n_opts > 0 ? ", " : "",
+ format_white_space, indent);
+ n_opts++;
+ }
+ if (tcp_opts_sack (opts))
+ {
+ s = format (s, "%ssacks:", n_opts > 0 ? ", " : "", format_white_space,
+ indent);
+ for (i = 0; i < opts->n_sack_blocks; ++i)
+ {
+ s = format (s, "\n%Ublock %d: start %d, end %d", format_white_space,
+ indent + 2, i + 1, opts->sacks[i].start,
+ opts->sacks[i].end);
+ }
+ n_opts++;
+ }
+
+ return s;
+}
+
/* Format TCP header. */
u8 *
format_tcp_header (u8 * s, va_list * args)
{
tcp_header_t *tcp = va_arg (*args, tcp_header_t *);
u32 max_header_bytes = va_arg (*args, u32);
+ tcp_options_t opts = { .flags = 0 };
u32 header_bytes;
u32 indent;
@@ -83,32 +139,13 @@ format_tcp_header (u8 * s, va_list * args)
clib_net_to_host_u16 (tcp->window),
clib_net_to_host_u16 (tcp->checksum));
-
-#if 0
- /* Format TCP options. */
- {
- u8 *o;
- u8 *option_start = (void *) (tcp + 1);
- u8 *option_end = (void *) tcp + header_bytes;
-
- for (o = option_start; o < option_end;)
- {
- u32 length = o[1];
- switch (o[0])
- {
- case TCP_OPTION_END:
- length = 1;
- o = option_end;
- break;
-
- case TCP_OPTION_NOOP:
- length = 1;
- break;
-
- }
- }
- }
-#endif
+ if (header_bytes > max_header_bytes)
+ s = format (s, "\n%Uoptions: truncated", format_white_space, indent);
+ else if (tcp_options_parse (tcp, &opts, tcp_is_syn (tcp)) < 0)
+ s = format (s, "\n%Uoptions: parsing failed", format_white_space, indent);
+ else
+ s = format (s, "\n%U%U", format_white_space, indent, format_tcp_options,
+ &opts);
/* Recurse into next protocol layer. */
if (max_header_bytes != 0 && header_bytes < max_header_bytes)
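
format_tcp_options above uses an n_opts counter so every option after the first is prefixed with ", ". The same accumulation pattern in standalone C:

    #include <stdio.h>

    int
    main (void)
    {
      const char *opts[] = { "mss 1460", "window scale 7", "sack permitted" };
      int n_opts = 0, i;

      printf ("options: ");
      for (i = 0; i < 3; i++)
        printf ("%s%s", n_opts++ > 0 ? ", " : "", opts[i]);
      printf ("\n"); /* options: mss 1460, window scale 7, sack permitted */
      return 0;
    }
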
diff --git a/src/vnet/tcp/tcp_inlines.h b/src/vnet/tcp/tcp_inlines.h
index e82f308d9b8..ccd0e3fe3ee 100644
--- a/src/vnet/tcp/tcp_inlines.h
+++ b/src/vnet/tcp/tcp_inlines.h
@@ -18,6 +18,35 @@
#include <vnet/tcp/tcp.h>
+always_inline void
+tcp_node_inc_counter_i (vlib_main_t *vm, u32 tcp4_node, u32 tcp6_node,
+ u8 is_ip4, u32 evt, u32 val)
+{
+ if (is_ip4)
+ vlib_node_increment_counter (vm, tcp4_node, evt, val);
+ else
+ vlib_node_increment_counter (vm, tcp6_node, evt, val);
+}
+
+#define tcp_inc_counter(node_id, err, count) \
+ tcp_node_inc_counter_i (vm, tcp4_##node_id##_node.index, \
+ tcp6_##node_id##_node.index, is_ip4, err, count)
+#define tcp_maybe_inc_err_counter(cnts, err) \
+ { \
+ cnts[err] += (next0 != tcp_next_drop (is_ip4)); \
+ }
+#define tcp_inc_err_counter(cnts, err, val) \
+ { \
+ cnts[err] += val; \
+ }
+#define tcp_store_err_counters(node_id, cnts) \
+ { \
+ int i; \
+ for (i = 0; i < TCP_N_ERROR; i++) \
+ if (cnts[i]) \
+ tcp_inc_counter (node_id, i, cnts[i]); \
+ }
+
always_inline tcp_header_t *
tcp_buffer_hdr (vlib_buffer_t * b)
{
@@ -66,7 +95,7 @@ tcp_listener_get (u32 tli)
always_inline tcp_connection_t *
tcp_half_open_connection_get (u32 conn_index)
{
- return tcp_connection_get (conn_index, 0);
+ return tcp_connection_get (conn_index, transport_cl_thread ());
}
/**
@@ -237,13 +266,6 @@ tcp_input_lookup_buffer (vlib_buffer_t * b, u8 thread_index, u32 * error,
tcp_header_t *tcp;
u8 result = 0;
- /* Set the sw_if_index[VLIB_RX] to the interface we received
- * the connection on (the local interface) */
- vnet_buffer (b)->sw_if_index[VLIB_RX] =
- vnet_buffer (b)->ip.rx_sw_if_index != ~0 ?
- vnet_buffer (b)->ip.rx_sw_if_index :
- vnet_buffer (b)->sw_if_index[VLIB_RX];
-
if (is_ip4)
{
ip4_header_t *ip4 = vlib_buffer_get_current (b);
@@ -300,7 +322,7 @@ tcp_input_lookup_buffer (vlib_buffer_t * b, u8 thread_index, u32 * error,
{
ip6_main_t *im = &ip6_main;
fib_index = vec_elt (im->fib_index_by_sw_if_index,
- vnet_buffer (b)->sw_if_index[VLIB_RX]);
+ vnet_buffer (b)->ip.rx_sw_if_index);
}
tc = session_lookup_connection_wt6 (fib_index, &ip6->dst_address,
@@ -311,6 +333,10 @@ tcp_input_lookup_buffer (vlib_buffer_t * b, u8 thread_index, u32 * error,
}
}
+ /* Set the sw_if_index[VLIB_RX] to the interface we received
+ * the connection on (the local interface) */
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->ip.rx_sw_if_index;
+
if (is_nolookup)
tc =
(transport_connection_t *) tcp_connection_get (vnet_buffer (b)->
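
The counter macros relocated above accumulate per-packet errors into a small on-stack array and flush each non-zero slot to the node counters once per frame, rather than incrementing shared state per packet. A minimal sketch of that batching (hypothetical names, not vlib API):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define N_ERROR 4

    static uint64_t global_counters[N_ERROR]; /* stands in for node counters */

    static void
    process_frame (const uint8_t *errs, int n_pkts)
    {
      uint16_t cnts[N_ERROR] = { 0 };
      int i, e;

      for (i = 0; i < n_pkts; i++)
        cnts[errs[i]] += 1;           /* tcp_inc_err_counter */

      for (e = 0; e < N_ERROR; e++)   /* tcp_store_err_counters */
        if (cnts[e])
          global_counters[e] += cnts[e];
    }

    int
    main (void)
    {
      uint8_t errs[] = { 0, 2, 2, 3 };
      int e;

      process_frame (errs, 4);
      for (e = 0; e < N_ERROR; e++)
        printf ("error %d: %" PRIu64 "\n", e, global_counters[e]);
      return 0;
    }
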
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index df31c9e775f..70b5d28e0cc 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -27,59 +27,17 @@ static vlib_error_desc_t tcp_input_error_counters[] = {
#undef tcp_error
};
-/* All TCP nodes have the same outgoing arcs */
-#define foreach_tcp_state_next \
- _ (DROP4, "ip4-drop") \
- _ (DROP6, "ip6-drop") \
- _ (TCP4_OUTPUT, "tcp4-output") \
- _ (TCP6_OUTPUT, "tcp6-output")
-
-typedef enum _tcp_established_next
-{
-#define _(s,n) TCP_ESTABLISHED_NEXT_##s,
- foreach_tcp_state_next
-#undef _
- TCP_ESTABLISHED_N_NEXT,
-} tcp_established_next_t;
-
-typedef enum _tcp_rcv_process_next
-{
-#define _(s,n) TCP_RCV_PROCESS_NEXT_##s,
- foreach_tcp_state_next
-#undef _
- TCP_RCV_PROCESS_N_NEXT,
-} tcp_rcv_process_next_t;
-
-typedef enum _tcp_syn_sent_next
-{
-#define _(s,n) TCP_SYN_SENT_NEXT_##s,
- foreach_tcp_state_next
-#undef _
- TCP_SYN_SENT_N_NEXT,
-} tcp_syn_sent_next_t;
-
-typedef enum _tcp_listen_next
-{
-#define _(s,n) TCP_LISTEN_NEXT_##s,
- foreach_tcp_state_next
-#undef _
- TCP_LISTEN_N_NEXT,
-} tcp_listen_next_t;
-
-/* Generic, state independent indices */
-typedef enum _tcp_state_next
+typedef enum _tcp_input_next
{
-#define _(s,n) TCP_NEXT_##s,
- foreach_tcp_state_next
-#undef _
- TCP_STATE_N_NEXT,
-} tcp_state_next_t;
-
-#define tcp_next_output(is_ip4) (is_ip4 ? TCP_NEXT_TCP4_OUTPUT \
- : TCP_NEXT_TCP6_OUTPUT)
-
-#define tcp_next_drop(is_ip4) (is_ip4 ? TCP_NEXT_DROP4 \
- : TCP_NEXT_DROP6)
+ TCP_INPUT_NEXT_DROP,
+ TCP_INPUT_NEXT_LISTEN,
+ TCP_INPUT_NEXT_RCV_PROCESS,
+ TCP_INPUT_NEXT_SYN_SENT,
+ TCP_INPUT_NEXT_ESTABLISHED,
+ TCP_INPUT_NEXT_RESET,
+ TCP_INPUT_NEXT_PUNT,
+ TCP_INPUT_N_NEXT
+} tcp_input_next_t;
/**
* Validate segment sequence number. As per RFC793:
@@ -404,17 +362,10 @@ tcp_rcv_ack_no_cc (tcp_connection_t * tc, vlib_buffer_t * b, u32 * error)
if (!(seq_leq (tc->snd_una, vnet_buffer (b)->tcp.ack_number)
&& seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)))
{
- if (seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)
- && seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_una))
- {
- tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
- goto acceptable;
- }
*error = TCP_ERROR_ACK_INVALID;
return -1;
}
-acceptable:
tc->bytes_acked = vnet_buffer (b)->tcp.ack_number - tc->snd_una;
tc->snd_una = vnet_buffer (b)->tcp.ack_number;
*error = TCP_ERROR_ACK_OK;
@@ -594,7 +545,7 @@ tcp_handle_postponed_dequeues (tcp_worker_ctx_t * wrk)
tc->burst_acked = 0;
}
- _vec_len (wrk->pending_deq_acked) = 0;
+ vec_set_len (wrk->pending_deq_acked, 0);
}
static void
@@ -629,11 +580,15 @@ tcp_update_snd_wnd (tcp_connection_t * tc, u32 seq, u32 ack, u32 snd_wnd)
if (PREDICT_FALSE (tc->snd_wnd < tc->snd_mss))
{
- /* Set persist timer if not set and we just got 0 wnd */
- if (!tcp_timer_is_active (tc, TCP_TIMER_PERSIST)
- && !tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT))
+ if (!tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT))
{
tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
+
+ /* Set persist timer if we just got 0 wnd. If already set,
+ * update it because some data sent with snd_wnd < snd_mss was
+ * acked. */
+ if (tcp_timer_is_active (tc, TCP_TIMER_PERSIST))
+ tcp_persist_timer_reset (&wrk->timer_wheel, tc);
tcp_persist_timer_set (&wrk->timer_wheel, tc);
}
}
@@ -742,7 +697,7 @@ tcp_should_fastrecover (tcp_connection_t * tc, u8 has_sack)
}
static int
-tcp_cc_recover (tcp_connection_t * tc)
+tcp_cc_try_recover (tcp_connection_t *tc)
{
sack_scoreboard_hole_t *hole;
u8 is_spurious = 0;
@@ -757,14 +712,14 @@ tcp_cc_recover (tcp_connection_t * tc)
tcp_connection_tx_pacer_reset (tc, tc->cwnd, 0 /* start bucket */ );
tc->rcv_dupacks = 0;
+ tcp_recovery_off (tc);
/* Previous recovery left us congested. Continue sending as part
* of the current recovery event with an updated snd_congestion */
- if (tc->sack_sb.sacked_bytes)
+ if (tc->sack_sb.sacked_bytes && tcp_in_fastrecovery (tc))
{
tc->snd_congestion = tc->snd_nxt;
- tcp_program_retransmit (tc);
- return is_spurious;
+ return -1;
}
tc->rxt_delivered = 0;
@@ -778,19 +733,18 @@ tcp_cc_recover (tcp_connection_t * tc)
if (hole && hole->start == tc->snd_una && hole->end == tc->snd_nxt)
scoreboard_clear (&tc->sack_sb);
- if (!tcp_in_recovery (tc) && !is_spurious)
+ if (tcp_in_fastrecovery (tc) && !is_spurious)
tcp_cc_recovered (tc);
tcp_fastrecovery_off (tc);
tcp_fastrecovery_first_off (tc);
- tcp_recovery_off (tc);
TCP_EVT (TCP_EVT_CC_EVT, tc, 3);
ASSERT (tc->rto_boff == 0);
ASSERT (!tcp_in_cong_recovery (tc));
ASSERT (tcp_scoreboard_is_sane_post_recovery (tc));
- return is_spurious;
+ return 0;
}
static void
@@ -803,15 +757,6 @@ tcp_cc_update (tcp_connection_t * tc, tcp_rate_sample_t * rs)
/* If a cumulative ack, make sure dupacks is 0 */
tc->rcv_dupacks = 0;
-
- /* When dupacks hits the threshold we only enter fast retransmit if
- * cumulative ack covers more than snd_congestion. Should snd_una
- * wrap this test may fail under otherwise valid circumstances.
- * Therefore, proactively update snd_congestion when wrap detected. */
- if (PREDICT_FALSE
- (seq_leq (tc->snd_congestion, tc->snd_una - tc->bytes_acked)
- && seq_gt (tc->snd_congestion, tc->snd_una)))
- tc->snd_congestion = tc->snd_una - 1;
}
/**
@@ -857,6 +802,20 @@ tcp_cc_handle_event (tcp_connection_t * tc, tcp_rate_sample_t * rs,
*/
/*
+ * See if we can exit and stop retransmitting
+ */
+ if (seq_geq (tc->snd_una, tc->snd_congestion))
+ {
+ /* If successfully recovered, treat ack as congestion avoidance ack
+ * and return. Otherwise, we're still congested so process feedback */
+ if (!tcp_cc_try_recover (tc))
+ {
+ tcp_cc_rcv_ack (tc, rs);
+ return;
+ }
+ }
+
+ /*
* Process (re)transmit feedback. Output path uses this to decide how much
* more data to release into the network
*/
@@ -866,8 +825,7 @@ tcp_cc_handle_event (tcp_connection_t * tc, tcp_rate_sample_t * rs,
tcp_fastrecovery_first_on (tc);
tc->rxt_delivered += tc->sack_sb.rxt_sacked;
- tc->prr_delivered += tc->bytes_acked + tc->sack_sb.last_sacked_bytes
- - tc->sack_sb.last_bytes_delivered;
+ tc->prr_delivered += rs->delivered;
}
else
{
@@ -891,23 +849,6 @@ tcp_cc_handle_event (tcp_connection_t * tc, tcp_rate_sample_t * rs,
tcp_fastrecovery_first_on (tc);
}
- /*
- * See if we can exit and stop retransmitting
- */
- if (seq_geq (tc->snd_una, tc->snd_congestion))
- {
- /* If spurious return, we've already updated everything */
- if (tcp_cc_recover (tc))
- {
- tc->tsecr_last_ack = tc->rcv_opts.tsecr;
- return;
- }
-
- /* Treat as congestion avoidance ack */
- tcp_cc_rcv_ack (tc, rs);
- return;
- }
-
tcp_program_retransmit (tc);
/*
@@ -991,15 +932,6 @@ tcp_rcv_ack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b,
/* If the ACK acks something not yet sent (SEG.ACK > SND.NXT) */
if (PREDICT_FALSE (seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)))
{
- /* We've probably entered recovery and the peer still has some
- * of the data we've sent. Update snd_nxt and accept the ack */
- if (seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)
- && seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_una))
- {
- tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
- goto process_ack;
- }
-
tc->errors.above_ack_wnd += 1;
*error = TCP_ERROR_ACK_FUTURE;
TCP_EVT (TCP_EVT_ACK_RCV_ERR, tc, 0, vnet_buffer (b)->tcp.ack_number);
@@ -1022,8 +954,6 @@ tcp_rcv_ack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b,
return 0;
}
-process_ack:
-
/*
* Looks okay, process feedback
*/
@@ -1042,6 +972,9 @@ process_ack:
if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
tcp_bt_sample_delivery_rate (tc, &rs);
+ else
+ rs.delivered = tc->bytes_acked + tc->sack_sb.last_sacked_bytes -
+ tc->sack_sb.last_bytes_delivered;
if (tc->bytes_acked + tc->sack_sb.last_sacked_bytes)
{
@@ -1106,7 +1039,7 @@ tcp_handle_disconnects (tcp_worker_ctx_t * wrk)
tcp_disconnect_pending_off (tc);
session_transport_closing_notify (&tc->connection);
}
- _vec_len (wrk->pending_disconnects) = 0;
+ vec_set_len (wrk->pending_disconnects, 0);
}
if (vec_len (wrk->pending_resets))
@@ -1119,7 +1052,7 @@ tcp_handle_disconnects (tcp_worker_ctx_t * wrk)
tcp_disconnect_pending_off (tc);
tcp_handle_rst (tc);
}
- _vec_len (wrk->pending_resets) = 0;
+ vec_set_len (wrk->pending_resets, 0);
}
}
@@ -1156,7 +1089,6 @@ tcp_session_enqueue_data (tcp_connection_t * tc, vlib_buffer_t * b,
ASSERT (data_len);
written = session_enqueue_stream_connection (&tc->connection, b, 0,
1 /* queue event */ , 1);
- tc->bytes_in += written;
TCP_EVT (TCP_EVT_INPUT, tc, 0, data_len, written);
@@ -1164,17 +1096,20 @@ tcp_session_enqueue_data (tcp_connection_t * tc, vlib_buffer_t * b,
if (PREDICT_TRUE (written == data_len))
{
tc->rcv_nxt += written;
+ tc->bytes_in += written;
}
/* If more data written than expected, account for out-of-order bytes. */
else if (written > data_len)
{
tc->rcv_nxt += written;
+ tc->bytes_in += data_len;
TCP_EVT (TCP_EVT_CC_INPUT, tc, data_len, written);
}
else if (written > 0)
{
/* We've written something but FIFO is probably full now */
tc->rcv_nxt += written;
+ tc->bytes_in += written;
error = TCP_ERROR_PARTIALLY_ENQUEUED;
}
else
@@ -1361,9 +1296,13 @@ format_tcp_rx_trace (u8 * s, va_list * args)
tcp_connection_t *tc = &t->tcp_connection;
u32 indent = format_get_indent (s);
- s = format (s, "%U state %U\n%U%U", format_tcp_connection_id, tc,
- format_tcp_state, tc->state, format_white_space, indent,
- format_tcp_header, &t->tcp_header, 128);
+ if (!tc->c_lcl_port)
+ s = format (s, "no tcp connection\n%U%U", format_white_space, indent,
+ format_tcp_header, &t->tcp_header, 128);
+ else
+ s = format (s, "%U state %U\n%U%U", format_tcp_connection_id, tc,
+ format_tcp_state, tc->state, format_white_space, indent,
+ format_tcp_header, &t->tcp_header, 128);
return s;
}
@@ -1433,53 +1372,14 @@ tcp_established_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
}
}
-always_inline void
-tcp_node_inc_counter_i (vlib_main_t * vm, u32 tcp4_node, u32 tcp6_node,
- u8 is_ip4, u32 evt, u32 val)
-{
- if (is_ip4)
- vlib_node_increment_counter (vm, tcp4_node, evt, val);
- else
- vlib_node_increment_counter (vm, tcp6_node, evt, val);
-}
-
-#define tcp_maybe_inc_counter(node_id, err, count) \
-{ \
- if (next0 != tcp_next_drop (is_ip4)) \
- tcp_node_inc_counter_i (vm, tcp4_##node_id##_node.index, \
- tcp6_##node_id##_node.index, is_ip4, err, \
- 1); \
-}
-#define tcp_inc_counter(node_id, err, count) \
- tcp_node_inc_counter_i (vm, tcp4_##node_id##_node.index, \
- tcp6_##node_id##_node.index, is_ip4, \
- err, count)
-#define tcp_maybe_inc_err_counter(cnts, err) \
-{ \
- cnts[err] += (next0 != tcp_next_drop (is_ip4)); \
-}
-#define tcp_inc_err_counter(cnts, err, val) \
-{ \
- cnts[err] += val; \
-}
-#define tcp_store_err_counters(node_id, cnts) \
-{ \
- int i; \
- for (i = 0; i < TCP_N_ERROR; i++) \
- if (cnts[i]) \
- tcp_inc_counter(node_id, i, cnts[i]); \
-}
-
-
always_inline uword
tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame, int is_ip4)
{
- u32 thread_index = vm->thread_index, errors = 0;
+ u32 thread_index = vm->thread_index, n_left_from, *from;
tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 err_counters[TCP_N_ERROR] = { 0 };
- u32 n_left_from, *from;
if (node->flags & VLIB_NODE_FLAG_TRACE)
tcp_established_trace_frame (vm, node, frame, is_ip4);
@@ -1543,9 +1443,7 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
b += 1;
}
- errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP,
- thread_index);
- err_counters[TCP_ERROR_MSG_QUEUE_FULL] = errors;
+ session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, thread_index);
tcp_store_err_counters (established, err_counters);
tcp_handle_postponed_dequeues (wrk);
tcp_handle_disconnects (wrk);
@@ -1568,43 +1466,23 @@ VLIB_NODE_FN (tcp6_established_node) (vlib_main_t * vm,
return tcp46_established_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (tcp4_established_node) =
-{
+VLIB_REGISTER_NODE (tcp4_established_node) = {
.name = "tcp4-established",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
.error_counters = tcp_input_error_counters,
- .n_next_nodes = TCP_ESTABLISHED_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_ESTABLISHED_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (tcp6_established_node) =
-{
+VLIB_REGISTER_NODE (tcp6_established_node) = {
.name = "tcp6-established",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
.error_counters = tcp_input_error_counters,
- .n_next_nodes = TCP_ESTABLISHED_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_ESTABLISHED_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
static u8
@@ -1796,15 +1674,54 @@ tcp_check_tx_offload (tcp_connection_t * tc, int is_ipv4)
return;
hw_if = vnet_get_sup_hw_interface (vnm, sw_if_idx);
- if (hw_if->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO)
+ if (hw_if->caps & VNET_HW_IF_CAP_TCP_GSO)
tc->cfg_flags |= TCP_CFG_F_TSO;
}
+static void
+tcp_input_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t **bs, u16 *nexts, u32 n_bufs, u8 is_ip4)
+{
+ tcp_connection_t *tc;
+ tcp_header_t *tcp;
+ tcp_rx_trace_t *t;
+ u8 flags;
+ int i;
+
+ for (i = 0; i < n_bufs; i++)
+ {
+ if (!(bs[i]->flags & VLIB_BUFFER_IS_TRACED))
+ continue;
+
+ t = vlib_add_trace (vm, node, bs[i], sizeof (*t));
+ if (nexts[i] == TCP_INPUT_NEXT_DROP || nexts[i] == TCP_INPUT_NEXT_PUNT ||
+ nexts[i] == TCP_INPUT_NEXT_RESET)
+ {
+ tc = 0;
+ }
+ else
+ {
+ flags = vnet_buffer (bs[i])->tcp.flags;
+
+ if (flags == TCP_STATE_LISTEN)
+ tc = tcp_listener_get (vnet_buffer (bs[i])->tcp.connection_index);
+ else if (flags == TCP_STATE_SYN_SENT)
+ tc = tcp_half_open_connection_get (
+ vnet_buffer (bs[i])->tcp.connection_index);
+ else
+ tc = tcp_connection_get (vnet_buffer (bs[i])->tcp.connection_index,
+ vm->thread_index);
+ }
+ tcp = tcp_buffer_hdr (bs[i]);
+ tcp_set_rx_trace_data (t, tc, tcp, bs[i], is_ip4);
+ }
+}
+
always_inline uword
tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame, int is_ip4)
{
- u32 n_left_from, *from, thread_index = vm->thread_index, errors = 0;
+ u32 n_left_from, *from, thread_index = vm->thread_index;
tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
@@ -1970,7 +1887,9 @@ tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
SESSION_E_NONE))
{
tcp_send_reset_w_pkt (new_tc, b[0], thread_index, is_ip4);
- tcp_connection_cleanup (new_tc);
+ tcp_program_cleanup (wrk, new_tc);
+ new_tc->state = TCP_STATE_CLOSED;
+ new_tc->c_s_index = ~0;
error = TCP_ERROR_CREATE_SESSION_FAIL;
goto cleanup_ho;
}
@@ -1991,8 +1910,10 @@ tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (session_stream_connect_notify (&new_tc->connection,
SESSION_E_NONE))
{
- tcp_connection_cleanup (new_tc);
tcp_send_reset_w_pkt (tc, b[0], thread_index, is_ip4);
+ tcp_program_cleanup (wrk, new_tc);
+ new_tc->state = TCP_STATE_CLOSED;
+ new_tc->c_s_index = ~0;
TCP_EVT (TCP_EVT_RST_SENT, tc);
error = TCP_ERROR_CREATE_SESSION_FAIL;
goto cleanup_ho;
@@ -2039,9 +1960,7 @@ tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp_inc_counter (syn_sent, error, 1);
}
- errors =
- session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, thread_index);
- tcp_inc_counter (syn_sent, TCP_ERROR_MSG_QUEUE_FULL, errors);
+ session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, thread_index);
vlib_buffer_free (vm, from, frame->n_vectors);
tcp_handle_disconnects (wrk);
@@ -2062,7 +1981,6 @@ VLIB_NODE_FN (tcp6_syn_sent_node) (vlib_main_t * vm,
return tcp46_syn_sent_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp4_syn_sent_node) =
{
.name = "tcp4-syn-sent",
@@ -2070,18 +1988,9 @@ VLIB_REGISTER_NODE (tcp4_syn_sent_node) =
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
.error_counters = tcp_input_error_counters,
- .n_next_nodes = TCP_SYN_SENT_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp6_syn_sent_node) =
{
.name = "tcp6-syn-sent",
@@ -2089,16 +1998,8 @@ VLIB_REGISTER_NODE (tcp6_syn_sent_node) =
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
.error_counters = tcp_input_error_counters,
- .n_next_nodes = TCP_SYN_SENT_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
static void
tcp46_rcv_process_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
@@ -2130,7 +2031,7 @@ always_inline uword
tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame, int is_ip4)
{
- u32 thread_index = vm->thread_index, errors, n_left_from, *from, max_deq;
+ u32 thread_index = vm->thread_index, n_left_from, *from, max_deq;
tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
@@ -2198,15 +2099,6 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
switch (tc->state)
{
case TCP_STATE_SYN_RCVD:
-
- /* Make sure the segment is exactly right */
- if (tc->rcv_nxt != vnet_buffer (b[0])->tcp.seq_number || is_fin)
- {
- tcp_send_reset_w_pkt (tc, b[0], thread_index, is_ip4);
- error = TCP_ERROR_SEGMENT_INVALID;
- goto drop;
- }
-
/*
* If the segment acknowledgment is not acceptable, form a
* reset segment,
@@ -2220,6 +2112,10 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
goto drop;
}
+ /* Avoid notifying app if connection is about to be closed */
+ if (PREDICT_FALSE (is_fin))
+ break;
+
/* Update rtt and rto */
tcp_estimate_initial_rtt (tc);
tcp_connection_tx_pacer_update (tc);
@@ -2248,7 +2144,7 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp_connection_cleanup (tc);
goto drop;
}
- error = TCP_ERROR_ACK_OK;
+ error = TCP_ERROR_CONN_ACCEPTED;
break;
case TCP_STATE_ESTABLISHED:
/* We can get packets in established state here because they
@@ -2327,8 +2223,8 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (max_deq > tc->burst_acked)
break;
- tcp_send_fin (tc);
tcp_connection_timers_reset (tc);
+ tcp_send_fin (tc);
tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
tcp_timer_set (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
tcp_cfg.lastack_time);
@@ -2440,15 +2336,15 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp_cfg.closewait_time);
break;
case TCP_STATE_SYN_RCVD:
- /* Send FIN-ACK, enter LAST-ACK and because the app was not
- * notified yet, set a cleanup timer instead of relying on
- * disconnect notify and the implicit close call. */
+ /* Send FIN-ACK and enter TIME-WAIT, as opposed to LAST-ACK,
+ * because the app was not notified yet and we want to avoid
+ * session state transitions to ensure cleanup does not
+ * propagate to app. */
tcp_connection_timers_reset (tc);
tc->rcv_nxt += 1;
tcp_send_fin (tc);
- tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
- tcp_timer_set (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
- tcp_cfg.lastack_time);
+ tcp_connection_set_state (tc, TCP_STATE_TIME_WAIT);
+ tcp_program_cleanup (wrk, tc);
break;
case TCP_STATE_CLOSE_WAIT:
case TCP_STATE_CLOSING:
@@ -2503,9 +2399,7 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp_inc_counter (rcv_process, error, 1);
}
- errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP,
- thread_index);
- tcp_inc_counter (rcv_process, TCP_ERROR_MSG_QUEUE_FULL, errors);
+ session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, thread_index);
tcp_handle_postponed_dequeues (wrk);
tcp_handle_disconnects (wrk);
vlib_buffer_free (vm, from, frame->n_vectors);
@@ -2527,43 +2421,23 @@ VLIB_NODE_FN (tcp6_rcv_process_node) (vlib_main_t * vm,
return tcp46_rcv_process_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (tcp4_rcv_process_node) =
-{
+VLIB_REGISTER_NODE (tcp4_rcv_process_node) = {
.name = "tcp4-rcv-process",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
.error_counters = tcp_input_error_counters,
- .n_next_nodes = TCP_RCV_PROCESS_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_RCV_PROCESS_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (tcp6_rcv_process_node) =
-{
+VLIB_REGISTER_NODE (tcp6_rcv_process_node) = {
.name = "tcp6-rcv-process",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
.error_counters = tcp_input_error_counters,
- .n_next_nodes = TCP_RCV_PROCESS_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_RCV_PROCESS_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
static void
tcp46_listen_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
@@ -2664,7 +2538,6 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
while (n_left_from > 0)
{
- u32 error = TCP_ERROR_NONE;
tcp_connection_t *lc, *child;
/* Flags initialized with connection state after lookup */
@@ -2672,21 +2545,22 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
lc = tcp_listener_get (vnet_buffer (b[0])->tcp.connection_index);
}
- else /* We are in TimeWait state*/
+ /* Probably we are in time-wait or closed state */
+ else
{
tcp_connection_t *tc;
tc = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
thread_index);
if (tc->state != TCP_STATE_TIME_WAIT)
{
- error = TCP_ERROR_CREATE_EXISTS;
+ tcp_inc_counter (listen, TCP_ERROR_CREATE_EXISTS, 1);
goto done;
}
if (PREDICT_FALSE (!syn_during_timewait (tc, b[0], &tw_iss)))
{
/* This SYN can't be accepted */
- error = TCP_ERROR_CREATE_EXISTS;
+ tcp_inc_counter (listen, TCP_ERROR_CREATE_EXISTS, 1);
goto done;
}
@@ -2696,7 +2570,7 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
/* listener was cleaned up */
if (!lc)
{
- error = TCP_ERROR_NO_LISTENER;
+ tcp_inc_counter (listen, TCP_ERROR_NO_LISTENER, 1);
goto done;
}
}
@@ -2706,7 +2580,7 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp_lookup_connection (lc->c_fib_index, b[0], thread_index, is_ip4);
if (PREDICT_FALSE (child->state != TCP_STATE_LISTEN))
{
- error = TCP_ERROR_CREATE_EXISTS;
+ tcp_inc_counter (listen, TCP_ERROR_CREATE_EXISTS, 1);
goto done;
}
@@ -2723,7 +2597,7 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (tcp_options_parse (tcp_buffer_hdr (b[0]), &child->rcv_opts, 1))
{
- error = TCP_ERROR_OPTIONS;
+ tcp_inc_counter (listen, TCP_ERROR_OPTIONS, 1);
tcp_connection_free (child);
goto done;
}
@@ -2753,7 +2627,7 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
lc->c_thread_index, 0 /* notify */ ))
{
tcp_connection_cleanup (child);
- error = TCP_ERROR_CREATE_SESSION_FAIL;
+ tcp_inc_counter (listen, TCP_ERROR_CREATE_SESSION_FAIL, 1);
goto done;
}
@@ -2761,12 +2635,11 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
child->tx_fifo_size = transport_tx_fifo_size (&child->connection);
tcp_send_synack (child);
+ n_syns += 1;
done:
-
b += 1;
n_left_from -= 1;
- n_syns += (error == TCP_ERROR_NONE);
}
tcp_inc_counter (listen, TCP_ERROR_SYNS_RCVD, n_syns);
@@ -2787,98 +2660,82 @@ VLIB_NODE_FN (tcp6_listen_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return tcp46_listen_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (tcp4_listen_node) =
-{
+VLIB_REGISTER_NODE (tcp4_listen_node) = {
.name = "tcp4-listen",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
.error_counters = tcp_input_error_counters,
- .n_next_nodes = TCP_LISTEN_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_LISTEN_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (tcp6_listen_node) =
-{
+VLIB_REGISTER_NODE (tcp6_listen_node) = {
.name = "tcp6-listen",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
.error_counters = tcp_input_error_counters,
- .n_next_nodes = TCP_LISTEN_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_LISTEN_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
-typedef enum _tcp_input_next
+always_inline uword
+tcp46_drop_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, int is_ip4)
{
- TCP_INPUT_NEXT_DROP,
- TCP_INPUT_NEXT_LISTEN,
- TCP_INPUT_NEXT_RCV_PROCESS,
- TCP_INPUT_NEXT_SYN_SENT,
- TCP_INPUT_NEXT_ESTABLISHED,
- TCP_INPUT_NEXT_RESET,
- TCP_INPUT_NEXT_PUNT,
- TCP_INPUT_N_NEXT
-} tcp_input_next_t;
+ u32 *from = vlib_frame_vector_args (frame);
-#define foreach_tcp4_input_next \
- _ (DROP, "ip4-drop") \
- _ (LISTEN, "tcp4-listen") \
- _ (RCV_PROCESS, "tcp4-rcv-process") \
- _ (SYN_SENT, "tcp4-syn-sent") \
- _ (ESTABLISHED, "tcp4-established") \
- _ (RESET, "tcp4-reset") \
- _ (PUNT, "ip4-punt")
-
-#define foreach_tcp6_input_next \
- _ (DROP, "ip6-drop") \
- _ (LISTEN, "tcp6-listen") \
- _ (RCV_PROCESS, "tcp6-rcv-process") \
- _ (SYN_SENT, "tcp6-syn-sent") \
- _ (ESTABLISHED, "tcp6-established") \
- _ (RESET, "tcp6-reset") \
- _ (PUNT, "ip6-punt")
+ /* Error counters must be incremented by previous nodes */
+ vlib_buffer_free (vm, from, frame->n_vectors);
-#define filter_flags (TCP_FLAG_SYN|TCP_FLAG_ACK|TCP_FLAG_RST|TCP_FLAG_FIN)
+ return frame->n_vectors;
+}
-static void
-tcp_input_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_buffer_t ** bs, u32 n_bufs, u8 is_ip4)
+VLIB_NODE_FN (tcp4_drop_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
{
- tcp_connection_t *tc;
- tcp_header_t *tcp;
- tcp_rx_trace_t *t;
- int i;
+ return tcp46_drop_inline (vm, node, from_frame, 1 /* is_ip4 */);
+}
- for (i = 0; i < n_bufs; i++)
- {
- if (bs[i]->flags & VLIB_BUFFER_IS_TRACED)
- {
- t = vlib_add_trace (vm, node, bs[i], sizeof (*t));
- tc = tcp_connection_get (vnet_buffer (bs[i])->tcp.connection_index,
- vm->thread_index);
- tcp = vlib_buffer_get_current (bs[i]);
- tcp_set_rx_trace_data (t, tc, tcp, bs[i], is_ip4);
- }
- }
+VLIB_NODE_FN (tcp6_drop_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return tcp46_drop_inline (vm, node, from_frame, 0 /* is_ip4 */);
}
+VLIB_REGISTER_NODE (tcp4_drop_node) = {
+ .name = "tcp4-drop",
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_counters = tcp_input_error_counters,
+};
+
+VLIB_REGISTER_NODE (tcp6_drop_node) = {
+ .name = "tcp6-drop",
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_counters = tcp_input_error_counters,
+};
+
+#define foreach_tcp4_input_next \
+ _ (DROP, "tcp4-drop") \
+ _ (LISTEN, "tcp4-listen") \
+ _ (RCV_PROCESS, "tcp4-rcv-process") \
+ _ (SYN_SENT, "tcp4-syn-sent") \
+ _ (ESTABLISHED, "tcp4-established") \
+ _ (RESET, "tcp4-reset") \
+ _ (PUNT, "ip4-punt")
+
+#define foreach_tcp6_input_next \
+ _ (DROP, "tcp6-drop") \
+ _ (LISTEN, "tcp6-listen") \
+ _ (RCV_PROCESS, "tcp6-rcv-process") \
+ _ (SYN_SENT, "tcp6-syn-sent") \
+ _ (ESTABLISHED, "tcp6-established") \
+ _ (RESET, "tcp6-reset") \
+ _ (PUNT, "ip6-punt")
+
+#define filter_flags (TCP_FLAG_SYN|TCP_FLAG_ACK|TCP_FLAG_RST|TCP_FLAG_FIN)
+
static void
tcp_input_set_error_next (tcp_main_t * tm, u16 * next, u32 * error, u8 is_ip4)
{
@@ -2899,9 +2756,8 @@ tcp_input_set_error_next (tcp_main_t * tm, u16 * next, u32 * error, u8 is_ip4)
}
static inline void
-tcp_input_dispatch_buffer (tcp_main_t * tm, tcp_connection_t * tc,
- vlib_buffer_t * b, u16 * next,
- vlib_node_runtime_t * error_node)
+tcp_input_dispatch_buffer (tcp_main_t *tm, tcp_connection_t *tc,
+ vlib_buffer_t *b, u16 *next, u16 *err_counters)
{
tcp_header_t *tcp;
u32 error;
@@ -2923,7 +2779,7 @@ tcp_input_dispatch_buffer (tcp_main_t * tm, tcp_connection_t * tc,
if (PREDICT_FALSE (error != TCP_ERROR_NONE))
{
- b->error = error_node->errors[error];
+ tcp_inc_err_counter (err_counters, error, 1);
if (error == TCP_ERROR_DISPATCH)
clib_warning ("tcp conn %u disp error state %U flags %U",
tc->c_c_index, format_tcp_state, tc->state,
@@ -2939,6 +2795,7 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tcp_main_t *tm = vnet_get_tcp_main ();
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 nexts[VLIB_FRAME_SIZE], *next;
+ u16 err_counters[TCP_N_ERROR] = { 0 };
tcp_update_time_now (tcp_get_worker (thread_index));
@@ -2977,8 +2834,8 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_buffer (b[0])->tcp.connection_index = tc0->c_c_index;
vnet_buffer (b[1])->tcp.connection_index = tc1->c_c_index;
- tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], node);
- tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1], node);
+ tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], err_counters);
+ tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1], err_counters);
}
else
{
@@ -2986,24 +2843,26 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
{
ASSERT (tcp_lookup_is_valid (tc0, b[0], tcp_buffer_hdr (b[0])));
vnet_buffer (b[0])->tcp.connection_index = tc0->c_c_index;
- tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], node);
+ tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0],
+ err_counters);
}
else
{
tcp_input_set_error_next (tm, &next[0], &error0, is_ip4);
- b[0]->error = node->errors[error0];
+ tcp_inc_err_counter (err_counters, error0, 1);
}
if (PREDICT_TRUE (tc1 != 0))
{
ASSERT (tcp_lookup_is_valid (tc1, b[1], tcp_buffer_hdr (b[1])));
vnet_buffer (b[1])->tcp.connection_index = tc1->c_c_index;
- tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1], node);
+ tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1],
+ err_counters);
}
else
{
tcp_input_set_error_next (tm, &next[1], &error1, is_ip4);
- b[1]->error = node->errors[error1];
+ tcp_inc_err_counter (err_counters, error1, 1);
}
}
@@ -3029,12 +2888,12 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
{
ASSERT (tcp_lookup_is_valid (tc0, b[0], tcp_buffer_hdr (b[0])));
vnet_buffer (b[0])->tcp.connection_index = tc0->c_c_index;
- tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], node);
+ tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], err_counters);
}
else
{
tcp_input_set_error_next (tm, &next[0], &error0, is_ip4);
- b[0]->error = node->errors[error0];
+ tcp_inc_err_counter (err_counters, error0, 1);
}
b += 1;
@@ -3043,8 +2902,9 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
- tcp_input_trace_frame (vm, node, bufs, frame->n_vectors, is_ip4);
+ tcp_input_trace_frame (vm, node, bufs, nexts, frame->n_vectors, is_ip4);
+ tcp_store_err_counters (input, err_counters);
vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
return frame->n_vectors;
}
@@ -3065,7 +2925,6 @@ VLIB_NODE_FN (tcp6_input_nolookup_node) (vlib_main_t * vm,
1 /* is_nolookup */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp4_input_nolookup_node) =
{
.name = "tcp4-input-nolookup",
@@ -3083,9 +2942,7 @@ VLIB_REGISTER_NODE (tcp4_input_nolookup_node) =
.format_buffer = format_tcp_header,
.format_trace = format_tcp_rx_trace,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp6_input_nolookup_node) =
{
.name = "tcp6-input-nolookup",
@@ -3103,7 +2960,6 @@ VLIB_REGISTER_NODE (tcp6_input_nolookup_node) =
.format_buffer = format_tcp_header,
.format_trace = format_tcp_rx_trace,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (tcp4_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * from_frame)
@@ -3119,7 +2975,6 @@ VLIB_NODE_FN (tcp6_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
0 /* is_nolookup */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp4_input_node) =
{
.name = "tcp4-input",
@@ -3137,9 +2992,7 @@ VLIB_REGISTER_NODE (tcp4_input_node) =
.format_buffer = format_tcp_header,
.format_trace = format_tcp_rx_trace,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp6_input_node) =
{
.name = "tcp6-input",
@@ -3157,7 +3010,6 @@ VLIB_REGISTER_NODE (tcp6_input_node) =
.format_buffer = format_tcp_header,
.format_trace = format_tcp_rx_trace,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
void
@@ -3343,6 +3195,8 @@ do { \
_(FIN_WAIT_2, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
TCP_ERROR_NONE);
_(FIN_WAIT_2, TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ _ (FIN_WAIT_2, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
+ TCP_ERROR_NONE);
_(CLOSE_WAIT, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
_(CLOSE_WAIT, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
TCP_ERROR_NONE);
@@ -3392,7 +3246,7 @@ do { \
_(CLOSED, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_DROP,
TCP_ERROR_CONNECTION_CLOSED);
_(CLOSED, TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET, TCP_ERROR_CONNECTION_CLOSED);
- _(CLOSED, TCP_FLAG_SYN, TCP_INPUT_NEXT_RESET, TCP_ERROR_CONNECTION_CLOSED);
+ _ (CLOSED, TCP_FLAG_SYN, TCP_INPUT_NEXT_LISTEN, TCP_ERROR_NONE);
_(CLOSED, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET,
TCP_ERROR_CONNECTION_CLOSED);
#undef _
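
The dispatch entries patched above map a {connection state, TCP flags} pair to a next node and an error, e.g. a SYN arriving in CLOSED now goes to the listen node instead of triggering a reset. A reduced sketch of such a table (two states and SYN/ACK flag combinations only):

    #include <stdio.h>

    enum { ST_CLOSED, ST_ESTABLISHED, N_STATES };
    enum { NEXT_DROP, NEXT_LISTEN, NEXT_RCV_PROCESS, NEXT_RESET };
    #define FLAG_SYN 0x1
    #define FLAG_ACK 0x2
    #define N_FLAGS  4 /* SYN/ACK combinations only, for brevity */

    static int dispatch[N_STATES][N_FLAGS];

    int
    main (void)
    {
      int s, f;

      /* default to reset, then add the valid transitions */
      for (s = 0; s < N_STATES; s++)
        for (f = 0; f < N_FLAGS; f++)
          dispatch[s][f] = NEXT_RESET;

      dispatch[ST_CLOSED][FLAG_SYN] = NEXT_LISTEN; /* the patched entry */
      dispatch[ST_ESTABLISHED][FLAG_ACK] = NEXT_RCV_PROCESS;

      printf ("SYN in CLOSED -> next %d (listen)\n",
              dispatch[ST_CLOSED][FLAG_SYN]);
      return 0;
    }
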
diff --git a/src/vnet/tcp/tcp_newreno.c b/src/vnet/tcp/tcp_newreno.c
index c5ffc2a4109..8c7e77fc974 100644
--- a/src/vnet/tcp/tcp_newreno.c
+++ b/src/vnet/tcp/tcp_newreno.c
@@ -49,12 +49,12 @@ newreno_rcv_ack (tcp_connection_t * tc, tcp_rate_sample_t * rs)
{
if (tcp_in_slowstart (tc))
{
- tc->cwnd += clib_min (tc->snd_mss, tc->bytes_acked);
+ tc->cwnd += clib_min (tc->snd_mss, rs->delivered);
}
else
{
/* tc->cwnd += clib_max ((tc->snd_mss * tc->snd_mss) / tc->cwnd, 1); */
- tcp_cwnd_accumulate (tc, tc->cwnd, tc->bytes_acked);
+ tcp_cwnd_accumulate (tc, tc->cwnd, rs->delivered);
}
}
@@ -62,30 +62,31 @@ void
newreno_rcv_cong_ack (tcp_connection_t * tc, tcp_cc_ack_t ack_type,
tcp_rate_sample_t * rs)
{
+ /* With SACKs, PRR controls the data in flight post congestion */
+ if (PREDICT_TRUE (tcp_opts_sack_permitted (tc)))
+ return;
+
if (ack_type == TCP_CC_DUPACK)
{
- if (!tcp_opts_sack_permitted (tc))
- tc->cwnd += tc->snd_mss;
+ tc->cwnd += tc->snd_mss;
}
else if (ack_type == TCP_CC_PARTIALACK)
{
- /* RFC 6582 Sec. 3.2 */
- if (!tcp_opts_sack_permitted (&tc->rcv_opts))
- {
- /* Deflate the congestion window by the amount of new data
- * acknowledged by the Cumulative Acknowledgment field.
- * If the partial ACK acknowledges at least one SMSS of new data,
- * then add back SMSS bytes to the congestion window. This
- * artificially inflates the congestion window in order to reflect
- * the additional segment that has left the network. This "partial
- * window deflation" attempts to ensure that, when fast recovery
- * eventually ends, approximately ssthresh amount of data will be
- * outstanding in the network.*/
- tc->cwnd = (tc->cwnd > tc->bytes_acked + tc->snd_mss) ?
- tc->cwnd - tc->bytes_acked : tc->snd_mss;
- if (tc->bytes_acked > tc->snd_mss)
- tc->cwnd += tc->snd_mss;
- }
+ /* RFC 6582 Sec. 3.2
+ * Deflate the congestion window by the amount of new data
+ * acknowledged by the Cumulative Acknowledgment field.
+ * If the partial ACK acknowledges at least one SMSS of new data,
+ * then add back SMSS bytes to the congestion window. This
+ * artificially inflates the congestion window in order to reflect
+ * the additional segment that has left the network. This "partial
+ * window deflation" attempts to ensure that, when fast recovery
+ * eventually ends, approximately ssthresh amount of data will be
+ * outstanding in the network. */
+ tc->cwnd = (tc->cwnd > tc->bytes_acked + tc->snd_mss) ?
+ tc->cwnd - tc->bytes_acked :
+ tc->snd_mss;
+ if (tc->bytes_acked > tc->snd_mss)
+ tc->cwnd += tc->snd_mss;
}
}
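
The consolidated branch above implements RFC 6582 partial window deflation: subtract the newly acked bytes from cwnd, then add back one SMSS if at least one SMSS of new data was acked. A worked example with concrete numbers:

    #include <stdint.h>
    #include <stdio.h>

    int
    main (void)
    {
      uint32_t snd_mss = 1460, cwnd = 14600, bytes_acked = 4380; /* 3 * mss */

      cwnd = (cwnd > bytes_acked + snd_mss) ? cwnd - bytes_acked : snd_mss;
      if (bytes_acked > snd_mss)
        cwnd += snd_mss;

      printf ("deflated cwnd = %u\n", cwnd); /* 14600 - 4380 + 1460 = 11680 */
      return 0;
    }
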
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 5b445fa5165..78148cd5695 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -321,7 +321,6 @@ tcp_update_burst_snd_vars (tcp_connection_t * tc)
if (tc->snd_una == tc->snd_nxt)
{
tcp_cc_event (tc, TCP_CC_EVT_START_TX);
- tcp_connection_tx_pacer_reset (tc, tc->cwnd, TRANSPORT_PACER_MIN_BURST);
}
if (tc->flags & TCP_CONN_PSH_PENDING)
@@ -332,25 +331,6 @@ tcp_update_burst_snd_vars (tcp_connection_t * tc)
}
}
-#endif /* CLIB_MARCH_VARIANT */
-
-static void *
-tcp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b)
-{
- if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
- vlib_buffer_free_one (vm, b->next_buffer);
- /* Zero all flags but free list index and trace flag */
- b->flags &= VLIB_BUFFER_NEXT_PRESENT - 1;
- b->current_data = 0;
- b->current_length = 0;
- b->total_length_not_including_first_buffer = 0;
- vnet_buffer (b)->tcp.flags = 0;
- VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
- /* Leave enough space for headers */
- return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN);
-}
-
-#ifndef CLIB_MARCH_VARIANT
static void *
tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b)
{
@@ -363,7 +343,6 @@ tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b)
return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN);
}
-
/* Compute TCP checksum in software when offloading is disabled for a connection */
u16
ip6_tcp_compute_checksum_custom (vlib_main_t * vm, vlib_buffer_t * p0,
@@ -441,7 +420,7 @@ static inline void
tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state,
u8 flags)
{
- tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
+ tcp_options_t _snd_opts = {}, *snd_opts = &_snd_opts;
u8 tcp_opts_len, tcp_hdr_opts_len;
tcp_header_t *th;
u16 wnd;
@@ -568,24 +547,24 @@ tcp_enqueue_to_output (tcp_worker_ctx_t * wrk, vlib_buffer_t * b, u32 bi,
wrk->tco_next_node[!is_ip4]);
}
-#endif /* CLIB_MARCH_VARIANT */
-
-static int
-tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b, u8 is_ip4)
+int
+tcp_buffer_make_reset (vlib_main_t *vm, vlib_buffer_t *b, u8 is_ip4)
{
- ip4_header_t *ih4;
- ip6_header_t *ih6;
- tcp_header_t *th;
- ip4_address_t src_ip4, dst_ip4;
+ ip4_address_t src_ip4 = {}, dst_ip4 = {};
ip6_address_t src_ip6, dst_ip6;
u16 src_port, dst_port;
u32 tmp, len, seq, ack;
+ ip4_header_t *ih4;
+ ip6_header_t *ih6;
+ tcp_header_t *th;
u8 flags;
- /* Find IP and TCP headers */
+ /*
+ * Find IP and TCP headers and glean information from them. Assumes
+ * buffer was parsed by something like @ref tcp_input_lookup_buffer
+ */
th = tcp_buffer_hdr (b);
- /* Save src and dst ip */
if (is_ip4)
{
ih4 = vlib_buffer_get_current (b);
@@ -625,7 +604,23 @@ tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b, u8 is_ip4)
seq = 0;
}
- tcp_reuse_buffer (vm, b);
+ /*
+ * Clear and reuse current buffer for reset
+ */
+ if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ vlib_buffer_free_one (vm, b->next_buffer);
+
+ /* Zero all flags but free list index and trace flag */
+ b->flags &= VLIB_BUFFER_NEXT_PRESENT - 1;
+ /* Make sure new tcp header comes after current ip */
+ b->current_data = ((u8 *) th - b->data) + sizeof (tcp_header_t);
+ b->current_length = 0;
+ b->total_length_not_including_first_buffer = 0;
+ vnet_buffer (b)->tcp.flags = 0;
+
+ /*
+ * Add TCP and IP headers
+ */
th = vlib_buffer_push_tcp_net_order (b, dst_port, src_port, seq, ack,
sizeof (tcp_header_t), flags, 0);
@@ -646,7 +641,6 @@ tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b, u8 is_ip4)
return 0;
}
-#ifndef CLIB_MARCH_VARIANT
/**
* Send reset without reusing existing buffer
*
@@ -662,8 +656,8 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
u8 tcp_hdr_len, flags = 0;
tcp_header_t *th, *pkt_th;
u32 seq, ack, bi;
- ip4_header_t *ih4, *pkt_ih4;
- ip6_header_t *ih6, *pkt_ih6;
+ ip4_header_t *pkt_ih4;
+ ip6_header_t *pkt_ih6;
if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
{
@@ -673,6 +667,7 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
b = vlib_get_buffer (vm, bi);
tcp_init_buffer (vm, b);
+ vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
/* Make and write options */
tcp_hdr_len = sizeof (tcp_header_t);
@@ -693,6 +688,7 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
flags = TCP_FLAG_RST;
seq = pkt_th->ack_number;
ack = (tc->state >= TCP_STATE_SYN_RCVD) ? tc->rcv_nxt : 0;
+ ack = clib_host_to_net_u32 (ack);
}
else
{
@@ -703,28 +699,7 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port,
seq, ack, tcp_hdr_len, flags, 0);
-
- /* Swap src and dst ip */
- if (is_ip4)
- {
- ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40);
- ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address,
- &pkt_ih4->src_address, IP_PROTOCOL_TCP,
- tcp_csum_offload (tc));
- th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
- }
- else
- {
- int bogus = ~0;
- ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) ==
- 0x60);
- ih6 = vlib_buffer_push_ip6_custom (vm, b, &pkt_ih6->dst_address,
- &pkt_ih6->src_address,
- IP_PROTOCOL_TCP,
- tc->ipv6_flow_label);
- th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
- ASSERT (!bogus);
- }
+ th->checksum = tcp_compute_checksum (tc, b);
tcp_enqueue_half_open (wrk, tc, b, bi);
TCP_EVT (TCP_EVT_RST_SENT, tc);
@@ -792,7 +767,7 @@ tcp_send_syn (tcp_connection_t * tc)
* such that we can return if we've ran out.
*/
tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
- tc->rto * TCP_TO_TIMER_TICK);
+ (u32) tc->rto * TCP_TO_TIMER_TICK);
if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
{
@@ -863,10 +838,9 @@ tcp_send_fin (tcp_connection_t * tc)
/* Out of buffers so program fin retransmit ASAP */
tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT,
tcp_cfg.alloc_err_timeout);
- if (fin_snt)
- tc->snd_nxt += 1;
- else
- /* Make sure retransmit retries a fin not data */
+ tc->snd_nxt += 1;
+ /* Make sure retransmit retries a FIN, not data, with the right snd_nxt */
+ if (!fin_snt)
tc->flags |= TCP_CONN_FINSNT;
tcp_worker_stats_inc (wrk, no_buffer, 1);
return;
@@ -968,11 +942,9 @@ tcp_buffer_len (vlib_buffer_t * b)
return data_len;
}
-u32
-tcp_session_push_header (transport_connection_t * tconn, vlib_buffer_t * b)
+always_inline u32
+tcp_push_one_header (tcp_connection_t *tc, vlib_buffer_t *b)
{
- tcp_connection_t *tc = (tcp_connection_t *) tconn;
-
if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
tcp_bt_track_tx (tc, tcp_buffer_len (b));
@@ -980,6 +952,37 @@ tcp_session_push_header (transport_connection_t * tconn, vlib_buffer_t * b)
/* update_snd_nxt */ 1);
tcp_validate_txf_size (tc, tc->snd_nxt - tc->snd_una);
+ return 0;
+}
+
+u32
+tcp_session_push_header (transport_connection_t *tconn, vlib_buffer_t **bs,
+ u32 n_bufs)
+{
+ tcp_connection_t *tc = (tcp_connection_t *) tconn;
+
+ while (n_bufs >= 4)
+ {
+ vlib_prefetch_buffer_header (bs[2], STORE);
+ vlib_prefetch_buffer_header (bs[3], STORE);
+
+ tcp_push_one_header (tc, bs[0]);
+ tcp_push_one_header (tc, bs[1]);
+
+ n_bufs -= 2;
+ bs += 2;
+ }
+ while (n_bufs)
+ {
+ if (n_bufs > 1)
+ vlib_prefetch_buffer_header (bs[1], STORE);
+
+ tcp_push_one_header (tc, bs[0]);
+
+ n_bufs -= 1;
+ bs += 1;
+ }
+
/* If not tracking an ACK, start tracking */
if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))
{
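
The batched tcp_session_push_header above processes buffers two at a time while prefetching two ahead, so header writes land in warm cache lines. A standalone sketch of the loop shape (__builtin_prefetch is the GCC/clang builtin; buf_t and push_one are stand-ins for the VPP types):

    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint32_t len; } buf_t;

    static uint32_t pushed;

    static void
    push_one (buf_t *b)
    {
      pushed += b->len; /* stands in for writing the TCP header */
    }

    static void
    push_batch (buf_t **bs, uint32_t n_bufs)
    {
      while (n_bufs >= 4)
        {
          __builtin_prefetch (bs[2], 1 /* write */);
          __builtin_prefetch (bs[3], 1);
          push_one (bs[0]);
          push_one (bs[1]);
          n_bufs -= 2;
          bs += 2;
        }
      while (n_bufs)
        {
          if (n_bufs > 1)
            __builtin_prefetch (bs[1], 1);
          push_one (bs[0]);
          n_bufs -= 1;
          bs += 1;
        }
    }

    int
    main (void)
    {
      buf_t b[5] = { { 1 }, { 2 }, { 3 }, { 4 }, { 5 } };
      buf_t *bs[5] = { &b[0], &b[1], &b[2], &b[3], &b[4] };

      push_batch (bs, 5);
      printf ("%u\n", pushed); /* 15: all five buffers pushed once */
      return 0;
    }
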
@@ -1113,7 +1116,7 @@ tcp_prepare_segment (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
data = tcp_init_buffer (vm, *b);
n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
max_deq_bytes);
- ASSERT (n_bytes == max_deq_bytes);
+ ASSERT (n_bytes > 0);
b[0]->current_length = n_bytes;
tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0,
/* burst */ 0, /* update_snd_nxt */ 0);
@@ -1275,6 +1278,7 @@ tcp_cc_init_rxt_timeout (tcp_connection_t * tc)
tc->cwnd_acc_bytes = 0;
tc->tr_occurences += 1;
tc->sack_sb.reorder = TCP_DUPACK_THRESHOLD;
+ tc->sack_sb.rescue_rxt = tc->snd_una - 1;
tcp_recovery_on (tc);
}
@@ -1341,7 +1345,10 @@ tcp_timer_retransmit_handler (tcp_connection_t * tc)
}
if (tcp_opts_sack_permitted (&tc->rcv_opts))
- tcp_check_sack_reneging (tc);
+ {
+ tcp_check_sack_reneging (tc);
+ scoreboard_rxt_mark_lost (&tc->sack_sb, tc->snd_una, tc->snd_nxt);
+ }
/* Update send congestion to make sure that rxt has data to send */
tc->snd_congestion = tc->snd_nxt;
@@ -1482,7 +1489,7 @@ tcp_timer_retransmit_syn_handler (tcp_connection_t * tc)
tcp_enqueue_half_open (wrk, tc, b, bi);
tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
- tc->rto * TCP_TO_TIMER_TICK);
+ (u32) tc->rto * TCP_TO_TIMER_TICK);
}
/**
@@ -1538,8 +1545,10 @@ tcp_timer_persist_handler (tcp_connection_t * tc)
tcp_validate_txf_size (tc, offset);
tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
- max_snd_bytes = clib_min (tc->snd_mss,
+ max_snd_bytes = clib_min (clib_min (tc->snd_mss, available_bytes),
tm->bytes_per_buffer - TRANSPORT_MAX_HDRS_LEN);
+ if (tc->snd_wnd > 0)
+ max_snd_bytes = clib_min (tc->snd_wnd, max_snd_bytes);
n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
max_snd_bytes);
b->current_length = n_bytes;
@@ -1720,7 +1729,7 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
&& tc->rxt_head != tc->snd_una
&& tcp_retransmit_should_retry_head (tc, sb))
{
- max_bytes = clib_min (tc->snd_mss, tc->snd_congestion - tc->snd_una);
+ max_bytes = clib_min (tc->snd_mss, tc->snd_nxt - tc->snd_una);
n_written = tcp_prepare_retransmit_segment (wrk, tc, 0, max_bytes, &b);
if (!n_written)
{
@@ -1752,7 +1761,7 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
if (!hole)
{
/* We are out of lost holes to retransmit so send some new data. */
- if (max_deq > tc->snd_mss)
+ if (max_deq)
{
u32 n_segs_new;
int av_wnd;
@@ -1762,7 +1771,10 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
av_wnd = (int) tc->snd_wnd - (tc->snd_nxt - tc->snd_una);
av_wnd = clib_max (av_wnd - tc->snd_mss, 0);
snd_space = clib_min (snd_space, av_wnd);
- snd_space = clib_min (max_deq, snd_space);
+ /* Lower-bound max_deq to mss so we can send a segment even
+ * when it is less than mss */
+ snd_space =
+ clib_min (clib_max (max_deq, tc->snd_mss), snd_space);
burst_size = clib_min (burst_size - n_segs,
snd_space / tc->snd_mss);
burst_size = clib_min (burst_size, TCP_RXT_MAX_BURST);
@@ -1774,8 +1786,7 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
goto done;
}
- if (tcp_in_recovery (tc) || !can_rescue
- || scoreboard_rescue_rxt_valid (sb, tc))
+ if (!can_rescue || scoreboard_rescue_rxt_valid (sb, tc))
break;
/* If rescue rxt undefined or less than snd_una then one segment of
@@ -1799,7 +1810,11 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
break;
}
- max_bytes = clib_min (hole->end - sb->high_rxt, snd_space);
+ max_bytes = hole->end - sb->high_rxt;
+ /* Avoid retransmitting a segment smaller than mss if possible */
+ if (snd_space < tc->snd_mss && max_bytes > snd_space)
+ break;
+ max_bytes = clib_min (max_bytes, snd_space);
max_bytes = snd_limited ? clib_min (max_bytes, tc->snd_mss) : max_bytes;
if (max_bytes == 0)
break;
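A worked example of the new sub-mss guard above: with snd_mss = 1460, snd_space = 800 and a remaining hole of max_bytes = hole->end - sb->high_rxt = 2920 bytes, the loop now breaks out instead of emitting an 800-byte retransmit; a short segment still goes out only when the hole itself fits in snd_space, e.g. max_bytes = 500 <= 800.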
@@ -2162,6 +2177,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 n_left_from, *from, thread_index = vm->thread_index;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 nexts[VLIB_FRAME_SIZE], *next;
+ u16 err_counters[TCP_N_ERROR] = { 0 };
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -2212,7 +2228,8 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
else
{
- b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
+ tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION,
+ 1);
next[0] = TCP_OUTPUT_NEXT_DROP;
}
if (tc1 != 0)
@@ -2223,7 +2240,8 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
else
{
- b[1]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
+ tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION,
+ 1);
next[1] = TCP_OUTPUT_NEXT_DROP;
}
}
@@ -2253,7 +2271,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
else
{
- b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
+ tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION, 1);
next[0] = TCP_OUTPUT_NEXT_DROP;
}
@@ -2262,6 +2280,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
n_left_from -= 1;
}
+ tcp_store_err_counters (output, err_counters);
vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
vlib_node_increment_counter (vm, tcp_node_index (output, is_ip4),
TCP_ERROR_PKTS_SENT, frame->n_vectors);
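Accumulating per-error counts in a stack array and storing them once per frame replaces a per-packet counter read-modify-write. The same pattern as a self-contained sketch, with plain arrays standing in for the tcp_inc_err_counter / tcp_store_err_counters helpers used above:

    enum { ERR_INVALID_CONNECTION, ERR_PKTS_SENT, N_ERROR };

    /* One read-modify-write per error code per frame, instead of one
     * per packet. */
    static void
    count_frame_errors (unsigned long *node_counters, const int *pkt_errors,
                        int n_pkts)
    {
      unsigned short local[N_ERROR] = { 0 };
      int i;

      for (i = 0; i < n_pkts; i++)
        local[pkt_errors[i]] += 1;

      for (i = 0; i < N_ERROR; i++)
        if (local[i])
          node_counters[i] += local[i];
    }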
@@ -2280,7 +2299,6 @@ VLIB_NODE_FN (tcp6_output_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp4_output_node) =
{
.name = "tcp4-output",
@@ -2298,9 +2316,7 @@ VLIB_REGISTER_NODE (tcp4_output_node) =
.format_buffer = format_tcp_header,
.format_trace = format_tcp_tx_trace,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp6_output_node) =
{
.name = "tcp6-output",
@@ -2318,7 +2334,6 @@ VLIB_REGISTER_NODE (tcp6_output_node) =
.format_buffer = format_tcp_header,
.format_trace = format_tcp_tx_trace,
};
-/* *INDENT-ON* */
typedef enum _tcp_reset_next
{
@@ -2335,79 +2350,100 @@ typedef enum _tcp_reset_next
_(DROP, "error-drop") \
_(IP_LOOKUP, "ip6-lookup")
+static void
+tcp_reset_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t **bs, u32 n_bufs, u8 is_ip4)
+{
+ tcp_header_t *tcp;
+ tcp_tx_trace_t *t;
+ int i;
+
+ for (i = 0; i < n_bufs; i++)
+ {
+ if (bs[i]->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ tcp = vlib_buffer_get_current (bs[i]);
+ t = vlib_add_trace (vm, node, bs[i], sizeof (*t));
+
+ if (is_ip4)
+ {
+ ip4_header_t *ih4 = vlib_buffer_get_current (bs[i]);
+ tcp = ip4_next_header (ih4);
+ t->tcp_connection.c_lcl_ip.ip4 = ih4->dst_address;
+ t->tcp_connection.c_rmt_ip.ip4 = ih4->src_address;
+ t->tcp_connection.c_is_ip4 = 1;
+ }
+ else
+ {
+ ip6_header_t *ih6 = vlib_buffer_get_current (bs[i]);
+ tcp = ip6_next_header (ih6);
+ t->tcp_connection.c_lcl_ip.ip6 = ih6->dst_address;
+ t->tcp_connection.c_rmt_ip.ip6 = ih6->src_address;
+ }
+ t->tcp_connection.c_lcl_port = tcp->dst_port;
+ t->tcp_connection.c_rmt_port = tcp->src_port;
+ t->tcp_connection.c_proto = TRANSPORT_PROTO_TCP;
+ clib_memcpy_fast (&t->tcp_header, tcp, sizeof (t->tcp_header));
+ }
+ }
+}
+
static uword
-tcp46_send_reset_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * from_frame, u8 is_ip4)
+tcp46_reset_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, u8 is_ip4)
{
- u32 error0 = TCP_ERROR_RST_SENT, next0 = TCP_RESET_NEXT_IP_LOOKUP;
- u32 n_left_from, next_index, *from, *to_next;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+ u16 nexts[VLIB_FRAME_SIZE], *next;
+ u32 n_left_from, *from;
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ vlib_get_buffers (vm, from, bufs, n_left_from);
- next_index = node->cached_next_index;
+ b = bufs;
+ next = nexts;
while (n_left_from > 0)
{
- u32 n_left_to_next;
+ tcp_buffer_make_reset (vm, b[0], is_ip4);
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ /* Do the IP lookup in the fib where the packet was received. The
+ * previous value was overwritten by tcp-input */
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
+ vec_elt (ip4_main.fib_index_by_sw_if_index,
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX]);
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- vlib_buffer_t *b0;
- tcp_tx_trace_t *t0;
- tcp_header_t *th0;
- u32 bi0;
-
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- tcp_make_reset_in_place (vm, b0, is_ip4);
-
- /* Prepare to send to IP lookup */
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
-
- b0->error = node->errors[error0];
- b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- th0 = vlib_buffer_get_current (b0);
- if (is_ip4)
- th0 = ip4_next_header ((ip4_header_t *) th0);
- else
- th0 = ip6_next_header ((ip6_header_t *) th0);
- t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
- clib_memcpy_fast (&t0->tcp_header, th0,
- sizeof (t0->tcp_header));
- }
+ b[0]->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ next[0] = TCP_RESET_NEXT_IP_LOOKUP;
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ b += 1;
+ next += 1;
+ n_left_from -= 1;
}
- return from_frame->n_vectors;
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ tcp_reset_trace_frame (vm, node, bufs, frame->n_vectors, is_ip4);
+
+ vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+
+ vlib_node_increment_counter (vm, node->node_index, TCP_ERROR_RST_SENT,
+ frame->n_vectors);
+
+ return frame->n_vectors;
}
VLIB_NODE_FN (tcp4_reset_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * from_frame)
{
- return tcp46_send_reset_inline (vm, node, from_frame, 1);
+ return tcp46_reset_inline (vm, node, from_frame, 1);
}
VLIB_NODE_FN (tcp6_reset_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * from_frame)
{
- return tcp46_send_reset_inline (vm, node, from_frame, 0);
+ return tcp46_reset_inline (vm, node, from_frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp4_reset_node) = {
.name = "tcp4-reset",
.vector_size = sizeof (u32),
@@ -2421,9 +2457,7 @@ VLIB_REGISTER_NODE (tcp4_reset_node) = {
},
.format_trace = format_tcp_tx_trace,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp6_reset_node) = {
.name = "tcp6-reset",
.vector_size = sizeof (u32),
@@ -2437,7 +2471,6 @@ VLIB_REGISTER_NODE (tcp6_reset_node) = {
},
.format_trace = format_tcp_tx_trace,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/tcp/tcp_packet.h b/src/vnet/tcp/tcp_packet.h
index b0636d871d5..c137ea68108 100644
--- a/src/vnet/tcp/tcp_packet.h
+++ b/src/vnet/tcp/tcp_packet.h
@@ -16,7 +16,8 @@
#ifndef included_tcp_packet_h
#define included_tcp_packet_h
-#include <vnet/vnet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
/* TCP flags bit 0 first. */
#define foreach_tcp_flag \
@@ -185,6 +186,100 @@ typedef struct
#define timestamp_lt(_t1, _t2) ((i32)((_t1)-(_t2)) < 0)
#define timestamp_leq(_t1, _t2) ((i32)((_t1)-(_t2)) <= 0)
+always_inline void
+ip4_tcp_reply_x1 (ip4_header_t *ip0, tcp_header_t *tcp0)
+{
+ u32 src0, dst0;
+
+ src0 = ip0->src_address.data_u32;
+ dst0 = ip0->dst_address.data_u32;
+ ip0->src_address.data_u32 = dst0;
+ ip0->dst_address.data_u32 = src0;
+
+ src0 = tcp0->src;
+ dst0 = tcp0->dst;
+ tcp0->src = dst0;
+ tcp0->dst = src0;
+}
+
+always_inline void
+ip4_tcp_reply_x2 (ip4_header_t *ip0, ip4_header_t *ip1, tcp_header_t *tcp0,
+ tcp_header_t *tcp1)
+{
+ u32 src0, dst0, src1, dst1;
+
+ src0 = ip0->src_address.data_u32;
+ src1 = ip1->src_address.data_u32;
+ dst0 = ip0->dst_address.data_u32;
+ dst1 = ip1->dst_address.data_u32;
+ ip0->src_address.data_u32 = dst0;
+ ip1->src_address.data_u32 = dst1;
+ ip0->dst_address.data_u32 = src0;
+ ip1->dst_address.data_u32 = src1;
+
+ src0 = tcp0->src;
+ src1 = tcp1->src;
+ dst0 = tcp0->dst;
+ dst1 = tcp1->dst;
+ tcp0->src = dst0;
+ tcp1->src = dst1;
+ tcp0->dst = src0;
+ tcp1->dst = src1;
+}
+
+always_inline void
+ip6_tcp_reply_x1 (ip6_header_t *ip0, tcp_header_t *tcp0)
+{
+ {
+ ip6_address_t src0, dst0;
+
+ src0 = ip0->src_address;
+ dst0 = ip0->dst_address;
+ ip0->src_address = dst0;
+ ip0->dst_address = src0;
+ }
+
+ {
+ u16 src0, dst0;
+
+ src0 = tcp0->src;
+ dst0 = tcp0->dst;
+ tcp0->src = dst0;
+ tcp0->dst = src0;
+ }
+}
+
+always_inline void
+ip6_tcp_reply_x2 (ip6_header_t *ip0, ip6_header_t *ip1, tcp_header_t *tcp0,
+ tcp_header_t *tcp1)
+{
+ {
+ ip6_address_t src0, dst0, src1, dst1;
+
+ src0 = ip0->src_address;
+ src1 = ip1->src_address;
+ dst0 = ip0->dst_address;
+ dst1 = ip1->dst_address;
+ ip0->src_address = dst0;
+ ip1->src_address = dst1;
+ ip0->dst_address = src0;
+ ip1->dst_address = src1;
+ }
+
+ {
+ u16 src0, dst0, src1, dst1;
+
+ src0 = tcp0->src;
+ src1 = tcp1->src;
+ dst0 = tcp0->dst;
+ dst1 = tcp1->dst;
+ tcp0->src = dst0;
+ tcp1->src = dst1;
+ tcp0->dst = src0;
+ tcp1->dst = src1;
+ }
+}
+
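A typical caller of these helpers turns a received segment into a reply in place, as tcp_buffer_make_reset does in tcp_output.c. A fragment sketching that use for ip4 (it assumes the buffer b currently points at the received ip4 header; the checksum must be recomputed afterwards):

    ip4_header_t *ih4 = vlib_buffer_get_current (b);
    tcp_header_t *th = ip4_next_header (ih4);

    /* Swap addresses and ports, then rewrite flags/seq/ack for the reply */
    ip4_tcp_reply_x1 (ih4, th);
    th->flags = TCP_FLAG_RST;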
/**
* Parse TCP header options.
*
diff --git a/src/vnet/tcp/tcp_pg.c b/src/vnet/tcp/tcp_pg.c
index 07bdb113fd0..9b98e3d8ee4 100644
--- a/src/vnet/tcp/tcp_pg.c
+++ b/src/vnet/tcp/tcp_pg.c
@@ -51,6 +51,13 @@
_ (ECE) \
_ (CWR)
+#define foreach_tcp_options \
+ _ (mss, TCP_OPTION_MSS, TCP_OPTION_LEN_MSS, 1) \
+ _ (timestamp, TCP_OPTION_TIMESTAMP, TCP_OPTION_LEN_TIMESTAMP, 2) \
+ _ (winscale, TCP_OPTION_WINDOW_SCALE, TCP_OPTION_LEN_WINDOW_SCALE, 1) \
+ _ (sackperm, TCP_OPTION_SACK_PERMITTED, TCP_OPTION_LEN_SACK_PERMITTED, 0) \
+ _ (sack, TCP_OPTION_SACK_BLOCK, TCP_OPTION_LEN_SACK_BLOCK, 0)
+
static void
tcp_pg_edit_function (pg_main_t * pg,
pg_stream_t * s,
@@ -150,82 +157,192 @@ uword
unformat_pg_tcp_header (unformat_input_t * input, va_list * args)
{
pg_stream_t *s = va_arg (*args, pg_stream_t *);
- pg_tcp_header_t *p;
- u32 group_index;
+ pg_tcp_header_t *pth;
+ u32 header_group_index, opt_group_index = ~0, noop_len, opts_len = 0;
- p = pg_create_edit_group (s, sizeof (p[0]), sizeof (tcp_header_t),
- &group_index);
- pg_tcp_header_init (p);
+ pth = pg_create_edit_group (s, sizeof (pth[0]), sizeof (tcp_header_t),
+ &header_group_index);
+ pg_tcp_header_init (pth);
/* Defaults. */
- pg_edit_set_fixed (&p->seq_number, 0);
- pg_edit_set_fixed (&p->ack_number, 0);
-
- pg_edit_set_fixed (&p->data_offset_and_reserved,
- sizeof (tcp_header_t) / sizeof (u32));
+ pg_edit_set_fixed (&pth->seq_number, 0);
+ pg_edit_set_fixed (&pth->ack_number, 0);
- pg_edit_set_fixed (&p->window, 4096);
- pg_edit_set_fixed (&p->urgent_pointer, 0);
+ pg_edit_set_fixed (&pth->window, 4096);
+ pg_edit_set_fixed (&pth->urgent_pointer, 0);
-#define _(f) pg_edit_set_fixed (&p->f##_flag, 0);
+#define _(f) pg_edit_set_fixed (&pth->f##_flag, 0);
foreach_tcp_flag
#undef _
- p->checksum.type = PG_EDIT_UNSPECIFIED;
+ pth->checksum.type = PG_EDIT_UNSPECIFIED;
- if (!unformat (input, "TCP: %U -> %U",
- unformat_pg_edit,
- unformat_tcp_udp_port, &p->src,
- unformat_pg_edit, unformat_tcp_udp_port, &p->dst))
+ if (!unformat (input, "TCP: %U -> %U", unformat_pg_edit,
+ unformat_tcp_udp_port, &pth->src, unformat_pg_edit,
+ unformat_tcp_udp_port, &pth->dst))
goto error;
/* Parse options. */
while (1)
{
- if (unformat (input, "window %U",
- unformat_pg_edit, unformat_pg_number, &p->window))
+ if (unformat (input, "window %U", unformat_pg_edit, unformat_pg_number,
+ &pth->window))
;
- else if (unformat (input, "checksum %U",
- unformat_pg_edit, unformat_pg_number, &p->checksum))
+ else if (unformat (input, "checksum %U", unformat_pg_edit,
+ unformat_pg_number, &pth->checksum))
;
else if (unformat (input, "seqnum %U", unformat_pg_edit,
- unformat_pg_number, &p->seq_number))
+ unformat_pg_number, &pth->seq_number))
;
else if (unformat (input, "acknum %U", unformat_pg_edit,
- unformat_pg_number, &p->ack_number))
+ unformat_pg_number, &pth->ack_number))
;
/* Flags. */
-#define _(f) else if (unformat (input, #f)) pg_edit_set_fixed (&p->f##_flag, 1);
+#define _(f) \
+ else if (unformat (input, #f)) pg_edit_set_fixed (&pth->f##_flag, 1);
foreach_tcp_flag
#undef _
- /* Can't parse input: try next protocol level. */
+ /* Can't parse input: try TCP options and next protocol level. */
+ else break;
+ }
+
+ while (unformat (input, "opt"))
+ {
+ int i;
+ pg_edit_t *opt_header, *opt_values;
+ u8 type, opt_len, n_values;
+
+ /* first allocate a new edit group for options */
+ if (opt_group_index == ~0)
+ (void) pg_create_edit_group (s, 0, 0, &opt_group_index);
+
+ if (false)
+ {
+ }
+#define _(n, t, l, k) \
+ else if (unformat (input, #n)) \
+ { \
+ type = (t); \
+ opt_len = (l); \
+ n_values = (k); \
+ }
+ foreach_tcp_options
+#undef _
else
+ {
+ /* unknown TCP option */
break;
+ }
+
+#define pg_tcp_option_init(e, o, b) \
+ do \
+ { \
+ *(o) += (b); \
+ (e)->lsb_bit_offset = *(o) > 0 ? (*(o) -1) * BITS (u8) : 0; \
+ (e)->n_bits = (b) *BITS (u8); \
+ } \
+ while (0);
+
+ /* if we don't know how many values to read, just ask */
+ if (n_values == 0 &&
+ unformat (input, "nvalues %D", sizeof (n_values), &n_values))
+ {
+ switch (type)
+ {
+ case TCP_OPTION_SACK_BLOCK:
+ /* each sack block is composed of 2 32-bit values */
+ n_values *= 2;
+ /*
+ opt_len holds the length of a single sack block; it needs to
+ be updated to the final number of bytes for the sack option
+ */
+ opt_len = 2 + 2 * opt_len;
+ break;
+ default:
+ /* unknown variable-length option */
+ continue;
+ }
+ }
+
+ opt_header = pg_add_edits (s, sizeof (pg_edit_t) * (2 + n_values),
+ opt_len, opt_group_index);
+ pg_tcp_option_init (opt_header, &opts_len, 1);
+ pg_tcp_option_init (opt_header + 1, &opts_len, 1);
+ pg_edit_set_fixed (opt_header, type);
+ pg_edit_set_fixed (opt_header + 1, opt_len);
+ opt_values = opt_header + 2;
+
+ switch (type)
+ {
+ case TCP_OPTION_MSS:
+ pg_tcp_option_init (opt_values, &opts_len, 2);
+ break;
+ case TCP_OPTION_WINDOW_SCALE:
+ pg_tcp_option_init (opt_values, &opts_len, 1);
+ break;
+ case TCP_OPTION_TIMESTAMP:
+ case TCP_OPTION_SACK_BLOCK:
+ for (i = 0; i < n_values; ++i)
+ pg_tcp_option_init (opt_values + i, &opts_len, 4);
+ break;
+ default:
+ break;
+ }
+
+ for (i = 0; i < n_values; ++i)
+ {
+ if (!unformat (input, "%U", unformat_pg_edit, unformat_pg_number,
+ opt_values + i))
+ goto error;
+ }
}
+ /* add TCP NOP options to pad the options up to a 4-byte boundary */
+ noop_len = (TCP_OPTS_ALIGN - opts_len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
+ if (noop_len > 0)
+ {
+ pg_edit_t *noop_edit;
+ u8 *noops = 0;
+
+ vec_validate (noops, noop_len - 1);
+ clib_memset (noops, 1, noop_len);
+
+ noop_edit =
+ pg_add_edits (s, sizeof (noop_edit[0]), noop_len, opt_group_index);
+ pg_tcp_option_init (noop_edit, &opts_len, noop_len);
+ noop_edit->type = PG_EDIT_FIXED;
+ noop_edit->values[PG_EDIT_LO] = noops;
+ }
+#undef pg_tcp_option_init
+
+ /* set the data offset according to options */
+ pg_edit_set_fixed (&pth->data_offset_and_reserved,
+ (sizeof (tcp_header_t) + opts_len) / sizeof (u32));
+
{
ip_main_t *im = &ip_main;
u16 dst_port;
tcp_udp_port_info_t *pi;
pi = 0;
- if (p->dst.type == PG_EDIT_FIXED)
+ if (pth->dst.type == PG_EDIT_FIXED)
{
- dst_port = pg_edit_get_value (&p->dst, PG_EDIT_LO);
+ dst_port = pg_edit_get_value (&pth->dst, PG_EDIT_LO);
pi = ip_get_tcp_udp_port_info (im, dst_port);
}
- if (pi && pi->unformat_pg_edit
- && unformat_user (input, pi->unformat_pg_edit, s))
+ if (pi && pi->unformat_pg_edit &&
+ unformat_user (input, pi->unformat_pg_edit, s))
;
else if (!unformat_user (input, unformat_pg_payload, s))
goto error;
- if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ if (pth->checksum.type == PG_EDIT_UNSPECIFIED)
{
- pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ pg_edit_group_t *g = pg_stream_get_group (s, header_group_index);
g->edit_function = tcp_pg_edit_function;
g->edit_function_opaque = 0;
}
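An illustrative stream definition exercising the new option syntax (node, ports and addresses are examples; per the foreach_tcp_options table, mss, winscale and timestamp take a fixed number of values, sackperm takes none, and sack reads nvalues first):

    packet-generator new {
      name tcp-syn
      limit 1
      node ip4-input
      size 64-64
      data { TCP: 1.2.3.4 -> 5.6.7.8
             TCP: 1234 -> 80 SYN
             opt mss 1460 opt winscale 7 opt sackperm }
    }

The NOP padding and data_offset fixup above keep the resulting header length 4-byte aligned regardless of which options are chosen.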
diff --git a/src/vnet/tcp/tcp_sack.c b/src/vnet/tcp/tcp_sack.c
index 8f51b517361..63af07b50cd 100644
--- a/src/vnet/tcp/tcp_sack.c
+++ b/src/vnet/tcp/tcp_sack.c
@@ -265,6 +265,27 @@ scoreboard_init_rxt (sack_scoreboard_t * sb, u32 snd_una)
}
void
+scoreboard_rxt_mark_lost (sack_scoreboard_t *sb, u32 snd_una, u32 snd_nxt)
+{
+ sack_scoreboard_hole_t *hole;
+
+ hole = scoreboard_first_hole (sb);
+ if (!hole)
+ {
+ hole = scoreboard_insert_hole (sb, TCP_INVALID_SACK_HOLE_INDEX, snd_una,
+ snd_nxt);
+ sb->tail = scoreboard_hole_index (sb, hole);
+ sb->high_sacked = snd_una;
+ }
+
+ if (hole->is_lost)
+ return;
+
+ hole->is_lost = 1;
+ sb->lost_bytes += scoreboard_hole_bytes (hole);
+}
+
+void
scoreboard_init (sack_scoreboard_t * sb)
{
sb->head = TCP_INVALID_SACK_HOLE_INDEX;
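Concretely: on an RTO with snd_una = 1000, snd_nxt = 5000 and an empty scoreboard, scoreboard_rxt_mark_lost inserts a hole covering [1000, 5000), points sb->tail at it and pulls high_sacked back to snd_una; lost_bytes then grows by 4000, and the is_lost guard keeps the accounting idempotent if the retransmit timer fires again before snd_una moves.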
diff --git a/src/vnet/tcp/tcp_sack.h b/src/vnet/tcp/tcp_sack.h
index 1c3fa95510b..bb206b92dbb 100644
--- a/src/vnet/tcp/tcp_sack.h
+++ b/src/vnet/tcp/tcp_sack.h
@@ -105,6 +105,8 @@ void scoreboard_clear (sack_scoreboard_t * sb);
void scoreboard_clear_reneging (sack_scoreboard_t * sb, u32 start, u32 end);
void scoreboard_init (sack_scoreboard_t * sb);
void scoreboard_init_rxt (sack_scoreboard_t * sb, u32 snd_una);
+void scoreboard_rxt_mark_lost (sack_scoreboard_t *sb, u32 snd_una,
+ u32 snd_nxt);
format_function_t format_tcp_scoreboard;
diff --git a/src/vnet/tcp/tcp_syn_filter4.c b/src/vnet/tcp/tcp_syn_filter4.c
index 1b003e04e51..6e867240ad6 100644
--- a/src/vnet/tcp/tcp_syn_filter4.c
+++ b/src/vnet/tcp/tcp_syn_filter4.c
@@ -399,7 +399,6 @@ VLIB_NODE_FN (syn_filter4_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (syn_filter4_node) =
{
.name = "syn-filter-4",
@@ -418,16 +417,13 @@ VLIB_REGISTER_NODE (syn_filter4_node) =
[SYN_FILTER_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (syn_filter_4, static) =
{
.arc_name = "ip4-local",
.node_name = "syn-filter-4",
.runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
int
@@ -525,14 +521,12 @@ syn_filter_enable_disable_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (sr_content_command, static) =
{
.path = "ip syn filter",
.short_help = "ip syn filter <interface-name> [disable]",
.function = syn_filter_enable_disable_command_fn,
};
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
/*
diff --git a/src/vnet/tcp/tcp_timer.h b/src/vnet/tcp/tcp_timer.h
index 4668c79cabf..c0907cae1cc 100644
--- a/src/vnet/tcp/tcp_timer.h
+++ b/src/vnet/tcp/tcp_timer.h
@@ -17,11 +17,18 @@
#include <vnet/tcp/tcp_types.h>
+static inline u8
+tcp_timer_thread_is_valid (tcp_connection_t *tc)
+{
+ return ((tc->c_thread_index == vlib_get_thread_index ()) ||
+ vlib_thread_is_main_w_barrier ());
+}
+
always_inline void
-tcp_timer_set (tcp_timer_wheel_t * tw, tcp_connection_t * tc, u8 timer_id,
+tcp_timer_set (tcp_timer_wheel_t *tw, tcp_connection_t *tc, u8 timer_id,
u32 interval)
{
- ASSERT (tc->c_thread_index == vlib_get_thread_index ());
+ ASSERT (tcp_timer_thread_is_valid (tc));
ASSERT (tc->timers[timer_id] == TCP_TIMER_HANDLE_INVALID);
tc->timers[timer_id] = tw_timer_start_tcp_twsl (tw, tc->c_c_index,
timer_id, interval);
@@ -30,7 +37,7 @@ tcp_timer_set (tcp_timer_wheel_t * tw, tcp_connection_t * tc, u8 timer_id,
always_inline void
tcp_timer_reset (tcp_timer_wheel_t * tw, tcp_connection_t * tc, u8 timer_id)
{
- ASSERT (tc->c_thread_index == vlib_get_thread_index ());
+ ASSERT (tcp_timer_thread_is_valid (tc));
tc->pending_timers &= ~(1 << timer_id);
if (tc->timers[timer_id] == TCP_TIMER_HANDLE_INVALID)
return;
@@ -43,7 +50,7 @@ always_inline void
tcp_timer_update (tcp_timer_wheel_t * tw, tcp_connection_t * tc, u8 timer_id,
u32 interval)
{
- ASSERT (tc->c_thread_index == vlib_get_thread_index ());
+ ASSERT (tcp_timer_thread_is_valid (tc));
if (tc->timers[timer_id] != TCP_TIMER_HANDLE_INVALID)
tw_timer_update_tcp_twsl (tw, tc->timers[timer_id], interval);
else
@@ -51,12 +58,19 @@ tcp_timer_update (tcp_timer_wheel_t * tw, tcp_connection_t * tc, u8 timer_id,
timer_id, interval);
}
+always_inline u8
+tcp_timer_is_active (tcp_connection_t *tc, tcp_timers_e timer)
+{
+ return tc->timers[timer] != TCP_TIMER_HANDLE_INVALID ||
+ (tc->pending_timers & (1 << timer));
+}
+
always_inline void
tcp_retransmit_timer_set (tcp_timer_wheel_t * tw, tcp_connection_t * tc)
{
ASSERT (tc->snd_una != tc->snd_nxt);
tcp_timer_set (tw, tc, TCP_TIMER_RETRANSMIT,
- clib_max (tc->rto * TCP_TO_TIMER_TICK, 1));
+ clib_max ((u32) tc->rto * TCP_TO_TIMER_TICK, 1));
}
always_inline void
@@ -70,20 +84,7 @@ tcp_persist_timer_set (tcp_timer_wheel_t * tw, tcp_connection_t * tc)
{
/* Reuse RTO. It's backed off in handler */
tcp_timer_set (tw, tc, TCP_TIMER_PERSIST,
- clib_max (tc->rto * TCP_TO_TIMER_TICK, 1));
-}
-
-always_inline void
-tcp_persist_timer_update (tcp_timer_wheel_t * tw, tcp_connection_t * tc)
-{
- u32 interval;
-
- if (seq_leq (tc->snd_una, tc->snd_congestion + tc->burst_acked))
- interval = 1;
- else
- interval = clib_max (tc->rto * TCP_TO_TIMER_TICK, 1);
-
- tcp_timer_update (tw, tc, TCP_TIMER_PERSIST, interval);
+ clib_max ((u32) tc->rto * TCP_TO_TIMER_TICK, 1));
}
always_inline void
@@ -98,19 +99,13 @@ tcp_retransmit_timer_update (tcp_timer_wheel_t * tw, tcp_connection_t * tc)
if (tc->snd_una == tc->snd_nxt)
{
tcp_retransmit_timer_reset (tw, tc);
- if (tc->snd_wnd < tc->snd_mss)
- tcp_persist_timer_update (tw, tc);
+ if (tc->snd_wnd < tc->snd_mss &&
+ !tcp_timer_is_active (tc, TCP_TIMER_PERSIST))
+ tcp_persist_timer_set (tw, tc);
}
else
tcp_timer_update (tw, tc, TCP_TIMER_RETRANSMIT,
- clib_max (tc->rto * TCP_TO_TIMER_TICK, 1));
-}
-
-always_inline u8
-tcp_timer_is_active (tcp_connection_t * tc, tcp_timers_e timer)
-{
- return tc->timers[timer] != TCP_TIMER_HANDLE_INVALID
- || (tc->pending_timers & (1 << timer));
+ clib_max ((u32) tc->rto * TCP_TO_TIMER_TICK, 1));
}
always_inline void
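tcp_timer_is_active treats a timer as live if it either still holds a wheel handle or its bit is set in pending_timers, i.e. it has expired but its handler has not run yet. The same two-state check as a self-contained sketch (field names mirror the connection fields above; the types are illustrative):

    #define TIMER_HANDLE_INVALID ((unsigned) ~0)

    typedef struct
    {
      unsigned timers[8]; /* wheel handles, one per timer id */
      unsigned pending;   /* bitmap: expired but not yet dispatched */
    } conn_t;

    static int
    timer_is_active (conn_t *c, int id)
    {
      return c->timers[id] != TIMER_HANDLE_INVALID || (c->pending & (1u << id));
    }

This is what lets tcp_retransmit_timer_update above avoid re-arming the persist timer when one is already set or queued.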
diff --git a/src/vnet/tcp/tcp_types.h b/src/vnet/tcp/tcp_types.h
index aacfd8f2fd4..f9a9ff9a4da 100644
--- a/src/vnet/tcp/tcp_types.h
+++ b/src/vnet/tcp/tcp_types.h
@@ -389,7 +389,6 @@ typedef struct _tcp_connection
#define rst_state snd_wl1
} tcp_connection_t;
-/* *INDENT-OFF* */
struct _tcp_cc_algorithm
{
const char *name;
@@ -406,7 +405,6 @@ struct _tcp_cc_algorithm
void (*event) (tcp_connection_t *tc, tcp_cc_event_t evt);
u64 (*get_pacing_rate) (tcp_connection_t *tc);
};
-/* *INDENT-ON* */
#define tcp_fastrecovery_on(tc) (tc)->flags |= TCP_CONN_FAST_RECOVERY
#define tcp_fastrecovery_off(tc) (tc)->flags &= ~TCP_CONN_FAST_RECOVERY
diff --git a/src/vnet/teib/teib.c b/src/vnet/teib/teib.c
index dc0c99b1dbe..a9234bbeb5e 100644
--- a/src/vnet/teib/teib.c
+++ b/src/vnet/teib/teib.c
@@ -34,7 +34,7 @@ struct teib_entry_t_
{
teib_key_t *te_key;
fib_prefix_t te_nh;
- u32 te_fib_index;
+ u32 te_nh_fib_index;
};
typedef struct teib_db_t_
@@ -83,7 +83,7 @@ teib_entry_get_af (const teib_entry_t * te)
u32
teib_entry_get_fib_index (const teib_entry_t * te)
{
- return (te->te_fib_index);
+ return (te->te_nh_fib_index);
}
const ip_address_t *
@@ -101,7 +101,7 @@ teib_entry_get_nh (const teib_entry_t * te)
void
teib_entry_adj_stack (const teib_entry_t * te, adj_index_t ai)
{
- adj_midchain_delegate_stack (ai, te->te_fib_index, &te->te_nh);
+ adj_midchain_delegate_stack (ai, te->te_nh_fib_index, &te->te_nh);
}
teib_entry_t *
@@ -139,7 +139,7 @@ teib_entry_find_46 (u32 sw_if_index,
}
static void
-teib_adj_fib_add (const ip_address_t * ip, u32 sw_if_index, u32 fib_index)
+teib_adj_fib_add (const ip_address_t *ip, u32 sw_if_index, u32 peer_fib_index)
{
if (AF_IP6 == ip_addr_version (ip) &&
ip6_address_is_link_local_unicast (&ip_addr_v6 (ip)))
@@ -155,21 +155,18 @@ teib_adj_fib_add (const ip_address_t * ip, u32 sw_if_index, u32 fib_index)
fib_prefix_t pfx;
ip_address_to_fib_prefix (ip, &pfx);
- fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
- FIB_ENTRY_FLAG_ATTACHED,
- fib_proto_to_dpo (pfx.fp_proto),
- &pfx.fp_addr,
- sw_if_index,
- ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
-
+ fib_table_entry_path_add (
+ peer_fib_index, &pfx, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED,
+ fib_proto_to_dpo (pfx.fp_proto), &pfx.fp_addr, sw_if_index, ~0, 1,
+ NULL, FIB_ROUTE_PATH_FLAG_NONE);
if (0 == teib_db.td_n_entries[ip_addr_version (ip)]++)
- fib_table_lock (fib_index, pfx.fp_proto, FIB_SOURCE_ADJ);
+ fib_table_lock (peer_fib_index, pfx.fp_proto, FIB_SOURCE_ADJ);
}
}
static void
-teib_adj_fib_remove (ip_address_t * ip, u32 sw_if_index, u32 fib_index)
+teib_adj_fib_remove (ip_address_t *ip, u32 sw_if_index, u32 peer_fib_index)
{
if (AF_IP6 == ip_addr_version (ip) &&
ip6_address_is_link_local_unicast (&ip_addr_v6 (ip)))
@@ -185,14 +182,12 @@ teib_adj_fib_remove (ip_address_t * ip, u32 sw_if_index, u32 fib_index)
fib_prefix_t pfx;
ip_address_to_fib_prefix (ip, &pfx);
- fib_table_entry_path_remove (fib_index, &pfx, FIB_SOURCE_ADJ,
- fib_proto_to_dpo (pfx.fp_proto),
- &pfx.fp_addr,
- sw_if_index,
- ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove (
+ peer_fib_index, &pfx, FIB_SOURCE_ADJ, fib_proto_to_dpo (pfx.fp_proto),
+ &pfx.fp_addr, sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
if (0 == --teib_db.td_n_entries[ip_addr_version (ip)])
- fib_table_unlock (fib_index, pfx.fp_proto, FIB_SOURCE_ADJ);
+ fib_table_unlock (peer_fib_index, pfx.fp_proto, FIB_SOURCE_ADJ);
}
}
@@ -203,15 +198,17 @@ teib_entry_add (u32 sw_if_index,
{
fib_protocol_t nh_proto;
teib_entry_t *te;
- u32 fib_index;
+ u32 nh_fib_index, peer_fib_index;
index_t tei;
nh_proto = (AF_IP4 == ip_addr_version (nh) ?
FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6);
- fib_index = fib_table_find (nh_proto, nh_table_id);
+ peer_fib_index = fib_table_get_index_for_sw_if_index (
+ ip_address_family_to_fib_proto (peer->version), sw_if_index);
+ nh_fib_index = fib_table_find (nh_proto, nh_table_id);
- if (~0 == fib_index)
+ if (~0 == nh_fib_index)
{
return (VNET_API_ERROR_NO_SUCH_FIB);
}
@@ -225,9 +222,6 @@ teib_entry_add (u32 sw_if_index,
.tk_sw_if_index = sw_if_index,
};
teib_entry_t *te;
- u32 fib_index;
-
- fib_index = fib_table_get_index_for_sw_if_index (nh_proto, sw_if_index);
pool_get_zero (teib_pool, te);
@@ -236,12 +230,12 @@ teib_entry_add (u32 sw_if_index,
clib_memcpy (te->te_key, &nk, sizeof (*te->te_key));
ip_address_to_fib_prefix (nh, &te->te_nh);
- te->te_fib_index = fib_index;
+ te->te_nh_fib_index = nh_fib_index;
hash_set_mem (teib_db.td_db, te->te_key, tei);
/* we now have a /32 in the overlay, add an adj-fib */
- teib_adj_fib_add (&te->te_key->tk_peer, sw_if_index, fib_index);
+ teib_adj_fib_add (&te->te_key->tk_peer, sw_if_index, peer_fib_index);
TEIB_NOTIFY (te, nv_added);
TEIB_TE_INFO (te, "created");
@@ -265,13 +259,12 @@ teib_entry_del (u32 sw_if_index, const ip_address_t * peer)
{
TEIB_TE_INFO (te, "removed");
- u32 fib_index;
+ u32 peer_fib_index;
- fib_index = fib_table_get_index_for_sw_if_index
- (ip_address_family_to_fib_proto (ip_addr_version (peer)),
- sw_if_index);
+ peer_fib_index = fib_table_get_index_for_sw_if_index (
+ ip_address_family_to_fib_proto (peer->version), sw_if_index);
- teib_adj_fib_remove (&te->te_key->tk_peer, sw_if_index, fib_index);
+ teib_adj_fib_remove (&te->te_key->tk_peer, sw_if_index, peer_fib_index);
hash_unset_mem (teib_db.td_db, te->te_key);
@@ -282,8 +275,7 @@ teib_entry_del (u32 sw_if_index, const ip_address_t * peer)
}
else
{
- TEIB_INFO ("no such entry: %U, %U, %U",
- format_vnet_sw_if_index_name,
+ TEIB_INFO ("no such entry: %U, %U", format_vnet_sw_if_index_name,
vnet_get_main (), sw_if_index, format_ip_address, peer);
return (VNET_API_ERROR_NO_SUCH_ENTRY);
}
@@ -305,7 +297,7 @@ format_teib_entry (u8 * s, va_list * args)
s = format (s, "%U", format_ip_address,
&te->te_key->tk_peer, IP46_TYPE_ANY);
s = format (s, " via [%d]:%U",
- fib_table_get_table_id (te->te_fib_index, te->te_nh.fp_proto),
+ fib_table_get_table_id (te->te_nh_fib_index, te->te_nh.fp_proto),
format_fib_prefix, &te->te_nh);
return (s);
@@ -316,12 +308,10 @@ teib_walk (teib_walk_cb_t fn, void *ctx)
{
index_t tei;
- /* *INDENT-OFF* */
pool_foreach_index (tei, teib_pool)
{
fn(tei, ctx);
}
- /* *INDENT-ON* */
}
void
@@ -329,13 +319,11 @@ teib_walk_itf (u32 sw_if_index, teib_walk_cb_t fn, void *ctx)
{
index_t tei;
- /* *INDENT-OFF* */
pool_foreach_index (tei, teib_pool)
{
if (sw_if_index == teib_entry_get_sw_if_index(teib_entry_get(tei)))
fn(tei, ctx);
}
- /* *INDENT-ON* */
}
static void
@@ -344,20 +332,18 @@ teib_walk_itf_proto (u32 sw_if_index,
{
index_t tei;
- /* *INDENT-OFF* */
pool_foreach_index (tei, teib_pool)
{
if (sw_if_index == teib_entry_get_sw_if_index(teib_entry_get(tei)) &&
af == teib_entry_get_af(teib_entry_get(tei)))
fn(tei, ctx);
}
- /* *INDENT-ON* */
}
typedef struct teib_table_bind_ctx_t_
{
- u32 new_fib_index;
- u32 old_fib_index;
+ u32 new_peer_fib_index;
+ u32 old_peer_fib_index;
} teib_table_bind_ctx_t;
static walk_rc_t
@@ -368,12 +354,13 @@ teib_walk_table_bind (index_t tei, void *arg)
te = teib_entry_get (tei);
- TEIB_TE_INFO (te, "bind: %d -> %d", ctx->old_fib_index, ctx->new_fib_index);
+ TEIB_TE_INFO (te, "bind: %d -> %d", ctx->old_peer_fib_index,
+ ctx->new_peer_fib_index);
- teib_adj_fib_remove (&te->te_key->tk_peer,
- te->te_key->tk_sw_if_index, ctx->old_fib_index);
- teib_adj_fib_add (&te->te_key->tk_peer,
- te->te_key->tk_sw_if_index, ctx->new_fib_index);
+ teib_adj_fib_remove (&te->te_key->tk_peer, te->te_key->tk_sw_if_index,
+ ctx->old_peer_fib_index);
+ teib_adj_fib_add (&te->te_key->tk_peer, te->te_key->tk_sw_if_index,
+ ctx->new_peer_fib_index);
return (WALK_CONTINUE);
}
@@ -384,8 +371,8 @@ teib_table_bind_v4 (ip4_main_t * im,
u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
{
teib_table_bind_ctx_t ctx = {
- .old_fib_index = old_fib_index,
- .new_fib_index = new_fib_index,
+ .old_peer_fib_index = old_fib_index,
+ .new_peer_fib_index = new_fib_index,
};
teib_walk_itf_proto (sw_if_index, AF_IP4, teib_walk_table_bind, &ctx);
@@ -397,8 +384,8 @@ teib_table_bind_v6 (ip6_main_t * im,
u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
{
teib_table_bind_ctx_t ctx = {
- .old_fib_index = old_fib_index,
- .new_fib_index = new_fib_index,
+ .old_peer_fib_index = old_fib_index,
+ .new_peer_fib_index = new_fib_index,
};
teib_walk_itf_proto (sw_if_index, AF_IP6, teib_walk_table_bind, &ctx);
diff --git a/src/vnet/teib/teib_cli.c b/src/vnet/teib/teib_cli.c
index a23902e0f60..03cec15c7a1 100644
--- a/src/vnet/teib/teib_cli.c
+++ b/src/vnet/teib/teib_cli.c
@@ -85,13 +85,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (teib_create_command, static) = {
.path = "create teib",
.short_help = "create teib <interface> peer <addr> nh <addr> [nh-table-id <ID>]",
.function = teib_add,
};
-/* *INDENT-ON* */
static clib_error_t *
teib_del (vlib_main_t * vm,
@@ -150,13 +148,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (teib_delete_command, static) = {
.path = "delete teib",
.short_help = "delete teib <interface> peer <addr>",
.function = teib_del,
};
-/* *INDENT-ON* */
static walk_rc_t
teib_show_one (index_t nei, void *ctx)
@@ -175,13 +171,11 @@ teib_show (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (teib_show_command, static) = {
.path = "show teib",
.short_help = "show teib",
.function = teib_show,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
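Example invocations of the three commands above (interface name, addresses and table id are illustrative):

    create teib tun0 peer 10.0.0.2 nh 192.168.1.1 nh-table-id 1
    show teib
    delete teib tun0 peer 10.0.0.2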
diff --git a/src/vnet/tls/tls.c b/src/vnet/tls/tls.c
index 0787c22f763..5f00e6e302d 100644
--- a/src/vnet/tls/tls.c
+++ b/src/vnet/tls/tls.c
@@ -61,8 +61,7 @@ tls_add_vpp_q_rx_evt (session_t * s)
int
tls_add_vpp_q_builtin_rx_evt (session_t * s)
{
- if (svm_fifo_set_event (s->rx_fifo))
- session_send_io_evt_to_thread (s->rx_fifo, SESSION_IO_EVT_BUILTIN_RX);
+ session_enqueue_notify (s);
return 0;
}
@@ -75,9 +74,10 @@ tls_add_vpp_q_tx_evt (session_t * s)
}
static inline int
-tls_add_app_q_evt (app_worker_t * app, session_t * app_session)
+tls_add_app_q_evt (app_worker_t *app_wrk, session_t *app_session)
{
- return app_worker_lock_and_send_event (app, app_session, SESSION_IO_EVT_RX);
+ app_worker_add_event (app_wrk, app_session, SESSION_IO_EVT_RX);
+ return 0;
}
u32
@@ -115,58 +115,74 @@ u32
tls_ctx_half_open_alloc (void)
{
tls_main_t *tm = &tls_main;
- u8 will_expand = 0;
tls_ctx_t *ctx;
- u32 ctx_index;
- pool_get_aligned_will_expand (tm->half_open_ctx_pool, will_expand, 0);
- if (PREDICT_FALSE (will_expand && vlib_num_workers ()))
- {
- clib_rwlock_writer_lock (&tm->half_open_rwlock);
- pool_get_zero (tm->half_open_ctx_pool, ctx);
- ctx->c_c_index = ctx - tm->half_open_ctx_pool;
- ctx_index = ctx->c_c_index;
- clib_rwlock_writer_unlock (&tm->half_open_rwlock);
- }
- else
- {
- /* reader lock assumption: only main thread will call pool_get */
- clib_rwlock_reader_lock (&tm->half_open_rwlock);
- pool_get_zero (tm->half_open_ctx_pool, ctx);
- ctx->c_c_index = ctx - tm->half_open_ctx_pool;
- ctx_index = ctx->c_c_index;
- clib_rwlock_reader_unlock (&tm->half_open_rwlock);
- }
- return ctx_index;
+ if (vec_len (tm->postponed_ho_free))
+ tls_flush_postponed_ho_cleanups ();
+
+ pool_get_aligned_safe (tm->half_open_ctx_pool, ctx, CLIB_CACHE_LINE_BYTES);
+
+ clib_memset (ctx, 0, sizeof (*ctx));
+ ctx->c_c_index = ctx - tm->half_open_ctx_pool;
+ ctx->c_thread_index = transport_cl_thread ();
+
+ return ctx->c_c_index;
}
void
tls_ctx_half_open_free (u32 ho_index)
{
- tls_main_t *tm = &tls_main;
- clib_rwlock_writer_lock (&tm->half_open_rwlock);
pool_put_index (tls_main.half_open_ctx_pool, ho_index);
- clib_rwlock_writer_unlock (&tm->half_open_rwlock);
}
tls_ctx_t *
tls_ctx_half_open_get (u32 ctx_index)
{
tls_main_t *tm = &tls_main;
- clib_rwlock_reader_lock (&tm->half_open_rwlock);
return pool_elt_at_index (tm->half_open_ctx_pool, ctx_index);
}
void
-tls_ctx_half_open_reader_unlock ()
+tls_add_postponed_ho_cleanups (u32 ho_index)
{
- clib_rwlock_reader_unlock (&tls_main.half_open_rwlock);
+ tls_main_t *tm = &tls_main;
+ vec_add1 (tm->postponed_ho_free, ho_index);
}
-u32
-tls_ctx_half_open_index (tls_ctx_t * ctx)
+static void
+tls_ctx_ho_try_free (u32 ho_index)
+{
+ tls_ctx_t *ctx;
+
+ ctx = tls_ctx_half_open_get (ho_index);
+ /* Probably tcp connected just before the tcp establish timeout and
+ * the worker that owns the established session has not yet received
+ * @ref tls_session_connected_cb */
+ if (!(ctx->flags & TLS_CONN_F_HO_DONE))
+ {
+ ctx->tls_session_handle = SESSION_INVALID_HANDLE;
+ tls_add_postponed_ho_cleanups (ho_index);
+ return;
+ }
+ if (!(ctx->flags & TLS_CONN_F_NO_APP_SESSION))
+ session_half_open_delete_notify (&ctx->connection);
+ tls_ctx_half_open_free (ho_index);
+}
+
+void
+tls_flush_postponed_ho_cleanups ()
{
- return (ctx - tls_main.half_open_ctx_pool);
+ tls_main_t *tm = &tls_main;
+ u32 *ho_indexp, *tmp;
+
+ tmp = tm->postponed_ho_free;
+ tm->postponed_ho_free = tm->ho_free_list;
+ tm->ho_free_list = tmp;
+
+ vec_foreach (ho_indexp, tm->ho_free_list)
+ tls_ctx_ho_try_free (*ho_indexp);
+
+ vec_reset_length (tm->ho_free_list);
}
void
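tls_flush_postponed_ho_cleanups swaps the postponed vector with a scratch list before draining it, so entries re-postponed by tls_ctx_ho_try_free land on the fresh vector rather than on the one being walked. The swap-and-drain shape, as a self-contained sketch:

    #include <stddef.h>

    typedef struct { int *items; size_t n; } list_t;

    static void
    flush_postponed (list_t *postponed, list_t *scratch,
                     void (*try_free) (int item, list_t *postponed))
    {
      size_t i;
      list_t tmp;

      tmp = *postponed; /* swap: drain the old list, refill the new one */
      *postponed = *scratch;
      *scratch = tmp;

      for (i = 0; i < scratch->n; i++)
        try_free (scratch->items[i], postponed); /* may re-postpone */

      scratch->n = 0; /* reset length, keep storage */
    }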
@@ -189,17 +205,19 @@ tls_notify_app_accept (tls_ctx_t * ctx)
lctx = tls_listener_ctx_get (ctx->listener_ctx_index);
app_listener = listen_session_get_from_handle (lctx->app_session_handle);
- app_session = session_get (ctx->c_s_index, ctx->c_thread_index);
- app_session->app_wrk_index = ctx->parent_app_wrk_index;
- app_session->connection_index = ctx->tls_ctx_handle;
+ app_session = session_alloc (ctx->c_thread_index);
+ app_session->session_state = SESSION_STATE_ACCEPTING;
app_session->session_type = app_listener->session_type;
app_session->listener_handle = listen_session_get_handle (app_listener);
- app_session->session_state = SESSION_STATE_ACCEPTING;
+ app_session->app_wrk_index = ctx->parent_app_wrk_index;
+ app_session->connection_index = ctx->tls_ctx_handle;
+ ctx->c_s_index = app_session->session_index;
if ((rv = app_worker_init_accepted (app_session)))
{
TLS_DBG (1, "failed to allocate fifos");
session_free (app_session);
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
return rv;
}
ctx->app_session_handle = session_handle (app_session);
@@ -211,67 +229,67 @@ tls_notify_app_accept (tls_ctx_t * ctx)
int
tls_notify_app_connected (tls_ctx_t * ctx, session_error_t err)
{
+ u32 parent_app_api_ctx;
session_t *app_session;
app_worker_t *app_wrk;
app_wrk = app_worker_get_if_valid (ctx->parent_app_wrk_index);
if (!app_wrk)
{
- tls_disconnect_transport (ctx);
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
return -1;
}
if (err)
{
- /* Free app session pre-allocated when transport was established */
- if (ctx->tls_type == TRANSPORT_PROTO_TLS)
- session_free (session_get (ctx->c_s_index, ctx->c_thread_index));
- ctx->no_app_session = 1;
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
goto send_reply;
}
- /* For DTLS the app session is not preallocated because the underlying udp
- * session might migrate to a different worker during the handshake */
+ app_session = session_alloc (ctx->c_thread_index);
+ app_session->session_state = SESSION_STATE_CREATED;
+ app_session->connection_index = ctx->tls_ctx_handle;
+
if (ctx->tls_type == TRANSPORT_PROTO_DTLS)
{
- session_type_t st;
/* Cleanup half-open session as we don't get notification from udp */
session_half_open_delete_notify (&ctx->connection);
- app_session = session_alloc (ctx->c_thread_index);
- app_session->session_state = SESSION_STATE_CREATED;
- ctx->c_s_index = app_session->session_index;
- st =
+ app_session->session_type =
session_type_from_proto_and_ip (TRANSPORT_PROTO_DTLS, ctx->tcp_is_ip4);
- app_session->session_type = st;
- app_session->connection_index = ctx->tls_ctx_handle;
}
else
{
- app_session = session_get (ctx->c_s_index, ctx->c_thread_index);
+ app_session->session_type =
+ session_type_from_proto_and_ip (TRANSPORT_PROTO_TLS, ctx->tcp_is_ip4);
}
app_session->app_wrk_index = ctx->parent_app_wrk_index;
+ app_session->opaque = ctx->parent_app_api_context;
+ ctx->c_s_index = app_session->session_index;
if ((err = app_worker_init_connected (app_wrk, app_session)))
- goto failed;
+ {
+ app_worker_connect_notify (app_wrk, 0, err, ctx->parent_app_api_context);
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
+ session_free (app_session);
+ return -1;
+ }
app_session->session_state = SESSION_STATE_READY;
- if (app_worker_connect_notify (app_wrk, app_session,
- SESSION_E_NONE, ctx->parent_app_api_context))
+ parent_app_api_ctx = ctx->parent_app_api_context;
+ ctx->app_session_handle = session_handle (app_session);
+
+ if (app_worker_connect_notify (app_wrk, app_session, SESSION_E_NONE,
+ parent_app_api_ctx))
{
TLS_DBG (1, "failed to notify app");
- app_session->session_state = SESSION_STATE_CONNECTING;
- tls_disconnect (ctx->tls_ctx_handle, vlib_get_thread_index ());
+ session_free (session_get (ctx->c_s_index, ctx->c_thread_index));
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
return -1;
}
- ctx->app_session_handle = session_handle (app_session);
-
return 0;
-failed:
- ctx->no_app_session = 1;
- tls_disconnect (ctx->tls_ctx_handle, vlib_get_thread_index ());
send_reply:
return app_worker_connect_notify (app_wrk, 0, err,
ctx->parent_app_api_context);
@@ -365,7 +383,7 @@ tls_ctx_write (tls_ctx_t * ctx, session_t * app_session,
sp->max_burst_size = sp->max_burst_size * TRANSPORT_PACER_MIN_MSS;
n_wrote = tls_vfts[ctx->tls_ctx_engine].ctx_write (ctx, app_session, sp);
- sp->max_burst_size = n_wrote;
+ sp->bytes_dequeued = n_wrote;
return n_wrote > 0 ? clib_max (n_wrote / TRANSPORT_PACER_MIN_MSS, 1) : 0;
}
@@ -382,6 +400,12 @@ tls_ctx_transport_close (tls_ctx_t * ctx)
}
static inline int
+tls_ctx_transport_reset (tls_ctx_t *ctx)
+{
+ return tls_vfts[ctx->tls_ctx_engine].ctx_transport_reset (ctx);
+}
+
+static inline int
tls_ctx_app_close (tls_ctx_t * ctx)
{
return tls_vfts[ctx->tls_ctx_engine].ctx_app_close (ctx);
@@ -399,44 +423,37 @@ tls_ctx_handshake_is_over (tls_ctx_t * ctx)
return tls_vfts[ctx->tls_ctx_engine].ctx_handshake_is_over (ctx);
}
+int
+tls_reinit_ca_chain (crypto_engine_type_t tls_engine_id)
+{
+ return tls_vfts[tls_engine_id].ctx_reinit_cachain ();
+}
+
void
-tls_session_reset_callback (session_t * s)
+tls_notify_app_io_error (tls_ctx_t *ctx)
+{
+ ASSERT (tls_ctx_handshake_is_over (ctx));
+
+ session_transport_reset_notify (&ctx->connection);
+ session_transport_closed_notify (&ctx->connection);
+ tls_disconnect_transport (ctx);
+}
+
+void
+tls_session_reset_callback (session_t *ts)
{
tls_ctx_t *ctx;
- transport_connection_t *tc;
- session_t *app_session;
- ctx = tls_ctx_get (s->opaque);
- ctx->is_passive_close = 1;
- tc = &ctx->connection;
- if (tls_ctx_handshake_is_over (ctx))
- {
- session_transport_reset_notify (tc);
- session_transport_closed_notify (tc);
- tls_disconnect_transport (ctx);
- }
- else
- if ((app_session =
- session_get_if_valid (ctx->c_s_index, ctx->c_thread_index)))
- {
- session_free (app_session);
- ctx->c_s_index = SESSION_INVALID_INDEX;
- tls_disconnect_transport (ctx);
- }
+ ctx = tls_ctx_get_w_thread (ts->opaque, ts->thread_index);
+ ctx->flags |= TLS_CONN_F_PASSIVE_CLOSE;
+ tls_ctx_transport_reset (ctx);
}
static void
tls_session_cleanup_ho (session_t *s)
{
- tls_ctx_t *ctx;
- u32 ho_index;
-
/* session opaque stores the opaque passed on connect */
- ho_index = s->opaque;
- ctx = tls_ctx_half_open_get (ho_index);
- session_half_open_delete_notify (&ctx->connection);
- tls_ctx_half_open_reader_unlock ();
- tls_ctx_half_open_free (ho_index);
+ tls_ctx_ho_try_free (s->opaque);
}
int
@@ -464,56 +481,69 @@ tls_session_disconnect_callback (session_t * tls_session)
|| vlib_thread_is_main_w_barrier ());
ctx = tls_ctx_get_w_thread (tls_session->opaque, tls_session->thread_index);
- ctx->is_passive_close = 1;
+ ctx->flags |= TLS_CONN_F_PASSIVE_CLOSE;
tls_ctx_transport_close (ctx);
}
int
-tls_session_accept_callback (session_t * tls_session)
+tls_session_accept_callback (session_t *ts)
{
- session_t *tls_listener, *app_session;
+ session_t *tls_listener;
tls_ctx_t *lctx, *ctx;
u32 ctx_handle;
- tls_listener =
- listen_session_get_from_handle (tls_session->listener_handle);
+ tls_listener = listen_session_get_from_handle (ts->listener_handle);
lctx = tls_listener_ctx_get (tls_listener->opaque);
ctx_handle = tls_ctx_alloc (lctx->tls_ctx_engine);
ctx = tls_ctx_get (ctx_handle);
- memcpy (ctx, lctx, sizeof (*lctx));
- ctx->c_thread_index = vlib_get_thread_index ();
+ clib_memcpy (ctx, lctx, sizeof (*lctx));
+ ctx->c_s_index = SESSION_INVALID_INDEX;
+ ctx->c_thread_index = ts->thread_index;
ctx->tls_ctx_handle = ctx_handle;
- tls_session->session_state = SESSION_STATE_READY;
- tls_session->opaque = ctx_handle;
- ctx->tls_session_handle = session_handle (tls_session);
+ ts->opaque = ctx_handle;
+ ctx->tls_session_handle = session_handle (ts);
ctx->listener_ctx_index = tls_listener->opaque;
ctx->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
ctx->ckpair_index = lctx->ckpair_index;
- /* Preallocate app session. Avoids allocating a session post handshake
- * on tls_session rx and potentially invalidating the session pool */
- app_session = session_alloc (ctx->c_thread_index);
- app_session->session_state = SESSION_STATE_CREATED;
- ctx->c_s_index = app_session->session_index;
-
TLS_DBG (1, "Accept on listener %u new connection [%u]%x",
tls_listener->opaque, vlib_get_thread_index (), ctx_handle);
- return tls_ctx_init_server (ctx);
+ if (tls_ctx_init_server (ctx))
+ {
+ /* Do not free ctx yet, in case we have pending rx events */
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
+ tls_disconnect_transport (ctx);
+ }
+
+ if (ts->session_state < SESSION_STATE_READY)
+ ts->session_state = SESSION_STATE_READY;
+
+ return 0;
}
int
-tls_app_rx_callback (session_t * tls_session)
+tls_app_rx_callback (session_t *ts)
{
tls_ctx_t *ctx;
/* DTLS session migrating, wait for next notification */
- if (PREDICT_FALSE (tls_session->flags & SESSION_F_IS_MIGRATING))
+ if (PREDICT_FALSE (ts->flags & SESSION_F_IS_MIGRATING))
return 0;
- ctx = tls_ctx_get (tls_session->opaque);
- tls_ctx_read (ctx, tls_session);
+ /* Read was rescheduled but the underlying transport has since been
+ * deleted */
+ if (PREDICT_FALSE ((ts->session_state == SESSION_STATE_TRANSPORT_DELETED)))
+ return 0;
+
+ ctx = tls_ctx_get (ts->opaque);
+ if (PREDICT_FALSE ((ctx->flags & TLS_CONN_F_NO_APP_SESSION) ||
+ (ctx->flags & TLS_CONN_F_APP_CLOSED)))
+ {
+ TLS_DBG (1, "Local App closed");
+ return 0;
+ }
+ tls_ctx_read (ctx, ts);
return 0;
}
@@ -532,9 +562,7 @@ int
tls_session_connected_cb (u32 tls_app_index, u32 ho_ctx_index,
session_t *tls_session, session_error_t err)
{
- session_t *app_session;
tls_ctx_t *ho_ctx, *ctx;
- session_type_t st;
u32 ctx_handle;
ho_ctx = tls_ctx_half_open_get (ho_ctx_index);
@@ -542,8 +570,9 @@ tls_session_connected_cb (u32 tls_app_index, u32 ho_ctx_index,
ctx_handle = tls_ctx_alloc (ho_ctx->tls_ctx_engine);
ctx = tls_ctx_get (ctx_handle);
clib_memcpy_fast (ctx, ho_ctx, sizeof (*ctx));
+
/* Half-open freed on tcp half-open cleanup notification */
- tls_ctx_half_open_reader_unlock ();
+ __atomic_fetch_or (&ho_ctx->flags, TLS_CONN_F_HO_DONE, __ATOMIC_RELEASE);
ctx->c_thread_index = vlib_get_thread_index ();
ctx->tls_ctx_handle = ctx_handle;
@@ -555,18 +584,17 @@ tls_session_connected_cb (u32 tls_app_index, u32 ho_ctx_index,
ctx->tls_session_handle = session_handle (tls_session);
tls_session->opaque = ctx_handle;
- tls_session->session_state = SESSION_STATE_READY;
- /* Preallocate app session. Avoids allocating a session post handshake
- * on tls_session rx and potentially invalidating the session pool */
- app_session = session_alloc (ctx->c_thread_index);
- app_session->session_state = SESSION_STATE_CREATED;
- ctx->c_s_index = app_session->session_index;
- st = session_type_from_proto_and_ip (TRANSPORT_PROTO_TLS, ctx->tcp_is_ip4);
- app_session->session_type = st;
- app_session->connection_index = ctx->tls_ctx_handle;
+ if (tls_ctx_init_client (ctx))
+ {
+ tls_notify_app_connected (ctx, SESSION_E_TLS_HANDSHAKE);
+ tls_disconnect_transport (ctx);
+ }
- return tls_ctx_init_client (ctx);
+ if (tls_session->session_state < SESSION_STATE_READY)
+ tls_session->session_state = SESSION_STATE_READY;
+
+ return 0;
}
int
@@ -598,13 +626,13 @@ tls_session_connected_callback (u32 tls_app_index, u32 ho_ctx_index,
u32 api_context;
ho_ctx = tls_ctx_half_open_get (ho_ctx_index);
+ ho_ctx->flags |= TLS_CONN_F_HO_DONE;
app_wrk = app_worker_get_if_valid (ho_ctx->parent_app_wrk_index);
if (app_wrk)
{
api_context = ho_ctx->parent_app_api_context;
app_worker_connect_notify (app_wrk, 0, err, api_context);
}
- tls_ctx_half_open_reader_unlock ();
return 0;
}
@@ -631,7 +659,7 @@ tls_app_session_cleanup (session_t * s, session_cleanup_ntf_t ntf)
}
ctx = tls_ctx_get (s->opaque);
- if (!ctx->no_app_session)
+ if (!(ctx->flags & TLS_CONN_F_NO_APP_SESSION))
session_transport_delete_notify (&ctx->connection);
tls_ctx_free (ctx);
}
@@ -657,7 +685,7 @@ dtls_migrate_ctx (void *arg)
/* Probably the app detached while the session was migrating. Cleanup */
if (session_half_open_migrated_notify (&ctx->connection))
{
- ctx->no_app_session = 1;
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
tls_disconnect (ctx->tls_ctx_handle, vlib_get_thread_index ());
return;
}
@@ -676,7 +704,7 @@ dtls_session_migrate_callback (session_t *us, session_handle_t new_sh)
ctx = tls_ctx_get_w_thread (us->opaque, us->thread_index);
ctx->tls_session_handle = new_sh;
cloned_ctx = tls_ctx_detach (ctx);
- ctx->is_migrated = 1;
+ ctx->flags |= TLS_CONN_F_MIGRATED;
session_half_open_migrate_notify (&ctx->connection);
session_send_rpc_evt_to_thread (new_thread, dtls_migrate_ctx,
@@ -685,11 +713,22 @@ dtls_session_migrate_callback (session_t *us, session_handle_t new_sh)
tls_ctx_free (ctx);
}
+static void
+tls_session_transport_closed_callback (session_t *ts)
+{
+ tls_ctx_t *ctx;
+
+ ctx = tls_ctx_get_w_thread (ts->opaque, ts->thread_index);
+ if (!(ctx->flags & TLS_CONN_F_NO_APP_SESSION))
+ session_transport_closed_notify (&ctx->connection);
+}
+
static session_cb_vft_t tls_app_cb_vft = {
.session_accept_callback = tls_session_accept_callback,
.session_disconnect_callback = tls_session_disconnect_callback,
.session_connected_callback = tls_session_connected_callback,
.session_reset_callback = tls_session_reset_callback,
+ .session_transport_closed_callback = tls_session_transport_closed_callback,
.half_open_cleanup_callback = tls_session_cleanup_ho,
.add_segment_callback = tls_add_segment_callback,
.del_segment_callback = tls_del_segment_callback,
@@ -742,7 +781,6 @@ tls_connect (transport_endpoint_cfg_t * tep)
ctx->srv_hostname = format (0, "%s", ccfg->hostname);
vec_terminate_c_string (ctx->srv_hostname);
}
- tls_ctx_half_open_reader_unlock ();
ctx->tls_ctx_engine = engine_type;
@@ -752,7 +790,10 @@ tls_connect (transport_endpoint_cfg_t * tep)
cargs->api_context = ctx_index;
cargs->sep_ext.ns_index = app->ns_index;
if ((rv = vnet_connect (cargs)))
- return rv;
+ {
+ tls_ctx_half_open_free (ctx_index);
+ return rv;
+ }
/* Track half-open tcp session in case we need to clean it up */
ctx->tls_session_handle = cargs->sh;
@@ -769,11 +810,12 @@ tls_disconnect (u32 ctx_handle, u32 thread_index)
TLS_DBG (1, "Disconnecting %x", ctx_handle);
ctx = tls_ctx_get (ctx_handle);
+ ctx->flags |= TLS_CONN_F_APP_CLOSED;
tls_ctx_app_close (ctx);
}
u32
-tls_start_listen (u32 app_listener_index, transport_endpoint_t * tep)
+tls_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep)
{
vnet_listen_args_t _bargs, *args = &_bargs;
transport_endpt_crypto_cfg_t *ccfg;
@@ -834,6 +876,8 @@ tls_start_listen (u32 app_listener_index, transport_endpoint_t * tep)
lctx->tls_ctx_engine = engine_type;
lctx->tls_type = sep->transport_proto;
lctx->ckpair_index = ccfg->ckpair_index;
+ lctx->c_s_index = app_listener_index;
+ lctx->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
if (tls_vfts[engine_type].ctx_start_listen (lctx))
{
@@ -910,40 +954,53 @@ tls_listener_get (u32 listener_index)
static transport_connection_t *
tls_half_open_get (u32 ho_index)
{
- tls_main_t *tm = &tls_main;
tls_ctx_t *ctx;
ctx = tls_ctx_half_open_get (ho_index);
- clib_rwlock_reader_unlock (&tm->half_open_rwlock);
return &ctx->connection;
}
static void
tls_cleanup_ho (u32 ho_index)
{
- tls_main_t *tm = &tls_main;
- session_handle_t tcp_sh;
tls_ctx_t *ctx;
+ session_t *s;
ctx = tls_ctx_half_open_get (ho_index);
- tcp_sh = ctx->tls_session_handle;
- clib_rwlock_reader_unlock (&tm->half_open_rwlock);
- session_cleanup_half_open (tcp_sh);
- tls_ctx_half_open_free (ho_index);
+ /* Already pending cleanup */
+ if (ctx->tls_session_handle == SESSION_INVALID_HANDLE)
+ {
+ ASSERT (ctx->flags & TLS_CONN_F_HO_DONE);
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
+ return;
+ }
+
+ s = session_get_from_handle (ctx->tls_session_handle);
+ /* If no pending cleanup notification, force cleanup now. Otherwise,
+ * wait for cleanup notification and set no app session on ctx */
+ if (s->session_state != SESSION_STATE_TRANSPORT_DELETED)
+ {
+ session_cleanup_half_open (ctx->tls_session_handle);
+ tls_ctx_half_open_free (ho_index);
+ }
+ else
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
}
int
tls_custom_tx_callback (void *session, transport_send_params_t * sp)
{
- session_t *app_session = (session_t *) session;
+ session_t *as = (session_t *) session;
tls_ctx_t *ctx;
- if (PREDICT_FALSE (app_session->session_state
- >= SESSION_STATE_TRANSPORT_CLOSED))
- return 0;
+ if (PREDICT_FALSE (as->session_state >= SESSION_STATE_TRANSPORT_CLOSED ||
+ as->session_state <= SESSION_STATE_ACCEPTING))
+ {
+ sp->flags |= TRANSPORT_SND_F_DESCHED;
+ return 0;
+ }
- sp->flags = 0;
- ctx = tls_ctx_get (app_session->connection_index);
- return tls_ctx_write (ctx, app_session, sp);
+ ctx = tls_ctx_get (as->connection_index);
+ return tls_ctx_write (ctx, as, sp);
}
u8 *
@@ -1054,6 +1111,7 @@ format_tls_half_open (u8 * s, va_list * args)
{
u32 ho_index = va_arg (*args, u32);
u32 __clib_unused thread_index = va_arg (*args, u32);
+ u32 __clib_unused verbose = va_arg (*args, u32);
session_t *tcp_ho;
tls_ctx_t *ho_ctx;
@@ -1065,7 +1123,6 @@ format_tls_half_open (u8 * s, va_list * args)
ho_ctx->parent_app_wrk_index, ho_ctx->tls_ctx_engine,
tcp_ho->thread_index, tcp_ho->session_index);
- tls_ctx_half_open_reader_unlock ();
return s;
}
@@ -1074,10 +1131,11 @@ tls_transport_endpoint_get (u32 ctx_handle, u32 thread_index,
transport_endpoint_t * tep, u8 is_lcl)
{
tls_ctx_t *ctx = tls_ctx_get_w_thread (ctx_handle, thread_index);
- session_t *tcp_session;
+ session_t *ts;
- tcp_session = session_get_from_handle (ctx->tls_session_handle);
- session_get_endpoint (tcp_session, tep, is_lcl);
+ ts = session_get_from_handle (ctx->tls_session_handle);
+ if (ts && ts->session_state < SESSION_STATE_TRANSPORT_DELETED)
+ session_get_endpoint (ts, tep, is_lcl);
}
static void
@@ -1096,12 +1154,11 @@ tls_transport_listener_endpoint_get (u32 ctx_handle,
static clib_error_t *
tls_enable (vlib_main_t * vm, u8 is_en)
{
- u32 add_segment_size = 256 << 20, first_seg_size = 32 << 20;
vnet_app_detach_args_t _da, *da = &_da;
vnet_app_attach_args_t _a, *a = &_a;
u64 options[APP_OPTIONS_N_OPTIONS];
tls_main_t *tm = &tls_main;
- u32 fifo_size = 128 << 12;
+ u32 fifo_size = 512 << 10;
if (!is_en)
{
@@ -1111,7 +1168,6 @@ tls_enable (vlib_main_t * vm, u8 is_en)
return 0;
}
- first_seg_size = tm->first_seg_size ? tm->first_seg_size : first_seg_size;
fifo_size = tm->fifo_size ? tm->fifo_size : fifo_size;
clib_memset (a, 0, sizeof (*a));
@@ -1121,8 +1177,8 @@ tls_enable (vlib_main_t * vm, u8 is_en)
a->api_client_index = APP_INVALID_INDEX;
a->options = options;
a->name = format (0, "tls");
- a->options[APP_OPTIONS_SEGMENT_SIZE] = first_seg_size;
- a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = add_segment_size;
+ a->options[APP_OPTIONS_SEGMENT_SIZE] = tm->first_seg_size;
+ a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = tm->add_seg_size;
a->options[APP_OPTIONS_RX_FIFO_SIZE] = fifo_size;
a->options[APP_OPTIONS_TX_FIFO_SIZE] = fifo_size;
a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
@@ -1311,11 +1367,12 @@ tls_init (vlib_main_t * vm)
if (!tm->ca_cert_path)
tm->ca_cert_path = TLS_CA_CERT_PATH;
- clib_rwlock_init (&tm->half_open_rwlock);
-
vec_validate (tm->rx_bufs, num_threads - 1);
vec_validate (tm->tx_bufs, num_threads - 1);
+ tm->first_seg_size = 32 << 20;
+ tm->add_seg_size = 256 << 20;
+
transport_register_protocol (TRANSPORT_PROTO_TLS, &tls_proto,
FIB_PROTOCOL_IP4, ~0);
transport_register_protocol (TRANSPORT_PROTO_TLS, &tls_proto,
@@ -1344,6 +1401,9 @@ tls_config_fn (vlib_main_t * vm, unformat_input_t * input)
else if (unformat (input, "first-segment-size %U", unformat_memory_size,
&tm->first_seg_size))
;
+ else if (unformat (input, "add-segment-size %U", unformat_memory_size,
+ &tm->add_seg_size))
+ ;
else if (unformat (input, "fifo-size %U", unformat_memory_size, &tmp))
{
if (tmp >= 0x100000000ULL)
@@ -1360,7 +1420,7 @@ tls_config_fn (vlib_main_t * vm, unformat_input_t * input)
return 0;
}
-VLIB_EARLY_CONFIG_FUNCTION (tls_config_fn, "tls");
+VLIB_CONFIG_FUNCTION (tls_config_fn, "tls");
tls_main_t *
vnet_tls_get_main (void)
diff --git a/src/vnet/tls/tls.h b/src/vnet/tls/tls.h
index eba70c0a8bb..6bd1371b984 100644
--- a/src/vnet/tls/tls.h
+++ b/src/vnet/tls/tls.h
@@ -36,26 +36,48 @@
#define TLS_DBG(_lvl, _fmt, _args...)
#endif
-/* *INDENT-OFF* */
typedef struct tls_cxt_id_
{
- union {
- session_handle_t app_session_handle;
- u32 parent_app_api_ctx;
- };
+ session_handle_t app_session_handle;
session_handle_t tls_session_handle;
void *migrate_ctx;
u32 parent_app_wrk_index;
u32 ssl_ctx;
- u32 listener_ctx_index;
+ union
+ {
+ u32 listener_ctx_index;
+ u32 parent_app_api_ctx;
+ };
u8 tcp_is_ip4;
u8 tls_engine_id;
} tls_ctx_id_t;
-/* *INDENT-ON* */
STATIC_ASSERT (sizeof (tls_ctx_id_t) <= TRANSPORT_CONN_ID_LEN,
"ctx id must be less than TRANSPORT_CONN_ID_LEN");
+#define foreach_tls_conn_flags \
+ _ (HO_DONE, "ho-done") \
+ _ (PASSIVE_CLOSE, "passive-close") \
+ _ (APP_CLOSED, "app-closed") \
+ _ (MIGRATED, "migrated") \
+ _ (NO_APP_SESSION, "no-app-session") \
+ _ (RESUME, "resume") \
+ _ (HS_DONE, "handshake-done")
+
+typedef enum tls_conn_flags_bit_
+{
+#define _(sym, str) TLS_CONN_F_BIT_##sym,
+ foreach_tls_conn_flags
+#undef _
+} tls_conn_flags_bit_t;
+
+typedef enum tls_conn_flags_
+{
+#define _(sym, str) TLS_CONN_F_##sym = 1 << TLS_CONN_F_BIT_##sym,
+ foreach_tls_conn_flags
+#undef _
+} __clib_packed tls_conn_flags_t;
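The bit/mask enum pair is generated from a single list; mechanically expanding the first two entries of foreach_tls_conn_flags shows the intent:

  TLS_CONN_F_BIT_HO_DONE = 0,        TLS_CONN_F_HO_DONE = 1 << 0,        /* 0x01 */
  TLS_CONN_F_BIT_PASSIVE_CLOSE = 1,  TLS_CONN_F_PASSIVE_CLOSE = 1 << 1,  /* 0x02 */

Bit indices, masks, and the per-flag strings stay in lockstep, and the whole set packs into the single tls_conn_flags_t byte that replaces the five u8 booleans removed from tls_ctx_ below.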
+
typedef struct tls_ctx_
{
union
@@ -76,11 +98,7 @@ typedef struct tls_ctx_
#define parent_app_api_context c_tls_ctx_id.parent_app_api_ctx
#define migration_ctx c_tls_ctx_id.migrate_ctx
- u8 is_passive_close;
- u8 resume;
- u8 app_closed;
- u8 no_app_session;
- u8 is_migrated;
+ tls_conn_flags_t flags;
u8 *srv_hostname;
u32 evt_index;
u32 ckpair_index;
@@ -92,7 +110,8 @@ typedef struct tls_main_
u32 app_index;
tls_ctx_t *listener_ctx_pool;
tls_ctx_t *half_open_ctx_pool;
- clib_rwlock_t half_open_rwlock;
+ u32 *postponed_ho_free;
+ u32 *ho_free_list;
u8 **rx_bufs;
u8 **tx_bufs;
@@ -102,6 +121,7 @@ typedef struct tls_main_
u8 use_test_cert_in_ca;
char *ca_cert_path;
u64 first_seg_size;
+ u64 add_seg_size;
u32 fifo_size;
} tls_main_t;
@@ -123,7 +143,9 @@ typedef struct tls_engine_vft_
int (*ctx_start_listen) (tls_ctx_t * ctx);
int (*ctx_stop_listen) (tls_ctx_t * ctx);
int (*ctx_transport_close) (tls_ctx_t * ctx);
+ int (*ctx_transport_reset) (tls_ctx_t *ctx);
int (*ctx_app_close) (tls_ctx_t * ctx);
+ int (*ctx_reinit_cachain) (void);
} tls_engine_vft_t;
tls_main_t *vnet_tls_get_main (void);
@@ -136,7 +158,13 @@ int tls_add_vpp_q_builtin_rx_evt (session_t * s);
int tls_notify_app_accept (tls_ctx_t * ctx);
int tls_notify_app_connected (tls_ctx_t * ctx, session_error_t err);
void tls_notify_app_enqueue (tls_ctx_t * ctx, session_t * app_session);
+void tls_notify_app_io_error (tls_ctx_t *ctx);
void tls_disconnect_transport (tls_ctx_t * ctx);
+int tls_reinit_ca_chain (crypto_engine_type_t tls_engine_id);
+
+void tls_add_postponed_ho_cleanups (u32 ho_index);
+void tls_flush_postponed_ho_cleanups ();
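These two helpers replace the old rwlock-guarded teardown: postponed_ho_free stages half-open indices whose cleanup could not run immediately, and a later flush frees them on the owning thread. Their bodies are not part of this hunk; a minimal sketch of the intended pairing, assuming vec-based staging only:

  void
  tls_flush_postponed_ho_cleanups ()
  {
    tls_main_t *tm = &tls_main;
    u32 *ho_indexp;

    /* hypothetical body: drain staged indices and free the contexts */
    vec_foreach (ho_indexp, tm->postponed_ho_free)
      tls_ctx_half_open_free (*ho_indexp);
    vec_reset_length (tm->postponed_ho_free);
  }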
+
#endif /* SRC_VNET_TLS_TLS_H_ */
/*
diff --git a/src/vnet/tunnel/tunnel.c b/src/vnet/tunnel/tunnel.c
index d45a46205d8..0d27ad82538 100644
--- a/src/vnet/tunnel/tunnel.c
+++ b/src/vnet/tunnel/tunnel.c
@@ -66,16 +66,19 @@ unformat_tunnel_mode (unformat_input_t * input, va_list * args)
u8 *
format_tunnel_encap_decap_flags (u8 * s, va_list * args)
{
- tunnel_encap_decap_flags_t f = va_arg (*args, int);
+ tunnel_encap_decap_flags_t f = va_arg (*args, u32);
if (f == TUNNEL_ENCAP_DECAP_FLAG_NONE)
s = format (s, "none");
-
+ else
+ {
#define _(a, b, c) \
- else if (f & TUNNEL_ENCAP_DECAP_FLAG_##a) s = format (s, "%s ", b);
- foreach_tunnel_encap_decap_flag
+ if (f & TUNNEL_ENCAP_DECAP_FLAG_##a) \
+ s = format (s, "%s ", b);
+ foreach_tunnel_encap_decap_flag
#undef _
- return (s);
+ }
+ return (s);
}
uword
@@ -95,15 +98,19 @@ unformat_tunnel_encap_decap_flags (unformat_input_t * input, va_list * args)
u8 *
format_tunnel_flags (u8 *s, va_list *args)
{
- tunnel_flags_t f = va_arg (*args, int);
+ tunnel_flags_t f = va_arg (*args, u32);
if (f == TUNNEL_FLAG_NONE)
s = format (s, "none");
-
-#define _(a, b, c) else if (f & TUNNEL_FLAG_##a) s = format (s, "%s ", c);
- foreach_tunnel_flag
+ else
+ {
+#define _(a, b, c) \
+ if (f & TUNNEL_FLAG_##a) \
+ s = format (s, "%s ", c);
+ foreach_tunnel_flag
#undef _
- return (s);
+ }
+ return (s);
}
uword
diff --git a/src/vnet/tunnel/tunnel_types_api.c b/src/vnet/tunnel/tunnel_types_api.c
index 894eecb8407..247c13cd416 100644
--- a/src/vnet/tunnel/tunnel_types_api.c
+++ b/src/vnet/tunnel/tunnel_types_api.c
@@ -60,9 +60,14 @@ tunnel_flags_decode (vl_api_tunnel_flags_t f, tunnel_flags_t *o)
}
vl_api_tunnel_flags_t
-tunnel_flags_encode (tunnel_flags_t f)
+tunnel_flags_encode (tunnel_flags_t in)
{
- return ((vl_api_tunnel_flags_t) f);
+ vl_api_tunnel_flags_t out = 0;
+
+ if (in & TUNNEL_FLAG_TRACK_MTU)
+ out |= TUNNEL_API_FLAG_TRACK_MTU;
+
+ return (out);
}
int
diff --git a/src/vnet/udp/udp.api b/src/vnet/udp/udp.api
index 02176be7c2b..6b468be461a 100644
--- a/src/vnet/udp/udp.api
+++ b/src/vnet/udp/udp.api
@@ -32,7 +32,7 @@ import "vnet/ip/ip_types.api";
* @param dst_ip - Encap destination address
* @param src_ip - Encap source address
* @param dst_port - Encap destination port
- * @param src_port - Encap source port
+ * @param src_port - Encap source port; 0 means per-packet entropy per RFC 7510
* @param id - VPP assigned id; ignored in add message, set in dump
*/
typedef udp_encap
diff --git a/src/vnet/udp/udp.c b/src/vnet/udp/udp.c
index 40e0053bb96..9c1121f7cfb 100644
--- a/src/vnet/udp/udp.c
+++ b/src/vnet/udp/udp.c
@@ -23,97 +23,63 @@
udp_main_t udp_main;
static void
-udp_connection_register_port (vlib_main_t * vm, u16 lcl_port, u8 is_ip4)
+udp_connection_register_port (u16 lcl_port, u8 is_ip4)
{
udp_main_t *um = &udp_main;
- udp_dst_port_info_t *pi;
u16 *n;
- pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- if (!pi)
- {
- udp_add_dst_port (um, lcl_port, 0, is_ip4);
- pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- pi->n_connections = 1;
- }
- else
- {
- pi->n_connections += 1;
- /* Do not return. The fact that the pi is valid does not mean
- * it's up to date */
- }
+ /* Set up the udp protocol -> next index sparse vector mapping. Do not
+ * set up udp_dst_port_info_t, as that is used to distinguish between
+ * external and transport-consumed ports */
- pi->node_index = is_ip4 ? udp4_input_node.index : udp6_input_node.index;
- pi->next_index = um->local_to_input_edge[is_ip4];
-
- /* Setup udp protocol -> next index sparse vector mapping. */
if (is_ip4)
- n = sparse_vec_validate (um->next_by_dst_port4,
- clib_host_to_net_u16 (lcl_port));
+ n = sparse_vec_validate (um->next_by_dst_port4, lcl_port);
else
- n = sparse_vec_validate (um->next_by_dst_port6,
- clib_host_to_net_u16 (lcl_port));
+ n = sparse_vec_validate (um->next_by_dst_port6, lcl_port);
+
+ n[0] = um->local_to_input_edge[is_ip4];
- n[0] = pi->next_index;
+ __atomic_add_fetch (&um->transport_ports_refcnt[is_ip4][lcl_port], 1,
+ __ATOMIC_RELAXED);
+}
+
+void
+udp_connection_share_port (u16 lcl_port, u8 is_ip4)
+{
+ udp_main_t *um = &udp_main;
+ __atomic_add_fetch (&um->transport_ports_refcnt[is_ip4][lcl_port], 1,
+ __ATOMIC_RELAXED);
}
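With udp_dst_port_info_t reserved for externally registered ports, the sparse vector alone now distinguishes consumers: a transport-owned port maps to local_to_input_edge, and a released one is left as UDP_NO_NODE_SET (sparse vectors do not support removal). A hedged sketch of the consumer-side decision this enables, not the literal input-node code:

  u32 i0 = sparse_vec_index (um->next_by_dst_port4, udp0->dst_port);
  u16 next0 = vec_elt (um->next_by_dst_port4, i0);

  if (i0 == SPARSE_VEC_INVALID_INDEX || next0 == UDP_NO_NODE_SET)
    ; /* no consumer: punt / icmp-unreachable handling */
  else
    ; /* valid edge: external registration or transport port */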
static void
udp_connection_unregister_port (u16 lcl_port, u8 is_ip4)
{
udp_main_t *um = &udp_main;
- udp_dst_port_info_t *pi;
+ u16 *n;
- pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- if (!pi)
+ /* Needed because listeners are not tracked as local endpoints */
+ if (__atomic_sub_fetch (&um->transport_ports_refcnt[is_ip4][lcl_port], 1,
+ __ATOMIC_RELAXED))
return;
- if (!pi->n_connections)
- {
- clib_warning ("no connections using port %u", lcl_port);
- return;
- }
-
- if (!clib_atomic_sub_fetch (&pi->n_connections, 1))
- udp_unregister_dst_port (0, lcl_port, is_ip4);
-}
-
-void
-udp_connection_share_port (u16 lcl_port, u8 is_ip4)
-{
- udp_main_t *um = &udp_main;
- udp_dst_port_info_t *pi;
+ if (is_ip4)
+ n = sparse_vec_validate (um->next_by_dst_port4, lcl_port);
+ else
+ n = sparse_vec_validate (um->next_by_dst_port6, lcl_port);
- /* Done without a lock but the operation is atomic. Writers to pi hash
- * table and vector should be guarded by a barrier sync */
- pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- clib_atomic_fetch_add_rel (&pi->n_connections, 1);
+ n[0] = UDP_NO_NODE_SET;
}
udp_connection_t *
udp_connection_alloc (u32 thread_index)
{
- udp_main_t *um = &udp_main;
+ udp_worker_t *wrk = udp_worker_get (thread_index);
udp_connection_t *uc;
- u32 will_expand = 0;
- pool_get_aligned_will_expand (um->connections[thread_index], will_expand,
- CLIB_CACHE_LINE_BYTES);
- if (PREDICT_FALSE (will_expand))
- {
- clib_spinlock_lock_if_init (&udp_main.peekers_write_locks
- [thread_index]);
- pool_get_aligned (udp_main.connections[thread_index], uc,
- CLIB_CACHE_LINE_BYTES);
- clib_spinlock_unlock_if_init (&udp_main.peekers_write_locks
- [thread_index]);
- }
- else
- {
- pool_get_aligned (um->connections[thread_index], uc,
- CLIB_CACHE_LINE_BYTES);
- }
+ pool_get_aligned_safe (wrk->connections, uc, CLIB_CACHE_LINE_BYTES);
+
clib_memset (uc, 0, sizeof (*uc));
- uc->c_c_index = uc - um->connections[thread_index];
+ uc->c_c_index = uc - wrk->connections;
uc->c_thread_index = thread_index;
uc->c_proto = TRANSPORT_PROTO_UDP;
return uc;
@@ -122,20 +88,20 @@ udp_connection_alloc (u32 thread_index)
void
udp_connection_free (udp_connection_t * uc)
{
- u32 thread_index = uc->c_thread_index;
+ udp_worker_t *wrk = udp_worker_get (uc->c_thread_index);
+
clib_spinlock_free (&uc->rx_lock);
if (CLIB_DEBUG)
clib_memset (uc, 0xFA, sizeof (*uc));
- pool_put (udp_main.connections[thread_index], uc);
+ pool_put (wrk->connections, uc);
}
static void
udp_connection_cleanup (udp_connection_t * uc)
{
- transport_endpoint_cleanup (TRANSPORT_PROTO_UDP, &uc->c_lcl_ip,
- uc->c_lcl_port);
- udp_connection_unregister_port (clib_net_to_host_u16 (uc->c_lcl_port),
- uc->c_is_ip4);
+ transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &uc->c_lcl_ip,
+ uc->c_lcl_port);
+ udp_connection_unregister_port (uc->c_lcl_port, uc->c_is_ip4);
udp_connection_free (uc);
}
@@ -146,6 +112,38 @@ udp_connection_delete (udp_connection_t * uc)
udp_connection_cleanup (uc);
}
+static void
+udp_handle_cleanups (void *args)
+{
+ u32 thread_index = (u32) pointer_to_uword (args);
+ udp_connection_t *uc;
+ udp_worker_t *wrk;
+ u32 *uc_index;
+
+ wrk = udp_worker_get (thread_index);
+ vec_foreach (uc_index, wrk->pending_cleanups)
+ {
+ uc = udp_connection_get (*uc_index, thread_index);
+ udp_connection_delete (uc);
+ }
+ vec_reset_length (wrk->pending_cleanups);
+}
+
+static void
+udp_connection_program_cleanup (udp_connection_t *uc)
+{
+ uword thread_index = uc->c_thread_index;
+ udp_worker_t *wrk;
+
+ wrk = udp_worker_get (uc->c_thread_index);
+ vec_add1 (wrk->pending_cleanups, uc->c_c_index);
+
+ if (vec_len (wrk->pending_cleanups) == 1)
+ session_send_rpc_evt_to_thread_force (
+ thread_index, udp_handle_cleanups,
+ uword_to_pointer (thread_index, void *));
+}
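Cleanup is batched per worker: only the call that makes pending_cleanups non-empty posts the RPC, and udp_handle_cleanups later drains whatever accumulated. On one thread, with hypothetical indices:

  udp_connection_program_cleanup (uc0); /* pending: [i0]     -> RPC posted */
  udp_connection_program_cleanup (uc1); /* pending: [i0, i1] -> no new RPC */
  /* RPC fires once: both connections deleted, vector reset */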
+
static u8
udp_connection_port_used_extern (u16 lcl_port, u8 is_ip4)
{
@@ -153,8 +151,7 @@ udp_connection_port_used_extern (u16 lcl_port, u8 is_ip4)
udp_dst_port_info_t *pi;
pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- return (pi && !pi->n_connections
- && udp_is_valid_dst_port (lcl_port, is_ip4));
+ return (pi && udp_is_valid_dst_port (lcl_port, is_ip4));
}
static u16
@@ -165,18 +162,15 @@ udp_default_mtu (udp_main_t * um, u8 is_ip4)
}
static u32
-udp_session_bind (u32 session_index, transport_endpoint_t * lcl)
+udp_session_bind (u32 session_index, transport_endpoint_cfg_t *lcl)
{
udp_main_t *um = vnet_get_udp_main ();
- vlib_main_t *vm = vlib_get_main ();
transport_endpoint_cfg_t *lcl_ext;
udp_connection_t *listener;
- u16 lcl_port_ho;
void *iface_ip;
- lcl_port_ho = clib_net_to_host_u16 (lcl->port);
-
- if (udp_connection_port_used_extern (lcl_port_ho, lcl->is_ip4))
+ if (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl->port),
+ lcl->is_ip4))
{
clib_warning ("port already used");
return SESSION_E_PORTINUSE;
@@ -200,7 +194,8 @@ udp_session_bind (u32 session_index, transport_endpoint_t * lcl)
listener->c_proto = TRANSPORT_PROTO_UDP;
listener->c_s_index = session_index;
listener->c_fib_index = lcl->fib_index;
- listener->mss = udp_default_mtu (um, listener->c_is_ip4);
+ listener->mss =
+ lcl->mss ? lcl->mss : udp_default_mtu (um, listener->c_is_ip4);
listener->flags |= UDP_CONN_F_OWNS_PORT | UDP_CONN_F_LISTEN;
lcl_ext = (transport_endpoint_cfg_t *) lcl;
if (lcl_ext->transport_flags & TRANSPORT_CFG_F_CONNECTED)
@@ -208,8 +203,10 @@ udp_session_bind (u32 session_index, transport_endpoint_t * lcl)
else
listener->c_flags |= TRANSPORT_CONNECTION_F_CLESS;
clib_spinlock_init (&listener->rx_lock);
+ if (!um->csum_offload)
+ listener->cfg_flags |= UDP_CFG_F_NO_CSUM_OFFLOAD;
- udp_connection_register_port (vm, lcl_port_ho, lcl->is_ip4);
+ udp_connection_register_port (listener->c_lcl_port, lcl->is_ip4);
return listener->c_c_index;
}
@@ -220,8 +217,7 @@ udp_session_unbind (u32 listener_index)
udp_connection_t *listener;
listener = udp_listener_get (listener_index);
- udp_connection_unregister_port (clib_net_to_host_u16 (listener->c_lcl_port),
- listener->c_is_ip4);
+ udp_connection_unregister_port (listener->c_lcl_port, listener->c_is_ip4);
clib_spinlock_free (&listener->rx_lock);
pool_put (um->listener_pool, listener);
return 0;
@@ -236,30 +232,127 @@ udp_session_get_listener (u32 listener_index)
return &us->connection;
}
+always_inline u16
+udp_compute_checksum (vlib_main_t *vm, vlib_buffer_t *b, u8 csum_offload,
+ u8 is_ip4)
+{
+ u16 csum = 0;
+
+ if (csum_offload)
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_UDP_CKSUM);
+ else
+ {
+ if (is_ip4)
+ csum =
+ ip4_tcp_udp_compute_checksum (vm, b, vlib_buffer_get_current (b));
+ else
+ {
+ int bogus = 0;
+ csum = ip6_tcp_udp_icmp_compute_checksum (
+ vm, b, vlib_buffer_get_current (b), &bogus);
+ }
+ }
+
+ return csum;
+}
+
+always_inline u32
+udp_push_one_header (vlib_main_t *vm, udp_connection_t *uc, vlib_buffer_t *b,
+ u8 is_cless)
+{
+ udp_header_t *uh;
+
+ b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ /* reuse tcp medatada for now */
+ vnet_buffer (b)->tcp.connection_index = uc->c_c_index;
+
+ if (!is_cless)
+ {
+ uh = vlib_buffer_push_udp (b, uc->c_lcl_port, uc->c_rmt_port);
+
+ if (uc->c_is_ip4)
+ vlib_buffer_push_ip4_custom (vm, b, &uc->c_lcl_ip4, &uc->c_rmt_ip4,
+ IP_PROTOCOL_UDP, udp_csum_offload (uc),
+ 0 /* is_df */, uc->c_dscp);
+ else
+ vlib_buffer_push_ip6 (vm, b, &uc->c_lcl_ip6, &uc->c_rmt_ip6,
+ IP_PROTOCOL_UDP);
+
+ vnet_buffer (b)->tcp.flags = 0;
+ }
+ else
+ {
+ u8 *data = vlib_buffer_get_current (b);
+ session_dgram_hdr_t hdr;
+
+ hdr = *(session_dgram_hdr_t *) (data - sizeof (hdr));
+
+ /* Local port assumed to be bound, not overwriting it */
+ uh = vlib_buffer_push_udp (b, uc->c_lcl_port, hdr.rmt_port);
+
+ if (uc->c_is_ip4)
+ vlib_buffer_push_ip4_custom (vm, b, &hdr.lcl_ip.ip4, &hdr.rmt_ip.ip4,
+ IP_PROTOCOL_UDP, udp_csum_offload (uc),
+ 0 /* is_df */, uc->c_dscp);
+ else
+ vlib_buffer_push_ip6 (vm, b, &hdr.lcl_ip.ip6, &hdr.rmt_ip.ip6,
+ IP_PROTOCOL_UDP);
+
+ /* Not connected udp session. Mark buffer for custom handling in
+ * udp_output */
+ vnet_buffer (b)->tcp.flags |= UDP_CONN_F_LISTEN;
+ }
+
+ uh->checksum =
+ udp_compute_checksum (vm, b, udp_csum_offload (uc), uc->c_is_ip4);
+
+ return 0;
+}
+
+always_inline void
+udp_push_header_batch (udp_connection_t *uc, vlib_buffer_t **bs, u32 n_bufs,
+ u8 is_cless)
+{
+ vlib_main_t *vm = vlib_get_main ();
+
+ while (n_bufs >= 4)
+ {
+ vlib_prefetch_buffer_header (bs[2], STORE);
+ vlib_prefetch_buffer_header (bs[3], STORE);
+
+ udp_push_one_header (vm, uc, bs[0], is_cless);
+ udp_push_one_header (vm, uc, bs[1], is_cless);
+
+ n_bufs -= 2;
+ bs += 2;
+ }
+ while (n_bufs)
+ {
+ if (n_bufs > 1)
+ vlib_prefetch_buffer_header (bs[1], STORE);
+
+ udp_push_one_header (vm, uc, bs[0], is_cless);
+
+ n_bufs -= 1;
+ bs += 1;
+ }
+}
+
static u32
-udp_push_header (transport_connection_t * tc, vlib_buffer_t * b)
+udp_push_header (transport_connection_t *tc, vlib_buffer_t **bs, u32 n_bufs)
{
udp_connection_t *uc;
- vlib_main_t *vm = vlib_get_main ();
uc = udp_connection_from_transport (tc);
-
- vlib_buffer_push_udp (b, uc->c_lcl_port, uc->c_rmt_port, 1);
- if (tc->is_ip4)
- vlib_buffer_push_ip4_custom (vm, b, &uc->c_lcl_ip4, &uc->c_rmt_ip4,
- IP_PROTOCOL_UDP, 1 /* csum offload */ ,
- 0 /* is_df */ );
+ if (uc->flags & UDP_CONN_F_CONNECTED)
+ udp_push_header_batch (uc, bs, n_bufs, 0 /* is_cless */);
else
- vlib_buffer_push_ip6 (vm, b, &uc->c_lcl_ip6, &uc->c_rmt_ip6,
- IP_PROTOCOL_UDP);
- vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
- vnet_buffer (b)->sw_if_index[VLIB_TX] = uc->c_fib_index;
- b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ udp_push_header_batch (uc, bs, n_bufs, 1 /* is_cless */);
if (PREDICT_FALSE (uc->flags & UDP_CONN_F_CLOSING))
{
- if (!transport_max_tx_dequeue (&uc->connection))
- udp_connection_delete (uc);
+ if (!transport_tx_fifo_has_dgram (&uc->connection))
+ udp_connection_program_cleanup (uc);
}
return 0;
@@ -281,11 +374,11 @@ udp_session_close (u32 connection_index, u32 thread_index)
udp_connection_t *uc;
uc = udp_connection_get (connection_index, thread_index);
- if (!uc)
+ if (!uc || (uc->flags & UDP_CONN_F_MIGRATED))
return;
- if (!transport_max_tx_dequeue (&uc->connection))
- udp_connection_delete (uc);
+ if (!transport_tx_fifo_has_dgram (&uc->connection))
+ udp_connection_program_cleanup (uc);
else
uc->flags |= UDP_CONN_F_CLOSING;
}
@@ -323,57 +416,42 @@ udp_session_send_params (transport_connection_t * tconn,
static int
udp_open_connection (transport_endpoint_cfg_t * rmt)
{
- vlib_main_t *vm = vlib_get_main ();
- u32 thread_index = vm->thread_index;
udp_main_t *um = &udp_main;
ip46_address_t lcl_addr;
udp_connection_t *uc;
+ u32 thread_index;
u16 lcl_port;
int rv;
rv = transport_alloc_local_endpoint (TRANSPORT_PROTO_UDP, rmt, &lcl_addr,
&lcl_port);
if (rv)
- {
- if (rv != SESSION_E_PORTINUSE)
- return rv;
-
- if (udp_connection_port_used_extern (lcl_port, rmt->is_ip4))
- return SESSION_E_PORTINUSE;
-
- /* If port in use, check if 5-tuple is also in use */
- if (session_lookup_connection (rmt->fib_index, &lcl_addr, &rmt->ip,
- lcl_port, rmt->port, TRANSPORT_PROTO_UDP,
- rmt->is_ip4))
- return SESSION_E_PORTINUSE;
-
- /* 5-tuple is available so increase lcl endpoint refcount and proceed
- * with connection allocation */
- transport_share_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr,
- lcl_port);
- goto conn_alloc;
- }
+ return rv;
- if (udp_is_valid_dst_port (lcl_port, rmt->is_ip4))
+ if (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl_port),
+ rmt->is_ip4))
{
/* If specific source port was requested abort */
if (rmt->peer.port)
- return SESSION_E_PORTINUSE;
+ {
+ transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr,
+ lcl_port);
+ return SESSION_E_PORTINUSE;
+ }
/* Try to find a port that's not used */
- while (udp_is_valid_dst_port (lcl_port, rmt->is_ip4))
+ while (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl_port),
+ rmt->is_ip4))
{
- lcl_port = transport_alloc_local_port (TRANSPORT_PROTO_UDP,
- &lcl_addr);
- if (lcl_port < 1)
+ transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr,
+ lcl_port);
+ lcl_port =
+ transport_alloc_local_port (TRANSPORT_PROTO_UDP, &lcl_addr, rmt);
+ if ((int) lcl_port < 1)
return SESSION_E_PORTINUSE;
}
}
-conn_alloc:
-
- udp_connection_register_port (vm, lcl_port, rmt->is_ip4);
-
/* We don't poll main thread if we have workers */
thread_index = transport_cl_thread ();
@@ -381,11 +459,14 @@ conn_alloc:
ip_copy (&uc->c_rmt_ip, &rmt->ip, rmt->is_ip4);
ip_copy (&uc->c_lcl_ip, &lcl_addr, rmt->is_ip4);
uc->c_rmt_port = rmt->port;
- uc->c_lcl_port = clib_host_to_net_u16 (lcl_port);
+ uc->c_lcl_port = lcl_port;
uc->c_is_ip4 = rmt->is_ip4;
uc->c_proto = TRANSPORT_PROTO_UDP;
uc->c_fib_index = rmt->fib_index;
+ uc->c_dscp = rmt->dscp;
uc->mss = rmt->mss ? rmt->mss : udp_default_mtu (um, uc->c_is_ip4);
+ if (rmt->peer.sw_if_index != ENDPOINT_INVALID_INDEX)
+ uc->sw_if_index = rmt->peer.sw_if_index;
uc->flags |= UDP_CONN_F_OWNS_PORT;
if (rmt->transport_flags & TRANSPORT_CFG_F_CONNECTED)
{
@@ -396,6 +477,12 @@ conn_alloc:
clib_spinlock_init (&uc->rx_lock);
uc->c_flags |= TRANSPORT_CONNECTION_F_CLESS;
}
+ if (!um->csum_offload)
+ uc->cfg_flags |= UDP_CFG_F_NO_CSUM_OFFLOAD;
+ uc->next_node_index = rmt->next_node_index;
+ uc->next_node_opaque = rmt->next_node_opaque;
+
+ udp_connection_register_port (uc->c_lcl_port, rmt->is_ip4);
return uc->c_c_index;
}
@@ -445,8 +532,90 @@ format_udp_listener_session (u8 * s, va_list * args)
return format (s, "%U", format_udp_connection, uc, verbose);
}
-/* *INDENT-OFF* */
+static void
+udp_realloc_ports_sv (u16 **ports_nh_svp)
+{
+ u16 port, port_no, *ports_nh_sv, *mc;
+ u32 *ports = 0, *nh = 0, msum, i;
+ sparse_vec_header_t *h;
+ uword sv_index, *mb;
+
+ ports_nh_sv = *ports_nh_svp;
+
+ for (port = 1; port < 65535; port++)
+ {
+ port_no = clib_host_to_net_u16 (port);
+
+ sv_index = sparse_vec_index (ports_nh_sv, port_no);
+ if (sv_index != SPARSE_VEC_INVALID_INDEX)
+ {
+ vec_add1 (ports, port_no);
+ vec_add1 (nh, ports_nh_sv[sv_index]);
+ }
+ }
+
+ sparse_vec_free (ports_nh_sv);
+
+ ports_nh_sv =
+ sparse_vec_new (/* elt bytes */ sizeof (ports_nh_sv[0]),
+ /* bits in index */ BITS (((udp_header_t *) 0)->dst_port));
+
+ vec_resize (ports_nh_sv, 65535);
+
+ for (port = 1; port < 65535; port++)
+ ports_nh_sv[port] = UDP_NO_NODE_SET;
+
+ for (i = 0; i < vec_len (ports); i++)
+ ports_nh_sv[ports[i]] = nh[i];
+
+ h = sparse_vec_header (ports_nh_sv);
+ vec_foreach (mb, h->is_member_bitmap)
+ *mb = (uword) ~0;
+
+ msum = 0;
+ vec_foreach (mc, h->member_counts)
+ {
+ *mc = msum;
+ msum += msum == 0 ? 63 : 64;
+ }
+
+ vec_free (ports);
+ vec_free (nh);
+
+ *ports_nh_svp = ports_nh_sv;
+}
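The tail of the function rebuilds the sparse-vec header by hand so that every port is already a member. The 63-then-64 increments are, on one plausible reading, a consequence of dense index 0 being reserved as SPARSE_VEC_INVALID_INDEX; under that assumption the cumulative counts line up as:

  /* hedged reading of the member_counts fixup:
   * block 0 (ports 0..63):    member_counts[0] = 0  (one dense slot lost
   *                            to the reserved invalid element)
   * block 1 (ports 64..127):  member_counts[1] = 63
   * block 2 (ports 128..191): member_counts[2] = 127, then +64 per block
   */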
+
+static clib_error_t *
+udp_enable_disable (vlib_main_t *vm, u8 is_en)
+{
+ udp_main_t *um = &udp_main;
+
+ /* Not ideal. The sparse vector used to map ports to next nodes assumes
+ * only a few ports are ever used. When udp transport is enabled this does
+ * not hold and, to make matters worse, ports are consumed in a random
+ * order.
+ *
+ * This can lead to a lot of slow updates to internal data structures
+ * which in turn can slow udp connection allocations until all ports are
+ * eventually consumed.
+ *
+ * Consequently, reallocate sparse vector, preallocate all ports and have
+ * them point to UDP_NO_NODE_SET. We could consider switching the sparse
+ * vector to a preallocated vector but that would increase memory
+ * consumption for vpp deployments that do not rely on host stack.
+ */
+
+ udp_realloc_ports_sv (&um->next_by_dst_port4);
+ udp_realloc_ports_sv (&um->next_by_dst_port6);
+
+ vec_validate (um->transport_ports_refcnt[0], 65535);
+ vec_validate (um->transport_ports_refcnt[1], 65535);
+
+ return 0;
+}
+
static const transport_proto_vft_t udp_proto = {
+ .enable = udp_enable_disable,
.start_listen = udp_session_bind,
.connect = udp_open_connection,
.stop_listen = udp_session_unbind,
@@ -467,7 +636,6 @@ static const transport_proto_vft_t udp_proto = {
.service_type = TRANSPORT_SERVICE_CL,
},
};
-/* *INDENT-ON* */
static clib_error_t *
udp_init (vlib_main_t * vm)
@@ -477,7 +645,6 @@ udp_init (vlib_main_t * vm)
vlib_thread_main_t *tm = vlib_get_thread_main ();
u32 num_threads;
ip_protocol_info_t *pi;
- int i;
/*
* Registrations
@@ -490,28 +657,18 @@ udp_init (vlib_main_t * vm)
pi->format_header = format_udp_header;
pi->unformat_pg_edit = unformat_pg_udp_header;
- /* Register as transport with URI */
+ /* Register as transport with session layer */
transport_register_protocol (TRANSPORT_PROTO_UDP, &udp_proto,
- FIB_PROTOCOL_IP4, ip4_lookup_node.index);
+ FIB_PROTOCOL_IP4, udp4_output_node.index);
transport_register_protocol (TRANSPORT_PROTO_UDP, &udp_proto,
- FIB_PROTOCOL_IP6, ip6_lookup_node.index);
+ FIB_PROTOCOL_IP6, udp6_output_node.index);
/*
* Initialize data structures
*/
num_threads = 1 /* main thread */ + tm->n_threads;
- vec_validate (um->connections, num_threads - 1);
- vec_validate (um->connection_peekers, num_threads - 1);
- vec_validate (um->peekers_readers_locks, num_threads - 1);
- vec_validate (um->peekers_write_locks, num_threads - 1);
-
- if (num_threads > 1)
- for (i = 0; i < num_threads; i++)
- {
- clib_spinlock_init (&um->peekers_readers_locks[i]);
- clib_spinlock_init (&um->peekers_write_locks[i]);
- }
+ vec_validate (um->wrk, num_threads - 1);
um->local_to_input_edge[UDP_IP4] =
vlib_node_add_next (vm, udp4_local_node.index, udp4_input_node.index);
@@ -519,16 +676,15 @@ udp_init (vlib_main_t * vm)
vlib_node_add_next (vm, udp6_local_node.index, udp6_input_node.index);
um->default_mtu = 1500;
+ um->csum_offload = 1;
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (udp_init) =
{
.runs_after = VLIB_INITS("ip_main_init", "ip4_lookup_init",
"ip6_lookup_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/udp/udp.h b/src/vnet/udp/udp.h
index f157711ba2b..8e4e87f85a8 100644
--- a/src/vnet/udp/udp.h
+++ b/src/vnet/udp/udp.h
@@ -25,6 +25,8 @@
#include <vnet/ip/ip.h>
#include <vnet/session/transport.h>
+#define UDP_NO_NODE_SET ((u16) ~0)
+
typedef enum
{
#define udp_error(f, n, s, d) UDP_ERROR_##f,
@@ -55,6 +57,24 @@ typedef enum udp_conn_flags_
#undef _
} udp_conn_flags_t;
+#define foreach_udp_cfg_flag _ (NO_CSUM_OFFLOAD, "no-csum-offload")
+
+typedef enum udp_cfg_flag_bits_
+{
+#define _(sym, str) UDP_CFG_F_##sym##_BIT,
+ foreach_udp_cfg_flag
+#undef _
+ UDP_CFG_N_FLAG_BITS
+} udp_cfg_flag_bits_e;
+
+typedef enum udp_cfg_flag_
+{
+#define _(sym, str) UDP_CFG_F_##sym = 1 << UDP_CFG_F_##sym##_BIT,
+ foreach_udp_cfg_flag
+#undef _
+ UDP_CFG_N_FLAGS
+} __clib_packed udp_cfg_flags_t;
+
typedef struct
{
/** Required for pool_get_aligned */
@@ -62,9 +82,15 @@ typedef struct
transport_connection_t connection; /**< must be first */
clib_spinlock_t rx_lock; /**< rx fifo lock */
u8 flags; /**< connection flags */
+ udp_cfg_flags_t cfg_flags; /**< configuration flags */
u16 mss; /**< connection mss */
+ u32 sw_if_index; /**< connection sw_if_index */
+ u32 next_node_index; /**< Can be used to control next node in output */
+ u32 next_node_opaque; /**< Opaque to pass to next node */
} udp_connection_t;
+#define udp_csum_offload(uc) (!((uc)->cfg_flags & UDP_CFG_F_NO_CSUM_OFFLOAD))
+
typedef struct
{
/* Name (a c string). */
@@ -79,9 +105,6 @@ typedef struct
/* Next index for this type. */
u32 next_index;
- /* UDP sessions refcount (not tunnels) */
- u32 n_connections;
-
/* Parser for packet generator edits for this protocol */
unformat_function_t *unformat_pg_edit;
} udp_dst_port_info_t;
@@ -93,6 +116,12 @@ typedef enum
N_UDP_AF,
} udp_af_t;
+typedef struct udp_worker_
+{
+ udp_connection_t *connections;
+ u32 *pending_cleanups;
+} udp_worker_t;
+
typedef struct
{
udp_dst_port_info_t *dst_port_infos[N_UDP_AF];
@@ -112,16 +141,19 @@ typedef struct
u32 local_to_input_edge[N_UDP_AF];
/*
- * Per-worker thread udp connection pools used with session layer
+ * UDP transport layer per-thread context
*/
- udp_connection_t **connections;
- u32 *connection_peekers;
- clib_spinlock_t *peekers_readers_locks;
- clib_spinlock_t *peekers_write_locks;
+
+ udp_worker_t *wrk;
udp_connection_t *listener_pool;
+ /* Refcounts for ports consumed by udp transports to handle
+ * both passive and active opens using the same port */
+ u16 *transport_ports_refcnt[N_UDP_AF];
+
u16 default_mtu;
u16 msg_id_base;
+ u8 csum_offload;
u8 icmp_send_unreachable_disabled;
} udp_main_t;
@@ -131,16 +163,26 @@ extern vlib_node_registration_t udp4_input_node;
extern vlib_node_registration_t udp6_input_node;
extern vlib_node_registration_t udp4_local_node;
extern vlib_node_registration_t udp6_local_node;
+extern vlib_node_registration_t udp4_output_node;
+extern vlib_node_registration_t udp6_output_node;
void udp_add_dst_port (udp_main_t * um, udp_dst_port_t dst_port,
char *dst_port_name, u8 is_ip4);
+always_inline udp_worker_t *
+udp_worker_get (u32 thread_index)
+{
+ return vec_elt_at_index (udp_main.wrk, thread_index);
+}
+
always_inline udp_connection_t *
udp_connection_get (u32 conn_index, u32 thread_index)
{
- if (pool_is_free_index (udp_main.connections[thread_index], conn_index))
+ udp_worker_t *wrk = udp_worker_get (thread_index);
+
+ if (pool_is_free_index (wrk->connections, conn_index))
return 0;
- return pool_elt_at_index (udp_main.connections[thread_index], conn_index);
+ return pool_elt_at_index (wrk->connections, conn_index);
}
always_inline udp_connection_t *
@@ -161,65 +203,24 @@ udp_connection_from_transport (transport_connection_t * tc)
return ((udp_connection_t *) tc);
}
-always_inline u32
-udp_connection_index (udp_connection_t * uc)
-{
- return (uc - udp_main.connections[uc->c_thread_index]);
-}
-
void udp_connection_free (udp_connection_t * uc);
udp_connection_t *udp_connection_alloc (u32 thread_index);
-
-/**
- * Acquires a lock that blocks a connection pool from expanding.
- */
-always_inline void
-udp_pool_add_peeker (u32 thread_index)
-{
- if (thread_index != vlib_get_thread_index ())
- return;
- clib_spinlock_lock_if_init (&udp_main.peekers_readers_locks[thread_index]);
- udp_main.connection_peekers[thread_index] += 1;
- if (udp_main.connection_peekers[thread_index] == 1)
- clib_spinlock_lock_if_init (&udp_main.peekers_write_locks[thread_index]);
- clib_spinlock_unlock_if_init (&udp_main.peekers_readers_locks
- [thread_index]);
-}
-
-always_inline void
-udp_pool_remove_peeker (u32 thread_index)
-{
- if (thread_index != vlib_get_thread_index ())
- return;
- ASSERT (udp_main.connection_peekers[thread_index] > 0);
- clib_spinlock_lock_if_init (&udp_main.peekers_readers_locks[thread_index]);
- udp_main.connection_peekers[thread_index] -= 1;
- if (udp_main.connection_peekers[thread_index] == 0)
- clib_spinlock_unlock_if_init (&udp_main.peekers_write_locks
- [thread_index]);
- clib_spinlock_unlock_if_init (&udp_main.peekers_readers_locks
- [thread_index]);
-}
+void udp_connection_share_port (u16 lcl_port, u8 is_ip4);
always_inline udp_connection_t *
udp_connection_clone_safe (u32 connection_index, u32 thread_index)
{
+ u32 current_thread_index = vlib_get_thread_index (), new_index;
udp_connection_t *old_c, *new_c;
- u32 current_thread_index = vlib_get_thread_index ();
- new_c = udp_connection_alloc (current_thread_index);
- /* If during the memcpy pool is reallocated AND the memory allocator
- * decides to give the old chunk of memory to somebody in a hurry to
- * scribble something on it, we have a problem. So add this thread as
- * a session pool peeker.
- */
- udp_pool_add_peeker (thread_index);
- old_c = udp_main.connections[thread_index] + connection_index;
+ new_c = udp_connection_alloc (current_thread_index);
+ new_index = new_c->c_c_index;
+ /* Connection pool is always reallocated under a barrier sync */
+ old_c = udp_main.wrk[thread_index].connections + connection_index;
clib_memcpy_fast (new_c, old_c, sizeof (*new_c));
old_c->flags |= UDP_CONN_F_MIGRATED;
- udp_pool_remove_peeker (thread_index);
new_c->c_thread_index = current_thread_index;
- new_c->c_c_index = udp_connection_index (new_c);
+ new_c->c_c_index = new_index;
new_c->c_fib_index = old_c->c_fib_index;
/* Assume cloned sessions don't need lock */
new_c->rx_lock = 0;
@@ -239,8 +240,6 @@ format_function_t format_udp_connection;
unformat_function_t unformat_udp_header;
unformat_function_t unformat_udp_port;
-void udp_connection_share_port (u16 lcl_port, u8 is_ip4);
-
void udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
/*
diff --git a/src/vnet/udp/udp_api.c b/src/vnet/udp/udp_api.c
index 0f2d014946f..1f952aa36ea 100644
--- a/src/vnet/udp/udp_api.c
+++ b/src/vnet/udp/udp_api.c
@@ -86,12 +86,10 @@ vl_api_udp_encap_dump_t_handler (vl_api_udp_encap_dump_t *mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (ue, udp_encap_pool)
{
send_udp_encap_details(ue, reg, mp->context);
}
- /* *INDENT-ON* */
}
static void
@@ -99,6 +97,7 @@ vl_api_udp_encap_add_t_handler (vl_api_udp_encap_add_t *mp)
{
vl_api_udp_encap_add_reply_t *rmp;
ip46_address_t src_ip, dst_ip;
+ udp_encap_fixup_flags_t flags;
u32 fib_index, table_id;
fib_protocol_t fproto;
ip46_type_t itype;
@@ -119,19 +118,19 @@ vl_api_udp_encap_add_t_handler (vl_api_udp_encap_add_t *mp)
goto done;
}
- uei = udp_encap_add_and_lock (fproto, fib_index,
- &src_ip, &dst_ip,
+ flags = UDP_ENCAP_FIXUP_NONE;
+ if (mp->udp_encap.src_port == 0)
+ flags |= UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY;
+
+ uei = udp_encap_add_and_lock (fproto, fib_index, &src_ip, &dst_ip,
ntohs (mp->udp_encap.src_port),
- ntohs (mp->udp_encap.dst_port),
- UDP_ENCAP_FIXUP_NONE);
+ ntohs (mp->udp_encap.dst_port), flags);
done:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_UDP_ENCAP_ADD_REPLY,
({
rmp->id = ntohl (uei);
}));
- /* *INDENT-ON* */
}
@@ -189,11 +188,19 @@ vl_api_udp_decap_add_del_t_handler (vl_api_udp_decap_add_del_t *mp)
static clib_error_t *
udp_api_hookup (vlib_main_t * vm)
{
+ api_main_t *am = vlibapi_get_main ();
+
/*
* Set up the (msg_name, crc, message-id) table
*/
REPLY_MSG_ID_BASE = setup_message_id_table ();
+ /* Mark these APIs as mp safe */
+ vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_UDP_ENCAP_ADD, 1);
+ vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_UDP_ENCAP_DEL, 1);
+ vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_UDP_ENCAP_DUMP,
+ 1);
+
return 0;
}
diff --git a/src/vnet/udp/udp_cli.c b/src/vnet/udp/udp_cli.c
index 97760f4c4f8..6c8992cd0de 100644
--- a/src/vnet/udp/udp_cli.c
+++ b/src/vnet/udp/udp_cli.c
@@ -13,6 +13,9 @@
* limitations under the License.
*/
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/format_table.h>
#include <vnet/udp/udp.h>
#include <vnet/session/session_types.h>
@@ -35,6 +38,33 @@ format_udp_connection_id (u8 * s, va_list * args)
return s;
}
+static const char *udp_cfg_flags_str[] = {
+#define _(sym, str) str,
+ foreach_udp_cfg_flag
+#undef _
+};
+
+static u8 *
+format_udp_cfg_flags (u8 *s, va_list *args)
+{
+ udp_connection_t *tc = va_arg (*args, udp_connection_t *);
+ int i, last = -1;
+
+ for (i = 0; i < UDP_CFG_N_FLAG_BITS; i++)
+ if (tc->cfg_flags & (1 << i))
+ last = i;
+ if (last >= 0)
+ s = format (s, " cfg: ");
+ for (i = 0; i < last; i++)
+ {
+ if (tc->cfg_flags & (1 << i))
+ s = format (s, "%s, ", udp_cfg_flags_str[i]);
+ }
+ if (last >= 0)
+ s = format (s, "%s", udp_cfg_flags_str[last]);
+ return s;
+}
+
static const char *udp_connection_flags_str[] = {
#define _(sym, str) str,
foreach_udp_connection_flag
@@ -64,11 +94,15 @@ static u8 *
format_udp_vars (u8 * s, va_list * args)
{
udp_connection_t *uc = va_arg (*args, udp_connection_t *);
- s = format (s, " index %u flags: %U", uc->c_c_index,
- format_udp_connection_flags, uc);
+ s = format (s, " index %u%U flags: %U\n", uc->c_c_index,
+ format_udp_cfg_flags, uc, format_udp_connection_flags, uc);
+ s = format (s, " fib_index: %u next_node: %u opaque: %u ", uc->c_fib_index);
if (!(uc->flags & UDP_CONN_F_LISTEN))
+ s = format (s, " sw_if_index: %d mss: %u\n", uc->sw_if_index, uc->mss);
+ else
s = format (s, "\n");
+
return s;
}
@@ -102,6 +136,8 @@ udp_config_fn (vlib_main_t * vm, unformat_input_t * input)
um->default_mtu = tmp;
else if (unformat (input, "icmp-unreachable-disabled"))
um->icmp_send_unreachable_disabled = 1;
+ else if (unformat (input, "no-csum-offload"))
+ um->csum_offload = 0;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
@@ -151,7 +187,7 @@ show_udp_punt_fn (vlib_main_t * vm, unformat_input_t * input,
u8 *s = NULL;
vec_foreach (port_info, um->dst_port_infos[UDP_IP6])
{
- if (udp_is_valid_dst_port (port_info->dst_port, 01))
+ if (udp_is_valid_dst_port (port_info->dst_port, 0))
{
s = format (s, (!s) ? "%d" : ", %d", port_info->dst_port);
}
@@ -162,14 +198,199 @@ show_udp_punt_fn (vlib_main_t * vm, unformat_input_t * input,
return (error);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_tcp_punt_command, static) =
{
.path = "show udp punt",
.short_help = "show udp punt [ipv4|ipv6]",
.function = show_udp_punt_fn,
};
-/* *INDENT-ON* */
+
+static void
+table_format_udp_port_ (vlib_main_t *vm, udp_main_t *um, table_t *t, int *c,
+ int port, int bind, int is_ip4)
+{
+ const udp_dst_port_info_t *pi;
+
+ if (bind && !udp_is_valid_dst_port (port, is_ip4))
+ return;
+
+ pi = udp_get_dst_port_info (um, port, is_ip4);
+ if (!pi)
+ return;
+
+ table_format_cell (t, *c, 0, "%d", pi->dst_port);
+ table_format_cell (t, *c, 1, is_ip4 ? "ip4" : "ip6");
+ table_format_cell (t, *c, 2, ~0 == pi->node_index ? "none" : "%U",
+ format_vlib_node_name, vm, pi->node_index);
+ table_format_cell (t, *c, 3, "%s", pi->name);
+
+ (*c)++;
+}
+
+static void
+table_format_udp_port (vlib_main_t *vm, udp_main_t *um, table_t *t, int *c,
+ int port, int bind, int ip4, int ip6)
+{
+ if (ip4)
+ table_format_udp_port_ (vm, um, t, c, port, bind, 1 /* is_ip4 */);
+ if (ip6)
+ table_format_udp_port_ (vm, um, t, c, port, bind, 0 /* is_ip4 */);
+}
+
+static clib_error_t *
+show_udp_ports (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ table_t table = {}, *t = &table;
+ udp_main_t *um = &udp_main;
+ clib_error_t *err = 0;
+ int ip4 = 1, ip6 = 1;
+ int port = -1;
+ int bind = 1;
+ int c = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "ip4"))
+ ip6 = 0;
+ else if (unformat (input, "ip6"))
+ ip4 = 0;
+ else if (unformat (input, "bind"))
+ bind = 1;
+ else if (unformat (input, "all"))
+ bind = 0;
+ else if (unformat (input, "%d", &port))
+ ;
+ else
+ {
+ err = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto out;
+ }
+ }
+
+ table_add_header_col (t, 4, "port", "proto", "node", "desc");
+
+ if (port > 65535)
+ {
+ err = clib_error_return (0, "wrong port %d", port);
+ goto out;
+ }
+ else if (port < 0)
+ {
+ for (port = 0; port < 65536; port++)
+ table_format_udp_port (vm, um, t, &c, port, bind, ip4, ip6);
+ }
+ else
+ {
+ table_format_udp_port (vm, um, t, &c, port, bind, ip4, ip6);
+ }
+
+ vlib_cli_output (vm, "%U", format_table, t);
+
+out:
+ table_free (t);
+ return err;
+}
+
+VLIB_CLI_COMMAND (show_udp_ports_cmd, static) = {
+ .path = "show udp ports",
+ .function = show_udp_ports,
+ .short_help = "show udp ports [ip4|ip6] [bind|all|<port>]",
+ .is_mp_safe = 1,
+};
+
+static void
+table_format_udp_transport_port_ (vlib_main_t *vm, table_t *t, int *c,
+ int port, int is_ip4)
+{
+ udp_main_t *um = &udp_main;
+ u32 refcnt;
+ u16 port_ne;
+
+ port_ne = clib_host_to_net_u16 (port);
+ refcnt = um->transport_ports_refcnt[is_ip4][port_ne];
+ if (!refcnt)
+ return;
+
+ if (!udp_is_valid_dst_port (port, is_ip4))
+ {
+ clib_warning ("Port %u is not registered refcnt %u!", port, refcnt);
+ return;
+ }
+
+ table_format_cell (t, *c, 0, "%d", port);
+ table_format_cell (t, *c, 1, is_ip4 ? "ip4" : "ip6");
+ table_format_cell (t, *c, 2, "%d", refcnt);
+
+ (*c)++;
+}
+
+static void
+table_format_udp_transport_port (vlib_main_t *vm, table_t *t, int *c, int port,
+ int ipv)
+{
+ if (ipv == -1 || ipv == 0)
+ table_format_udp_transport_port_ (vm, t, c, port, 1 /* is_ip4 */);
+ if (ipv == -1 || ipv == 1)
+ table_format_udp_transport_port_ (vm, t, c, port, 0 /* is_ip4 */);
+}
+
+static clib_error_t *
+show_udp_transport_ports (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ table_t table = {}, *t = &table;
+ int ipv = -1, port = -1, c = 0;
+ clib_error_t *err = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "ip4"))
+ ipv = 0;
+ else if (unformat (input, "ip6"))
+ ipv = 1;
+ else if (unformat (input, "%d", &port))
+ ;
+ else
+ {
+ err = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto out;
+ }
+ }
+
+ table_add_header_col (t, 3, "port", "proto", "ref-cnt");
+
+ if (port > 65535)
+ {
+ err = clib_error_return (0, "wrong port %d", port);
+ goto out;
+ }
+
+ if (port < 0)
+ {
+ for (port = 0; port < 65536; port++)
+ table_format_udp_transport_port (vm, t, &c, port, ipv);
+ }
+ else
+ {
+ table_format_udp_transport_port (vm, t, &c, port, ipv);
+ }
+
+ vlib_cli_output (vm, "%U\n", format_table, t);
+
+out:
+ table_free (t);
+ return err;
+}
+
+VLIB_CLI_COMMAND (show_udp_transport_ports_cmd, static) = {
+ .path = "show udp transport ports",
+ .function = show_udp_transport_ports,
+ .short_help = "show udp transport ports [ip4|ip6] [<port>]",
+ .is_mp_safe = 1,
+};
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/udp/udp_encap.c b/src/vnet/udp/udp_encap.c
index cb93adb8d39..e4e5271da63 100644
--- a/src/vnet/udp/udp_encap.c
+++ b/src/vnet/udp/udp_encap.c
@@ -47,8 +47,7 @@ static void
udp_encap_restack (udp_encap_t * ue)
{
dpo_stack (udp_encap_dpo_types[ue->ue_ip_proto],
- fib_proto_to_dpo (ue->ue_ip_proto),
- &ue->ue_dpo,
+ fib_proto_to_dpo (ue->ue_ip_proto), &ue->ue_dpo,
fib_entry_contribute_ip_forwarding (ue->ue_fib_entry_index));
}
@@ -196,6 +195,20 @@ udp_encap_dpo_unlock (dpo_id_t * dpo)
fib_node_unlock (&ue->ue_fib_node);
}
+u8 *
+format_udp_encap_fixup_flags (u8 *s, va_list *args)
+{
+ udp_encap_fixup_flags_t flags = va_arg (*args, udp_encap_fixup_flags_t);
+
+ if (flags == UDP_ENCAP_FIXUP_NONE)
+ return format (s, "none");
+
+ if (flags & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY)
+ s = format (s, "%s", "src-port-is-entropy");
+
+ return (s);
+}
+
static u8 *
format_udp_encap_i (u8 * s, va_list * args)
{
@@ -211,23 +224,21 @@ format_udp_encap_i (u8 * s, va_list * args)
s = format (s, "udp-encap:[%d]: ip-fib-index:%d ", uei, ue->ue_fib_index);
if (FIB_PROTOCOL_IP4 == ue->ue_ip_proto)
{
- s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d, dst:%d]",
- format_ip4_address,
- &ue->ue_hdrs.ip4.ue_ip4.src_address,
- format_ip4_address,
- &ue->ue_hdrs.ip4.ue_ip4.dst_address,
+ s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d, dst:%d] flags:%U",
+ format_ip4_address, &ue->ue_hdrs.ip4.ue_ip4.src_address,
+ format_ip4_address, &ue->ue_hdrs.ip4.ue_ip4.dst_address,
clib_net_to_host_u16 (ue->ue_hdrs.ip4.ue_udp.src_port),
- clib_net_to_host_u16 (ue->ue_hdrs.ip4.ue_udp.dst_port));
+ clib_net_to_host_u16 (ue->ue_hdrs.ip4.ue_udp.dst_port),
+ format_udp_encap_fixup_flags, ue->ue_flags);
}
else
{
- s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d dst:%d]",
- format_ip6_address,
- &ue->ue_hdrs.ip6.ue_ip6.src_address,
- format_ip6_address,
- &ue->ue_hdrs.ip6.ue_ip6.dst_address,
+ s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d dst:%d] flags:%U",
+ format_ip6_address, &ue->ue_hdrs.ip6.ue_ip6.src_address,
+ format_ip6_address, &ue->ue_hdrs.ip6.ue_ip6.dst_address,
clib_net_to_host_u16 (ue->ue_hdrs.ip6.ue_udp.src_port),
- clib_net_to_host_u16 (ue->ue_hdrs.ip6.ue_udp.dst_port));
+ clib_net_to_host_u16 (ue->ue_hdrs.ip6.ue_udp.dst_port),
+ format_udp_encap_fixup_flags, ue->ue_flags);
}
vlib_get_combined_counter (&(udp_encap_counters), uei, &to);
s = format (s, " to:[%Ld:%Ld]]", to.packets, to.bytes);
@@ -325,12 +336,12 @@ udp_encap_fib_last_lock_gone (fib_node_t * node)
}
const static char *const udp4_encap_ip4_nodes[] = {
- "udp4-encap",
+ "udp4o4-encap",
NULL,
};
const static char *const udp4_encap_ip6_nodes[] = {
- "udp4-encap",
+ "udp6o4-encap",
NULL,
};
@@ -345,12 +356,12 @@ const static char *const udp4_encap_bier_nodes[] = {
};
const static char *const udp6_encap_ip4_nodes[] = {
- "udp6-encap",
+ "udp4o6-encap",
NULL,
};
const static char *const udp6_encap_ip6_nodes[] = {
- "udp6-encap",
+ "udp6o6-encap",
NULL,
};
@@ -507,13 +518,11 @@ udp_encap_walk (udp_encap_walk_cb_t cb, void *ctx)
{
index_t uei;
- /* *INDENT-OFF* */
pool_foreach_index (uei, udp_encap_pool)
{
if (WALK_STOP == cb(uei, ctx))
break;
}
- /* *INDENT-ON* */
}
clib_error_t *
@@ -536,12 +545,10 @@ udp_encap_show (vlib_main_t * vm,
if (INDEX_INVALID == uei)
{
- /* *INDENT-OFF* */
pool_foreach_index (uei, udp_encap_pool)
{
vlib_cli_output(vm, "%U", format_udp_encap, uei, 0);
}
- /* *INDENT-ON* */
}
else
{
@@ -551,20 +558,20 @@ udp_encap_show (vlib_main_t * vm,
return NULL;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (udp_encap_add_command, static) = {
.path = "udp encap",
- .short_help = "udp encap [add|del] <id ID> <src-ip> <dst-ip> [<src-port>] <dst-port> [src-port-is-entropy] [table-id <table>]",
+ .short_help = "udp encap [add|del] <id ID> <src-ip> <dst-ip> [<src-port>] "
+ "<dst-port> [src-port-is-entropy] [table-id <table>]",
.function = udp_encap_cli,
.is_mp_safe = 1,
};
+
VLIB_CLI_COMMAND (udp_encap_show_command, static) = {
.path = "show udp encap",
.short_help = "show udp encap [ID]",
.function = udp_encap_show,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/udp/udp_encap.h b/src/vnet/udp/udp_encap.h
index b096e0f5c09..c8b42ffa92c 100644
--- a/src/vnet/udp/udp_encap.h
+++ b/src/vnet/udp/udp_encap.h
@@ -85,7 +85,7 @@ typedef struct udp_encap_t_
/**
* The second cacheline contains control-plane data
*/
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
/**
* linkage into the FIB graph
@@ -115,6 +115,7 @@ extern index_t udp_encap_add_and_lock (fib_protocol_t proto,
extern void udp_encap_lock (index_t uei);
extern void udp_encap_unlock (index_t uei);
extern u8 *format_udp_encap (u8 * s, va_list * args);
+extern u8 *format_udp_encap_fixup_flags (u8 *s, va_list *args);
extern void udp_encap_contribute_forwarding (index_t uei,
dpo_proto_t proto,
dpo_id_t * dpo);
diff --git a/src/vnet/udp/udp_encap_node.c b/src/vnet/udp/udp_encap_node.c
index 5b9fc0bf34b..a86614f5475 100644
--- a/src/vnet/udp/udp_encap_node.c
+++ b/src/vnet/udp/udp_encap_node.c
@@ -20,12 +20,16 @@ typedef struct udp4_encap_trace_t_
{
udp_header_t udp;
ip4_header_t ip;
+ u32 flow_hash;
+ udp_encap_fixup_flags_t flags;
} udp4_encap_trace_t;
typedef struct udp6_encap_trace_t_
{
udp_header_t udp;
ip6_header_t ip;
+ u32 flow_hash;
+ udp_encap_fixup_flags_t flags;
} udp6_encap_trace_t;
extern vlib_combined_counter_main_t udp_encap_counters;
@@ -35,13 +39,16 @@ format_udp4_encap_trace (u8 * s, va_list * args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ u32 indent = format_get_indent (s);
udp4_encap_trace_t *t;
t = va_arg (*args, udp4_encap_trace_t *);
- s = format (s, "%U\n %U",
- format_ip4_header, &t->ip, sizeof (t->ip),
- format_udp_header, &t->udp, sizeof (t->udp));
+ s = format (s, "flags: %U, flow hash: 0x%08x\n%U%U\n%U%U",
+ format_udp_encap_fixup_flags, t->flags, t->flow_hash,
+ format_white_space, indent, format_ip4_header, &t->ip,
+ sizeof (t->ip), format_white_space, indent, format_udp_header,
+ &t->udp, sizeof (t->udp));
return (s);
}
@@ -50,20 +57,23 @@ format_udp6_encap_trace (u8 * s, va_list * args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ u32 indent = format_get_indent (s);
udp6_encap_trace_t *t;
t = va_arg (*args, udp6_encap_trace_t *);
- s = format (s, "%U\n %U",
- format_ip6_header, &t->ip, sizeof (t->ip),
- format_udp_header, &t->udp, sizeof (t->udp));
+ s = format (s, "flags: %U, flow hash: 0x%08x\n%U%U\n%U%U",
+ format_udp_encap_fixup_flags, t->flags, t->flow_hash,
+ format_white_space, indent, format_ip6_header, &t->ip,
+ sizeof (t->ip), format_white_space, indent, format_udp_header,
+ &t->udp, sizeof (t->udp));
return (s);
}
always_inline uword
-udp_encap_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, int is_encap_v6)
+udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, ip_address_family_t encap_family,
+ ip_address_family_t payload_family)
{
vlib_combined_counter_main_t *cm = &udp_encap_counters;
u32 *from = vlib_frame_vector_args (frame);
@@ -121,18 +131,22 @@ udp_encap_inline (vlib_main_t * vm,
ue1 = udp_encap_get (uei1);
/* Paint */
- if (is_encap_v6)
+ if (encap_family == AF_IP6)
{
const u8 n_bytes =
sizeof (udp_header_t) + sizeof (ip6_header_t);
- ip_udp_encap_two (vm, b0, b1, (u8 *) & ue0->ue_hdrs,
- (u8 *) & ue1->ue_hdrs, n_bytes, 0);
+ ip_udp_encap_two (vm, b0, b1, (u8 *) &ue0->ue_hdrs,
+ (u8 *) &ue1->ue_hdrs, n_bytes, encap_family,
+ payload_family, ue0->ue_flags, ue1->ue_flags);
+
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
udp6_encap_trace_t *tr =
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->udp = ue0->ue_hdrs.ip6.ue_udp;
tr->ip = ue0->ue_hdrs.ip6.ue_ip6;
+ tr->flags = ue0->ue_flags;
+ tr->flow_hash = vnet_buffer (b0)->ip.flow_hash;
}
if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -140,6 +154,8 @@ udp_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b1, sizeof (*tr));
tr->udp = ue1->ue_hdrs.ip6.ue_udp;
tr->ip = ue1->ue_hdrs.ip6.ue_ip6;
+ tr->flags = ue1->ue_flags;
+ tr->flow_hash = vnet_buffer (b1)->ip.flow_hash;
}
}
else
@@ -147,9 +163,9 @@ udp_encap_inline (vlib_main_t * vm,
const u8 n_bytes =
sizeof (udp_header_t) + sizeof (ip4_header_t);
- ip_udp_encap_two (vm, b0, b1,
- (u8 *) & ue0->ue_hdrs,
- (u8 *) & ue1->ue_hdrs, n_bytes, 1);
+ ip_udp_encap_two (vm, b0, b1, (u8 *) &ue0->ue_hdrs,
+ (u8 *) &ue1->ue_hdrs, n_bytes, encap_family,
+ payload_family, ue0->ue_flags, ue1->ue_flags);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -157,6 +173,8 @@ udp_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->udp = ue0->ue_hdrs.ip4.ue_udp;
tr->ip = ue0->ue_hdrs.ip4.ue_ip4;
+ tr->flags = ue0->ue_flags;
+ tr->flow_hash = vnet_buffer (b0)->ip.flow_hash;
}
if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -164,6 +182,8 @@ udp_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b1, sizeof (*tr));
tr->udp = ue1->ue_hdrs.ip4.ue_udp;
tr->ip = ue1->ue_hdrs.ip4.ue_ip4;
+ tr->flags = ue1->ue_flags;
+ tr->flow_hash = vnet_buffer (b1)->ip.flow_hash;
}
}
@@ -202,12 +222,12 @@ udp_encap_inline (vlib_main_t * vm,
b0));
/* Paint */
- if (is_encap_v6)
+ if (encap_family == AF_IP6)
{
const u8 n_bytes =
sizeof (udp_header_t) + sizeof (ip6_header_t);
- ip_udp_encap_one (vm, b0, (u8 *) & ue0->ue_hdrs.ip6, n_bytes,
- 0);
+ ip_udp_encap_one (vm, b0, (u8 *) &ue0->ue_hdrs.ip6, n_bytes,
+ encap_family, payload_family, ue0->ue_flags);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -215,6 +235,8 @@ udp_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->udp = ue0->ue_hdrs.ip6.ue_udp;
tr->ip = ue0->ue_hdrs.ip6.ue_ip6;
+ tr->flags = ue0->ue_flags;
+ tr->flow_hash = vnet_buffer (b0)->ip.flow_hash;
}
}
else
@@ -222,8 +244,8 @@ udp_encap_inline (vlib_main_t * vm,
const u8 n_bytes =
sizeof (udp_header_t) + sizeof (ip4_header_t);
- ip_udp_encap_one (vm, b0, (u8 *) & ue0->ue_hdrs.ip4, n_bytes,
- 1);
+ ip_udp_encap_one (vm, b0, (u8 *) &ue0->ue_hdrs.ip4, n_bytes,
+ encap_family, payload_family, ue0->ue_flags);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -231,6 +253,8 @@ udp_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->udp = ue0->ue_hdrs.ip4.ue_udp;
tr->ip = ue0->ue_hdrs.ip4.ue_ip4;
+ tr->flags = ue0->ue_flags;
+ tr->flow_hash = vnet_buffer (b0)->ip.flow_hash;
}
}
@@ -248,39 +272,87 @@ udp_encap_inline (vlib_main_t * vm,
return frame->n_vectors;
}
-VLIB_NODE_FN (udp4_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (udp4o4_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return udp_encap_inline (vm, node, frame, AF_IP4, AF_IP4);
+}
+
+VLIB_NODE_FN (udp6o4_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return udp_encap_inline (vm, node, frame, AF_IP4, AF_IP6);
+}
+
+VLIB_NODE_FN (udp4_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return udp_encap_inline (vm, node, frame, AF_IP4, N_AF);
+}
+
+VLIB_NODE_FN (udp6o6_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return udp_encap_inline (vm, node, frame, 0);
+ return udp_encap_inline (vm, node, frame, AF_IP6, AF_IP6);
}
-VLIB_NODE_FN (udp6_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (udp4o6_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return udp_encap_inline (vm, node, frame, 1);
+ return udp_encap_inline (vm, node, frame, AF_IP6, AF_IP4);
}
-/* *INDENT-OFF* */
+VLIB_NODE_FN (udp6_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return udp_encap_inline (vm, node, frame, AF_IP6, N_AF);
+}
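The wrapper names encode payload-over-encap, with the plain udp4/udp6 variants keeping the old unknown-payload (N_AF) behavior, for which checksum fixup and flow-hash computation are skipped. Read off the calls above:

  /* node           encap family   payload family
   * udp4o4-encap   ip4            ip4
   * udp6o4-encap   ip4            ip6
   * udp4-encap     ip4            unknown (N_AF)
   * udp6o6-encap   ip6            ip6
   * udp4o6-encap   ip6            ip4
   * udp6-encap     ip6            unknown (N_AF)
   */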
+
+VLIB_REGISTER_NODE (udp4o4_encap_node) = {
+ .name = "udp4o4-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_udp4_encap_trace,
+ .n_next_nodes = 0,
+};
+
+VLIB_REGISTER_NODE (udp6o4_encap_node) = {
+ .name = "udp6o4-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_udp4_encap_trace,
+ .n_next_nodes = 0,
+ .sibling_of = "udp4o4-encap",
+};
+
VLIB_REGISTER_NODE (udp4_encap_node) = {
.name = "udp4-encap",
.vector_size = sizeof (u32),
-
.format_trace = format_udp4_encap_trace,
+ .n_next_nodes = 0,
+ .sibling_of = "udp4o4-encap",
+};
+VLIB_REGISTER_NODE (udp6o6_encap_node) = {
+ .name = "udp6o6-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_udp6_encap_trace,
+ .n_next_nodes = 0,
+};
+
+VLIB_REGISTER_NODE (udp4o6_encap_node) = {
+ .name = "udp4o6-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_udp6_encap_trace,
.n_next_nodes = 0,
+ .sibling_of = "udp6o6-encap",
};
VLIB_REGISTER_NODE (udp6_encap_node) = {
.name = "udp6-encap",
.vector_size = sizeof (u32),
-
.format_trace = format_udp6_encap_trace,
-
.n_next_nodes = 0,
+ .sibling_of = "udp6o6-encap",
};
-/* *INDENT-ON* */
/*
diff --git a/src/vnet/udp/udp_error.def b/src/vnet/udp/udp_error.def
index 178d5c96b2c..ef19970ce72 100644
--- a/src/vnet/udp/udp_error.def
+++ b/src/vnet/udp/udp_error.def
@@ -21,7 +21,10 @@ udp_error (LENGTH_ERROR, length_error, ERROR, "Packets with length errors")
udp_error (PUNT, punt, ERROR, "No listener punt")
udp_error (ENQUEUED, enqueued, INFO, "Packets enqueued")
udp_error (FIFO_FULL, fifo_full, ERROR, "Fifo full")
+udp_error (FIFO_NOMEM, fifo_nomem, ERROR, "Fifo no mem")
udp_error (NOT_READY, not_ready, ERROR, "Connection not ready")
udp_error (ACCEPT, accept, INFO, "Accepted session")
udp_error (CREATE_SESSION, create_session, ERROR, "Failed to create session")
udp_error (MQ_FULL, mq_full, ERROR, "Application msg queue full")
+udp_error (INVALID_CONNECTION, invalid_connection, ERROR, "Invalid connection")
+udp_error (PKTS_SENT, pkts_sent, INFO, "Packets sent")
diff --git a/src/vnet/udp/udp_inlines.h b/src/vnet/udp/udp_inlines.h
index e4eb0c88e83..ceec0b191b1 100644
--- a/src/vnet/udp/udp_inlines.h
+++ b/src/vnet/udp/udp_inlines.h
@@ -21,9 +21,12 @@
#include <vnet/ip/ip6.h>
#include <vnet/udp/udp_packet.h>
#include <vnet/interface_output.h>
+#include <vnet/ip/ip4_inlines.h>
+#include <vnet/ip/ip6_inlines.h>
+#include <vnet/udp/udp_encap.h>
always_inline void *
-vlib_buffer_push_udp (vlib_buffer_t * b, u16 sp, u16 dp, u8 offload_csum)
+vlib_buffer_push_udp (vlib_buffer_t *b, u16 sp, u16 dp)
{
udp_header_t *uh;
u16 udp_len = sizeof (udp_header_t) + b->current_length;
@@ -35,15 +38,44 @@ vlib_buffer_push_udp (vlib_buffer_t * b, u16 sp, u16 dp, u8 offload_csum)
uh->dst_port = dp;
uh->checksum = 0;
uh->length = clib_host_to_net_u16 (udp_len);
- if (offload_csum)
- vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_UDP_CKSUM);
vnet_buffer (b)->l4_hdr_offset = (u8 *) uh - b->data;
b->flags |= VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
return uh;
}
+/*
+ * Encode udp source port entropy value per
+ * https://datatracker.ietf.org/doc/html/rfc7510#section-3
+ */
+always_inline u16
+ip_udp_sport_entropy (vlib_buffer_t *b0)
+{
+ u16 port = clib_host_to_net_u16 (0x03 << 14);
+ port |= vnet_buffer (b0)->ip.flow_hash & 0xffff;
+ return port;
+}
+
+always_inline u32
+ip_udp_compute_flow_hash (vlib_buffer_t *b0, u8 is_ip4)
+{
+ ip4_header_t *ip4;
+ ip6_header_t *ip6;
+
+ if (is_ip4)
+ {
+ ip4 = (ip4_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
+ return ip4_compute_flow_hash (ip4, IP_FLOW_HASH_DEFAULT);
+ }
+ else
+ {
+ ip6 = (ip6_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
+ return ip6_compute_flow_hash (ip6, IP_FLOW_HASH_DEFAULT);
+ }
+}
+
always_inline void
-ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4)
+ip_udp_fixup_one (vlib_main_t *vm, vlib_buffer_t *b0, u8 is_ip4,
+ u8 sport_entropy)
{
u16 new_l0;
udp_header_t *udp0;
@@ -71,6 +103,9 @@ ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4)
new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
- sizeof (*ip0));
udp0->length = new_l0;
+
+ if (sport_entropy)
+ udp0->src_port = ip_udp_sport_entropy (b0);
}
else
{
@@ -87,6 +122,9 @@ ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4)
udp0 = (udp_header_t *) (ip0 + 1);
udp0->length = new_l0;
+ if (sport_entropy)
+ udp0->src_port = ip_udp_sport_entropy (b0);
+
udp0->checksum =
ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0);
ASSERT (bogus0 == 0);
@@ -97,14 +135,27 @@ ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4)
}
always_inline void
-ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len,
- u8 is_ip4)
+ip_udp_encap_one (vlib_main_t *vm, vlib_buffer_t *b0, u8 *ec0, word ec_len,
+ ip_address_family_t encap_family,
+ ip_address_family_t payload_family,
+ udp_encap_fixup_flags_t flags)
{
- vnet_calc_checksums_inline (vm, b0, is_ip4, !is_ip4);
+ u8 sport_entropy = (flags & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) != 0;
+
+ if (payload_family < N_AF)
+ {
+ vnet_calc_checksums_inline (vm, b0, payload_family == AF_IP4,
+ payload_family == AF_IP6);
+
+      /* Calculate flow hash to be used for entropy */
+ if (sport_entropy && 0 == vnet_buffer (b0)->ip.flow_hash)
+ vnet_buffer (b0)->ip.flow_hash =
+ ip_udp_compute_flow_hash (b0, payload_family == AF_IP4);
+ }
vlib_buffer_advance (b0, -ec_len);
- if (is_ip4)
+ if (encap_family == AF_IP4)
{
ip4_header_t *ip0;
@@ -112,7 +163,7 @@ ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len,
/* Apply the encap string. */
clib_memcpy_fast (ip0, ec0, ec_len);
- ip_udp_fixup_one (vm, b0, 1);
+ ip_udp_fixup_one (vm, b0, 1, sport_entropy);
}
else
{
@@ -122,26 +173,42 @@ ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len,
/* Apply the encap string. */
clib_memcpy_fast (ip0, ec0, ec_len);
- ip_udp_fixup_one (vm, b0, 0);
+ ip_udp_fixup_one (vm, b0, 0, sport_entropy);
}
}
always_inline void
-ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1,
- u8 * ec0, u8 * ec1, word ec_len, u8 is_v4)
+ip_udp_encap_two (vlib_main_t *vm, vlib_buffer_t *b0, vlib_buffer_t *b1,
+ u8 *ec0, u8 *ec1, word ec_len,
+ ip_address_family_t encap_family,
+ ip_address_family_t payload_family,
+ udp_encap_fixup_flags_t flags0,
+ udp_encap_fixup_flags_t flags1)
{
u16 new_l0, new_l1;
udp_header_t *udp0, *udp1;
+ int payload_ip4 = (payload_family == AF_IP4);
+ int sport_entropy0 = (flags0 & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) != 0;
+ int sport_entropy1 = (flags1 & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) != 0;
- ASSERT (_vec_len (ec0) == _vec_len (ec1));
-
- vnet_calc_checksums_inline (vm, b0, is_v4, !is_v4);
- vnet_calc_checksums_inline (vm, b1, is_v4, !is_v4);
+ if (payload_family < N_AF)
+ {
+ vnet_calc_checksums_inline (vm, b0, payload_ip4, !payload_ip4);
+ vnet_calc_checksums_inline (vm, b1, payload_ip4, !payload_ip4);
+
+      /* Calculate flow hash to be used for entropy */
+ if (sport_entropy0 && 0 == vnet_buffer (b0)->ip.flow_hash)
+ vnet_buffer (b0)->ip.flow_hash =
+ ip_udp_compute_flow_hash (b0, payload_ip4);
+ if (sport_entropy1 && 0 == vnet_buffer (b1)->ip.flow_hash)
+ vnet_buffer (b1)->ip.flow_hash =
+ ip_udp_compute_flow_hash (b1, payload_ip4);
+ }
vlib_buffer_advance (b0, -ec_len);
vlib_buffer_advance (b1, -ec_len);
- if (is_v4)
+ if (encap_family == AF_IP4)
{
ip4_header_t *ip0, *ip1;
ip_csum_t sum0, sum1;
@@ -185,6 +252,11 @@ ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1,
sizeof (*ip1));
udp0->length = new_l0;
udp1->length = new_l1;
+
+ if (sport_entropy0)
+ udp0->src_port = ip_udp_sport_entropy (b0);
+ if (sport_entropy1)
+ udp1->src_port = ip_udp_sport_entropy (b1);
}
else
{
@@ -212,6 +284,11 @@ ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1,
udp0->length = new_l0;
udp1->length = new_l1;
+ if (sport_entropy0)
+ udp0->src_port = ip_udp_sport_entropy (b0);
+ if (sport_entropy1)
+ udp1->src_port = ip_udp_sport_entropy (b1);
+
udp0->checksum =
ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0);
udp1->checksum =
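For orientation: RFC 7510 wants entropy-bearing source ports drawn from the dynamic range 0xC000-0xFFFF, i.e. the two most significant bits forced to 1 and the remainder filled from a flow hash, which is what ip_udp_sport_entropy above produces in network byte order. A simplified, host-byte-order illustration of the encoding (standalone sketch, not the in-tree helper):

#include <assert.h>
#include <stdint.h>

/* Host-order sketch: force the port into 0xC000..0xFFFF and take
 * the low 14 bits of the flow hash as entropy. */
static uint16_t
sport_entropy_host_order (uint32_t flow_hash)
{
  return (uint16_t) (0xC000u | (flow_hash & 0x3FFFu));
}

int
main (void)
{
  assert (sport_entropy_host_order (0x12345678) == 0xD678);
  assert (sport_entropy_host_order (0) == 0xC000);
  return 0;
}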
diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c
index d14bdb8a298..a90461186c1 100644
--- a/src/vnet/udp/udp_input.c
+++ b/src/vnet/udp/udp_input.c
@@ -115,6 +115,7 @@ udp_connection_accept (udp_connection_t * listener, session_dgram_hdr_t * hdr,
uc->c_fib_index = listener->c_fib_index;
uc->mss = listener->mss;
uc->flags |= UDP_CONN_F_CONNECTED;
+ uc->cfg_flags = listener->cfg_flags;
if (session_dgram_accept (&uc->connection, listener->c_s_index,
listener->c_thread_index))
@@ -122,8 +123,8 @@ udp_connection_accept (udp_connection_t * listener, session_dgram_hdr_t * hdr,
udp_connection_free (uc);
return 0;
}
- udp_connection_share_port (clib_net_to_host_u16
- (uc->c_lcl_port), uc->c_is_ip4);
+
+ udp_connection_share_port (uc->c_lcl_port, uc->c_is_ip4);
return uc;
}
@@ -135,37 +136,46 @@ udp_connection_enqueue (udp_connection_t * uc0, session_t * s0,
int wrote0;
if (!(uc0->flags & UDP_CONN_F_CONNECTED))
- clib_spinlock_lock (&uc0->rx_lock);
+ {
+ clib_spinlock_lock (&uc0->rx_lock);
+
+ wrote0 = session_enqueue_dgram_connection_cl (
+ s0, hdr0, b, TRANSPORT_PROTO_UDP, queue_event);
+
+ clib_spinlock_unlock (&uc0->rx_lock);
+
+      /* Expect cl udp enqueue to fail only when the fifo is full */
+ if (PREDICT_FALSE (wrote0 == 0))
+ *error0 = UDP_ERROR_FIFO_FULL;
+
+ return;
+ }
if (svm_fifo_max_enqueue_prod (s0->rx_fifo)
< hdr0->data_length + sizeof (session_dgram_hdr_t))
{
*error0 = UDP_ERROR_FIFO_FULL;
- goto unlock_rx_lock;
+ return;
}
/* If session is owned by another thread and rx event needed,
* enqueue event now while we still have the peeker lock */
if (s0->thread_index != thread_index)
{
- wrote0 = session_enqueue_dgram_connection (s0, hdr0, b,
- TRANSPORT_PROTO_UDP,
- /* queue event */ 0);
- if (queue_event && !svm_fifo_has_event (s0->rx_fifo))
- session_enqueue_notify (s0);
+ wrote0 = session_enqueue_dgram_connection2 (
+ s0, hdr0, b, TRANSPORT_PROTO_UDP,
+ queue_event && !svm_fifo_has_event (s0->rx_fifo));
}
else
{
- wrote0 = session_enqueue_dgram_connection (s0, hdr0, b,
- TRANSPORT_PROTO_UDP,
- queue_event);
+ wrote0 = session_enqueue_dgram_connection (
+ s0, hdr0, b, TRANSPORT_PROTO_UDP, queue_event);
}
- ASSERT (wrote0 > 0);
-
-unlock_rx_lock:
- if (!(uc0->flags & UDP_CONN_F_CONNECTED))
- clib_spinlock_unlock (&uc0->rx_lock);
+ /* In some rare cases, session_enqueue_dgram_connection can fail because a
+ * chunk cannot be allocated in the RX FIFO */
+ if (PREDICT_FALSE (wrote0 == 0))
+ *error0 = UDP_ERROR_FIFO_NOMEM;
}
always_inline session_t *
@@ -184,6 +194,7 @@ udp_parse_and_lookup_buffer (vlib_buffer_t * b, session_dgram_hdr_t * hdr,
hdr->lcl_port = udp->dst_port;
hdr->rmt_port = udp->src_port;
hdr->is_ip4 = is_ip4;
+ hdr->gso_size = 0;
if (is_ip4)
{
@@ -213,6 +224,10 @@ udp_parse_and_lookup_buffer (vlib_buffer_t * b, session_dgram_hdr_t * hdr,
udp->src_port, TRANSPORT_PROTO_UDP);
}
+ /* Set the sw_if_index[VLIB_RX] to the interface we received
+   * the packet on (the local interface) */
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->ip.rx_sw_if_index;
+
if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)))
b->current_length = hdr->data_length;
else
@@ -226,10 +241,9 @@ always_inline uword
udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame, u8 is_ip4)
{
- u32 n_left_from, *from, errors, *first_buffer;
+ u32 thread_index = vm->thread_index, n_left_from, *from, *first_buffer;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 err_counters[UDP_N_ERROR] = { 0 };
- u32 thread_index = vm->thread_index;
from = first_buffer = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -251,15 +265,11 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
goto done;
}
- /*
- * If session exists pool peeker lock is taken at this point unless
- * the session is already on the right thread or is a listener
- */
-
if (s0->session_state == SESSION_STATE_OPENED)
{
u8 queue_event = 1;
uc0 = udp_connection_from_transport (session_get_transport (s0));
+ uc0->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
if (uc0->flags & UDP_CONN_F_CONNECTED)
{
if (s0->thread_index != thread_index)
@@ -273,10 +283,8 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
ASSERT (s0->session_index == uc0->c_s_index);
/*
- * Drop the peeker lock on pool resize and ask session
- * layer for a new session.
+ * Ask session layer for a new session.
*/
- session_pool_remove_peeker (s0->thread_index);
session_dgram_connect_notify (&uc0->connection,
s0->thread_index, &s0);
queue_event = 0;
@@ -286,9 +294,9 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0],
queue_event, &error0);
- session_pool_remove_peeker (s0->thread_index);
}
- else if (s0->session_state == SESSION_STATE_READY)
+ else if (s0->session_state == SESSION_STATE_READY ||
+ s0->session_state == SESSION_STATE_ACCEPTING)
{
uc0 = udp_connection_from_transport (session_get_transport (s0));
udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0], 1,
@@ -306,6 +314,7 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
goto done;
}
s0 = session_get (uc0->c_s_index, uc0->c_thread_index);
+ uc0->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
error0 = UDP_ERROR_ACCEPT;
}
udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0], 1,
@@ -314,7 +323,6 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
else
{
error0 = UDP_ERROR_NOT_READY;
- session_pool_remove_peeker (s0->thread_index);
}
done:
@@ -328,9 +336,7 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
vlib_buffer_free (vm, first_buffer, frame->n_vectors);
- errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_UDP,
- thread_index);
- err_counters[UDP_ERROR_MQ_FULL] = errors;
+ session_main_flush_enqueue_events (TRANSPORT_PROTO_UDP, thread_index);
udp_store_err_counters (vm, is_ip4, err_counters);
return frame->n_vectors;
}
@@ -342,7 +348,6 @@ udp4_input (vlib_main_t * vm, vlib_node_runtime_t * node,
return udp46_input_inline (vm, node, frame, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp4_input_node) =
{
.function = udp4_input,
@@ -359,7 +364,6 @@ VLIB_REGISTER_NODE (udp4_input_node) =
#undef _
},
};
-/* *INDENT-ON* */
static uword
udp6_input (vlib_main_t * vm, vlib_node_runtime_t * node,
@@ -368,7 +372,6 @@ udp6_input (vlib_main_t * vm, vlib_node_runtime_t * node,
return udp46_input_inline (vm, node, frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp6_input_node) =
{
.function = udp6_input,
@@ -385,7 +388,6 @@ VLIB_REGISTER_NODE (udp6_input_node) =
#undef _
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
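The enqueue rework above distinguishes failure modes that were previously asserted away: on the connection-less (spinlock-protected) path a failed session_enqueue_dgram_connection_cl is counted as UDP_ERROR_FIFO_FULL, while on the connected path lack of space is caught up front (UDP_ERROR_FIFO_FULL) and an enqueue that fails despite available space means a chunk could not be allocated (UDP_ERROR_FIFO_NOMEM). A schematic of the connected-path split (hypothetical helper, not the in-tree code):

/* Schematic only: which error counter the connected path bumps. */
static const char *
classify_enqueue (int had_space, int wrote)
{
  if (!had_space)
    return "UDP_ERROR_FIFO_FULL";  /* datagram did not fit */
  if (wrote == 0)
    return "UDP_ERROR_FIFO_NOMEM"; /* chunk allocation failed */
  return "none";
}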
diff --git a/src/vnet/udp/udp_local.c b/src/vnet/udp/udp_local.c
index 06bafbb4be8..6531b73cd11 100644
--- a/src/vnet/udp/udp_local.c
+++ b/src/vnet/udp/udp_local.c
@@ -42,8 +42,6 @@ static vlib_error_desc_t udp_error_counters[] = {
#undef udp_error
};
-#define UDP_NO_NODE_SET ((u16) ~0)
-
#ifndef CLIB_MARCH_VARIANT
u8 *
format_udp_rx_trace (u8 * s, va_list * args)
@@ -127,9 +125,8 @@ udp46_local_inline (vlib_main_t * vm,
u32 bi0, bi1;
vlib_buffer_t *b0, *b1;
udp_header_t *h0 = 0, *h1 = 0;
- u32 i0, i1, dst_port0, dst_port1;
+ u32 i0, i1, next0, next1;
u32 advance0, advance1;
- u32 error0, next0, error1, next1;
/* Prefetch next iteration. */
{
@@ -171,72 +168,106 @@ udp46_local_inline (vlib_main_t * vm,
if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0)))
{
- error0 = UDP_ERROR_LENGTH_ERROR;
+ b0->error = node->errors[UDP_ERROR_LENGTH_ERROR];
next0 = UDP_LOCAL_NEXT_DROP;
}
else
{
vlib_buffer_advance (b0, advance0);
h0 = vlib_buffer_get_current (b0);
- error0 = UDP_ERROR_NONE;
next0 = UDP_LOCAL_NEXT_PUNT;
if (PREDICT_FALSE (clib_net_to_host_u16 (h0->length) >
vlib_buffer_length_in_chain (vm, b0)))
{
- error0 = UDP_ERROR_LENGTH_ERROR;
+ b0->error = node->errors[UDP_ERROR_LENGTH_ERROR];
next0 = UDP_LOCAL_NEXT_DROP;
}
}
if (PREDICT_FALSE (b1->current_length < advance1 + sizeof (*h1)))
{
- error1 = UDP_ERROR_LENGTH_ERROR;
+ b1->error = node->errors[UDP_ERROR_LENGTH_ERROR];
next1 = UDP_LOCAL_NEXT_DROP;
}
else
{
vlib_buffer_advance (b1, advance1);
h1 = vlib_buffer_get_current (b1);
- error1 = UDP_ERROR_NONE;
next1 = UDP_LOCAL_NEXT_PUNT;
if (PREDICT_FALSE (clib_net_to_host_u16 (h1->length) >
vlib_buffer_length_in_chain (vm, b1)))
{
- error1 = UDP_ERROR_LENGTH_ERROR;
+ b1->error = node->errors[UDP_ERROR_LENGTH_ERROR];
next1 = UDP_LOCAL_NEXT_DROP;
}
}
/* Index sparse array with network byte order. */
- dst_port0 = (error0 == 0) ? h0->dst_port : 0;
- dst_port1 = (error1 == 0) ? h1->dst_port : 0;
- sparse_vec_index2 (next_by_dst_port, dst_port0, dst_port1, &i0,
- &i1);
- next0 = (error0 == 0) ? vec_elt (next_by_dst_port, i0) : next0;
- next1 = (error1 == 0) ? vec_elt (next_by_dst_port, i1) : next1;
-
- if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX ||
- next0 == UDP_NO_NODE_SET))
+ if (PREDICT_TRUE (next0 == UDP_LOCAL_NEXT_PUNT &&
+ next1 == UDP_LOCAL_NEXT_PUNT))
{
- udp_dispatch_error (node, b0, advance0, is_ip4, &next0);
+ sparse_vec_index2 (next_by_dst_port, h0->dst_port, h1->dst_port,
+ &i0, &i1);
+ next0 = vec_elt (next_by_dst_port, i0);
+ next1 = vec_elt (next_by_dst_port, i1);
+
+ if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX ||
+ next0 == UDP_NO_NODE_SET))
+ {
+ udp_dispatch_error (node, b0, advance0, is_ip4, &next0);
+ }
+ else
+ {
+ b0->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b0, sizeof (*h0));
+ }
+
+ if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX ||
+ next1 == UDP_NO_NODE_SET))
+ {
+ udp_dispatch_error (node, b1, advance1, is_ip4, &next1);
+ }
+ else
+ {
+ b1->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b1, sizeof (*h1));
+ }
}
- else
+ else if (next0 == UDP_LOCAL_NEXT_PUNT)
{
- b0->error = node->errors[UDP_ERROR_NONE];
- // advance to the payload
- vlib_buffer_advance (b0, sizeof (*h0));
- }
+ i0 = sparse_vec_index (next_by_dst_port, h0->dst_port);
+ next0 = vec_elt (next_by_dst_port, i0);
- if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX ||
- next1 == UDP_NO_NODE_SET))
- {
- udp_dispatch_error (node, b1, advance1, is_ip4, &next1);
+ if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX ||
+ next0 == UDP_NO_NODE_SET))
+ {
+ udp_dispatch_error (node, b0, advance0, is_ip4, &next0);
+ }
+ else
+ {
+ b0->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b0, sizeof (*h0));
+ }
}
- else
+ else if (next1 == UDP_LOCAL_NEXT_PUNT)
{
- b1->error = node->errors[UDP_ERROR_NONE];
- // advance to the payload
- vlib_buffer_advance (b1, sizeof (*h1));
+ i1 = sparse_vec_index (next_by_dst_port, h1->dst_port);
+ next1 = vec_elt (next_by_dst_port, i1);
+
+ if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX ||
+ next1 == UDP_NO_NODE_SET))
+ {
+ udp_dispatch_error (node, b1, advance1, is_ip4, &next1);
+ }
+ else
+ {
+ b1->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b1, sizeof (*h1));
+ }
}
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -362,7 +393,6 @@ VLIB_NODE_FN (udp6_local_node) (vlib_main_t * vm,
return udp46_local_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp4_local_node) = {
.name = "ip4-udp-lookup",
/* Takes a vector of packets. */
@@ -382,9 +412,7 @@ VLIB_REGISTER_NODE (udp4_local_node) = {
.format_trace = format_udp_rx_trace,
.unformat_buffer = unformat_udp_header,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp6_local_node) = {
.name = "ip6-udp-lookup",
/* Takes a vector of packets. */
@@ -404,7 +432,6 @@ VLIB_REGISTER_NODE (udp6_local_node) = {
.format_trace = format_udp_rx_trace,
.unformat_buffer = unformat_udp_header,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
void
@@ -492,16 +519,12 @@ u8
udp_is_valid_dst_port (udp_dst_port_t dst_port, u8 is_ip4)
{
udp_main_t *um = &udp_main;
- u16 *n;
-
- if (is_ip4)
- n = sparse_vec_validate (um->next_by_dst_port4,
- clib_host_to_net_u16 (dst_port));
- else
- n = sparse_vec_validate (um->next_by_dst_port6,
- clib_host_to_net_u16 (dst_port));
-
- return (n[0] != SPARSE_VEC_INVALID_INDEX && n[0] != UDP_NO_NODE_SET);
+ u16 *next_by_dst_port =
+ is_ip4 ? um->next_by_dst_port4 : um->next_by_dst_port6;
+ uword index =
+ sparse_vec_index (next_by_dst_port, clib_host_to_net_u16 (dst_port));
+ return (index != SPARSE_VEC_INVALID_INDEX &&
+ vec_elt (next_by_dst_port, index) != UDP_NO_NODE_SET);
}
void
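udp_is_valid_dst_port now probes the sparse vector read-only via sparse_vec_index instead of sparse_vec_validate, so a lookup can no longer grow the vector as a side effect. Callers typically pair it with the existing registration API; a sketch, where my_input_node stands in for a real graph node:

/* Sketch (my_input_node is hypothetical): register a handler for
 * UDP dst port 4789 over IPv4, then verify the port is claimed. */
udp_register_dst_port (vm, 4789, my_input_node.index, 1 /* is_ip4 */);
ASSERT (udp_is_valid_dst_port (4789, 1 /* is_ip4 */));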
diff --git a/src/vnet/udp/udp_output.c b/src/vnet/udp/udp_output.c
new file mode 100644
index 00000000000..22b94141365
--- /dev/null
+++ b/src/vnet/udp/udp_output.c
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vnet/udp/udp.h>
+#include <vnet/ip/ip4_inlines.h>
+#include <vnet/ip/ip6_inlines.h>
+
+#define udp_node_index(node_id, is_ip4) \
+ ((is_ip4) ? udp4_##node_id##_node.index : udp6_##node_id##_node.index)
+
+typedef enum udp_output_next_
+{
+ UDP_OUTPUT_NEXT_DROP,
+ UDP_OUTPUT_NEXT_IP_LOOKUP,
+ UDP_OUTPUT_N_NEXT
+} udp_output_next_t;
+
+#define foreach_udp4_output_next \
+ _ (DROP, "error-drop") \
+ _ (IP_LOOKUP, "ip4-lookup")
+
+#define foreach_udp6_output_next \
+ _ (DROP, "error-drop") \
+ _ (IP_LOOKUP, "ip6-lookup")
+
+static vlib_error_desc_t udp_output_error_counters[] = {
+#define udp_error(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
+#include <vnet/udp/udp_error.def>
+#undef udp_error
+};
+
+typedef struct udp_tx_trace_
+{
+ udp_header_t udp_header;
+ udp_connection_t udp_connection;
+} udp_tx_trace_t;
+
+static u8 *
+format_udp_tx_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ udp_tx_trace_t *t = va_arg (*args, udp_tx_trace_t *);
+ udp_connection_t *uc = &t->udp_connection;
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "%U\n%U%U", format_udp_connection, uc, 1, format_white_space,
+ indent, format_udp_header, &t->udp_header, 128);
+
+ return s;
+}
+
+always_inline udp_connection_t *
+udp_output_get_connection (vlib_buffer_t *b, u32 thread_index)
+{
+ if (PREDICT_FALSE (vnet_buffer (b)->tcp.flags & UDP_CONN_F_LISTEN))
+ return udp_listener_get (vnet_buffer (b)->tcp.connection_index);
+
+ return udp_connection_get (vnet_buffer (b)->tcp.connection_index,
+ thread_index);
+}
+
+static void
+udp46_output_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
+ u32 *to_next, u32 n_bufs)
+{
+ udp_connection_t *uc;
+ udp_tx_trace_t *t;
+ vlib_buffer_t *b;
+ udp_header_t *uh;
+ int i;
+
+ for (i = 0; i < n_bufs; i++)
+ {
+ b = vlib_get_buffer (vm, to_next[i]);
+ if (!(b->flags & VLIB_BUFFER_IS_TRACED))
+ continue;
+ uh = vlib_buffer_get_current (b);
+ uc = udp_output_get_connection (b, vm->thread_index);
+ t = vlib_add_trace (vm, node, b, sizeof (*t));
+ clib_memcpy_fast (&t->udp_header, uh, sizeof (t->udp_header));
+ clib_memcpy_fast (&t->udp_connection, uc, sizeof (t->udp_connection));
+ }
+}
+
+always_inline void
+udp_output_handle_packet (udp_connection_t *uc0, vlib_buffer_t *b0,
+ vlib_node_runtime_t *error_node, u16 *next0,
+ u8 is_ip4)
+{
+  /* If the connection has a custom next node, use it */
+ if (uc0->next_node_index)
+ {
+ *next0 = uc0->next_node_index;
+ vnet_buffer (b0)->tcp.next_node_opaque = uc0->next_node_opaque;
+ }
+ else
+ {
+ *next0 = UDP_OUTPUT_NEXT_IP_LOOKUP;
+ }
+
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = uc0->c_fib_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = uc0->sw_if_index;
+}
+
+always_inline uword
+udp46_output_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, int is_ip4)
+{
+ u32 n_left_from, *from, thread_index = vm->thread_index;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+ u16 nexts[VLIB_FRAME_SIZE], *next;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ udp46_output_trace_frame (vm, node, from, n_left_from);
+
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+ b = bufs;
+ next = nexts;
+
+ while (n_left_from >= 4)
+ {
+ udp_connection_t *uc0, *uc1;
+
+ vlib_prefetch_buffer_header (b[2], STORE);
+ CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+
+ vlib_prefetch_buffer_header (b[3], STORE);
+ CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+
+ uc0 = udp_output_get_connection (b[0], thread_index);
+ uc1 = udp_output_get_connection (b[1], thread_index);
+
+ if (PREDICT_TRUE (!uc0 + !uc1 == 0))
+ {
+ udp_output_handle_packet (uc0, b[0], node, &next[0], is_ip4);
+ udp_output_handle_packet (uc1, b[1], node, &next[1], is_ip4);
+ }
+ else
+ {
+ if (uc0 != 0)
+ {
+ udp_output_handle_packet (uc0, b[0], node, &next[0], is_ip4);
+ }
+ else
+ {
+ b[0]->error = node->errors[UDP_ERROR_INVALID_CONNECTION];
+ next[0] = UDP_OUTPUT_NEXT_DROP;
+ }
+ if (uc1 != 0)
+ {
+ udp_output_handle_packet (uc1, b[1], node, &next[1], is_ip4);
+ }
+ else
+ {
+ b[1]->error = node->errors[UDP_ERROR_INVALID_CONNECTION];
+ next[1] = UDP_OUTPUT_NEXT_DROP;
+ }
+ }
+
+ b += 2;
+ next += 2;
+ n_left_from -= 2;
+ }
+ while (n_left_from > 0)
+ {
+ udp_connection_t *uc0;
+
+ if (n_left_from > 1)
+ {
+ vlib_prefetch_buffer_header (b[1], STORE);
+ CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ uc0 = udp_output_get_connection (b[0], thread_index);
+
+ if (PREDICT_TRUE (uc0 != 0))
+ {
+ udp_output_handle_packet (uc0, b[0], node, &next[0], is_ip4);
+ }
+ else
+ {
+ b[0]->error = node->errors[UDP_ERROR_INVALID_CONNECTION];
+ next[0] = UDP_OUTPUT_NEXT_DROP;
+ }
+
+ b += 1;
+ next += 1;
+ n_left_from -= 1;
+ }
+
+ vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+ vlib_node_increment_counter (vm, udp_node_index (output, is_ip4),
+ UDP_ERROR_PKTS_SENT, frame->n_vectors);
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (udp4_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return udp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */);
+}
+
+VLIB_NODE_FN (udp6_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return udp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */);
+}
+
+VLIB_REGISTER_NODE (udp4_output_node) =
+{
+ .name = "udp4-output",
+ .vector_size = sizeof (u32),
+ .n_errors = UDP_N_ERROR,
+ .protocol_hint = VLIB_NODE_PROTO_HINT_UDP,
+ .error_counters = udp_output_error_counters,
+ .n_next_nodes = UDP_OUTPUT_N_NEXT,
+ .next_nodes = {
+#define _(s, n) [UDP_OUTPUT_NEXT_##s] = n,
+ foreach_udp4_output_next
+#undef _
+ },
+ .format_buffer = format_udp_header,
+ .format_trace = format_udp_tx_trace,
+};
+
+VLIB_REGISTER_NODE (udp6_output_node) =
+{
+ .name = "udp6-output",
+ .vector_size = sizeof (u32),
+ .n_errors = UDP_N_ERROR,
+ .protocol_hint = VLIB_NODE_PROTO_HINT_UDP,
+ .error_counters = udp_output_error_counters,
+ .n_next_nodes = UDP_OUTPUT_N_NEXT,
+ .next_nodes = {
+#define _(s, n) [UDP_OUTPUT_NEXT_##s] = n,
+ foreach_udp6_output_next
+#undef _
+ },
+ .format_buffer = format_udp_header,
+ .format_trace = format_udp_tx_trace,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
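Two idioms in the new output node are worth spelling out. First, udp_node_index token-pastes the protocol prefix, so udp_node_index (output, 1) expands to udp4_output_node.index. Second, in the fast-path guard PREDICT_TRUE (!uc0 + !uc1 == 0), '+' binds tighter than '==', so it parses as (!uc0 + !uc1) == 0 — true exactly when both connection pointers are non-NULL — letting the common case take a single branch. A standalone check of that equivalence:

#include <assert.h>
#include <stddef.h>

int
main (void)
{
  int x;
  void *uc0 = &x, *uc1 = &x;

  /* '+' binds tighter than '==', so this is (!uc0 + !uc1) == 0 */
  assert ((!uc0 + !uc1 == 0) == (uc0 != NULL && uc1 != NULL));

  uc1 = NULL;
  assert ((!uc0 + !uc1 == 0) == (uc0 != NULL && uc1 != NULL));
  return 0;
}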
diff --git a/src/vnet/unix/gdb_funcs.c b/src/vnet/unix/gdb_funcs.c
index 91dabe394ba..a89b7202400 100644
--- a/src/vnet/unix/gdb_funcs.c
+++ b/src/vnet/unix/gdb_funcs.c
@@ -238,44 +238,44 @@ gdb_show_traces ()
/* Get active traces from pool. */
- foreach_vlib_main ()
- {
- fmt = "------------------- Start of thread %d %s -------------------\n";
- s = format (s, fmt, index, vlib_worker_threads[index].name);
+ foreach_vlib_main__ (0 /* no checks */)
+ {
+ fmt = "------------------- Start of thread %d %s -------------------\n";
+ s = format (s, fmt, index, vlib_worker_threads[index].name);
- tm = &this_vlib_main->trace_main;
+ tm = &this_vlib_main->trace_main;
- trace_apply_filter (this_vlib_main);
+ trace_apply_filter (this_vlib_main);
- traces = 0;
- pool_foreach (h, tm->trace_buffer_pool)
- {
- vec_add1 (traces, h[0]);
- }
+ traces = 0;
+ pool_foreach (h, tm->trace_buffer_pool)
+ {
+ vec_add1 (traces, h[0]);
+ }
- if (vec_len (traces) == 0)
- {
- s = format (s, "No packets in trace buffer\n");
- goto done;
- }
+ if (vec_len (traces) == 0)
+ {
+ s = format (s, "No packets in trace buffer\n");
+ goto done;
+ }
- /* Sort them by increasing time. */
- vec_sort_with_function (traces, trace_cmp);
+ /* Sort them by increasing time. */
+ vec_sort_with_function (traces, trace_cmp);
- for (i = 0; i < vec_len (traces); i++)
- {
- if (i == max)
- {
- fformat (stderr,
- "Limiting display to %d packets."
- " To display more specify max.",
- max);
- goto done;
- }
-
- s = format (s, "Packet %d\n%U\n\n", i + 1, format_vlib_trace,
- vlib_get_first_main (), traces[i]);
- }
+ for (i = 0; i < vec_len (traces); i++)
+ {
+ if (i == max)
+ {
+ fformat (stderr,
+ "Limiting display to %d packets."
+ " To display more specify max.",
+ max);
+ goto done;
+ }
+
+ s = format (s, "Packet %d\n%U\n\n", i + 1, format_vlib_trace,
+ vlib_get_first_main (), traces[i]);
+ }
done:
vec_free (traces);
@@ -318,13 +318,11 @@ show_gdb_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_gdb_funcs_command, static) = {
.path = "show gdb",
.short_help = "Describe functions which can be called from gdb",
.function = show_gdb_command_fn,
};
-/* *INDENT-ON* */
vlib_buffer_t *
vgb (u32 bi)
diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c
index 4a848349ae1..f1102dc321e 100644
--- a/src/vnet/unix/tuntap.c
+++ b/src/vnet/unix/tuntap.c
@@ -172,7 +172,7 @@ tuntap_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
/* Re-set iovecs if present. */
if (tm->threads[thread_index].iovecs)
- _vec_len (tm->threads[thread_index].iovecs) = 0;
+ vec_set_len (tm->threads[thread_index].iovecs, 0);
/** VLIB buffer chain -> Unix iovec(s). */
vec_add2 (tm->threads[thread_index].iovecs, iov, 1);
@@ -217,14 +217,12 @@ tuntap_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
return n_packets;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tuntap_tx_node,static) = {
.function = tuntap_tx,
.name = "tuntap-tx",
.type = VLIB_NODE_TYPE_INTERNAL,
.vector_size = 4,
};
-/* *INDENT-ON* */
/**
* @brief TUNTAP receive node
@@ -260,7 +258,7 @@ tuntap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vlib_buffer_alloc (vm,
tm->threads[thread_index].rx_buffers + n_left,
VLIB_FRAME_SIZE - n_left);
- _vec_len (tm->threads[thread_index].rx_buffers) = n_left + n_alloc;
+ vec_set_len (tm->threads[thread_index].rx_buffers, n_left + n_alloc);
}
}
@@ -324,7 +322,7 @@ tuntap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+ VNET_INTERFACE_COUNTER_RX,
thread_index, tm->sw_if_index, 1, n_bytes_in_packet);
- _vec_len (tm->threads[thread_index].rx_buffers) = i_rx;
+ vec_set_len (tm->threads[thread_index].rx_buffers, i_rx);
}
b = vlib_get_buffer (vm, bi);
@@ -366,7 +364,7 @@ tuntap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
next_index = VNET_DEVICE_INPUT_NEXT_DROP;
}
- vnet_feature_start_device_input_x1 (tm->sw_if_index, &next_index, b);
+ vnet_feature_start_device_input (tm->sw_if_index, &next_index, b);
vlib_set_next_frame_buffer (vm, node, next_index, bi);
@@ -385,7 +383,6 @@ static char *tuntap_rx_error_strings[] = {
"unknown packet type",
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tuntap_rx_node,static) = {
.function = tuntap_rx,
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
@@ -397,7 +394,6 @@ VLIB_REGISTER_NODE (tuntap_rx_node,static) = {
.n_errors = 1,
.error_strings = tuntap_rx_error_strings,
};
-/* *INDENT-ON* */
/**
* @brief Gets called when file descriptor is ready from epoll.
@@ -624,12 +620,12 @@ tuntap_config (vlib_main_t * vm, unformat_input_t * input)
if (have_normal_interface)
{
vnet_main_t *vnm = vnet_get_main ();
- error = ethernet_register_interface
- (vnm, tuntap_dev_class.index, 0 /* device instance */ ,
- tm->ether_dst_mac /* ethernet address */ ,
- &tm->hw_if_index, 0 /* flag change */ );
- if (error)
- clib_error_report (error);
+ vnet_eth_interface_registration_t eir = {};
+
+ eir.dev_class_index = tuntap_dev_class.index;
+ eir.address = tm->ether_dst_mac;
+ tm->hw_if_index = vnet_eth_register_interface (vnm, &eir);
+
tm->sw_if_index = tm->hw_if_index;
vm->os_punt_frame = tuntap_nopunt_frame;
}
@@ -912,7 +908,7 @@ tuntap_punt_frame (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * frame)
{
tuntap_tx (vm, node, frame);
- vlib_frame_free (vm, node, frame);
+ vlib_frame_free (vm, frame);
}
/**
@@ -930,15 +926,13 @@ tuntap_nopunt_frame (vlib_main_t * vm,
u32 *buffers = vlib_frame_vector_args (frame);
uword n_packets = frame->n_vectors;
vlib_buffer_free (vm, buffers, n_packets);
- vlib_frame_free (vm, node, frame);
+ vlib_frame_free (vm, frame);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (tuntap_interface_class,static) = {
.name = "tuntap",
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
/**
* @brief Format tun/tap interface name
@@ -984,13 +978,11 @@ tuntap_intfc_tx (vlib_main_t * vm,
return n_buffers;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (tuntap_dev_class,static) = {
.name = "tuntap",
.tx_function = tuntap_intfc_tx,
.format_device_name = format_tuntap_interface_name,
};
-/* *INDENT-ON* */
/**
* @brief tun/tap node init
@@ -1025,12 +1017,10 @@ tuntap_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (tuntap_init) =
{
.runs_after = VLIB_INITS("ip4_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
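The mechanical change threaded through this file (and gdb_funcs.c above) is the retirement of the `_vec_len (v) = n` lvalue form in favor of vec_set_len (v, n): same effect, but length updates go through a single checked accessor. A sketch of the replacement pattern:

/* Sketch: reuse a vector's allocation as an empty vector, then
 * refill it; vec_set_len only changes the bookkeeping length. */
u32 *v = 0;
vec_validate (v, 63); /* vec_len (v) == 64 */
vec_set_len (v, 0);   /* keep storage, drop contents */
vec_add1 (v, 42);
vec_free (v);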
diff --git a/src/vnet/util/throttle.c b/src/vnet/util/throttle.c
index 0985b4a81a3..8b8e030bf53 100644
--- a/src/vnet/util/throttle.c
+++ b/src/vnet/util/throttle.c
@@ -16,17 +16,18 @@
#include <vnet/util/throttle.h>
void
-throttle_init (throttle_t * t, u32 n_threads, f64 time)
+throttle_init (throttle_t *t, u32 n_threads, u32 buckets, f64 time)
{
u32 i;
t->time = time;
+ t->buckets = 1 << max_log2 (buckets);
vec_validate (t->bitmaps, n_threads);
vec_validate (t->seeds, n_threads);
vec_validate (t->last_seed_change_time, n_threads);
for (i = 0; i < n_threads; i++)
- vec_validate (t->bitmaps[i], (THROTTLE_BITS / BITS (uword)) - 1);
+ clib_bitmap_alloc (t->bitmaps[i], t->buckets);
}
/*
diff --git a/src/vnet/util/throttle.h b/src/vnet/util/throttle.h
index 38ace280131..53435c4a359 100644
--- a/src/vnet/util/throttle.h
+++ b/src/vnet/util/throttle.h
@@ -31,11 +31,13 @@ typedef struct throttle_t_
uword **bitmaps;
u64 *seeds;
f64 *last_seed_change_time;
+ u32 buckets;
} throttle_t;
#define THROTTLE_BITS (512)
-extern void throttle_init (throttle_t * t, u32 n_threads, f64 time);
+extern void throttle_init (throttle_t *t, u32 n_threads, u32 buckets,
+ f64 time);
always_inline u64
throttle_seed (throttle_t * t, u32 thread_index, f64 time_now)
@@ -43,7 +45,7 @@ throttle_seed (throttle_t * t, u32 thread_index, f64 time_now)
if (time_now - t->last_seed_change_time[thread_index] > t->time)
{
(void) random_u64 (&t->seeds[thread_index]);
- clib_memset (t->bitmaps[thread_index], 0, THROTTLE_BITS / BITS (u8));
+ clib_bitmap_zero (t->bitmaps[thread_index]);
t->last_seed_change_time[thread_index] = time_now;
}
@@ -53,21 +55,14 @@ throttle_seed (throttle_t * t, u32 thread_index, f64 time_now)
always_inline int
throttle_check (throttle_t * t, u32 thread_index, u64 hash, u64 seed)
{
- int drop;
- uword m;
- u32 w;
+ ASSERT (is_pow2 (t->buckets));
hash = clib_xxhash (hash ^ seed);
/* Select bit number */
- hash &= THROTTLE_BITS - 1;
- w = hash / BITS (uword);
- m = (uword) 1 << (hash % BITS (uword));
+ hash &= t->buckets - 1;
- drop = (t->bitmaps[thread_index][w] & m) != 0;
- t->bitmaps[thread_index][w] |= m;
-
- return (drop);
+ return clib_bitmap_set_no_check (t->bitmaps[thread_index], hash, 1);
}
#endif
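End to end, the reworked throttle is sized at init (buckets rounded up to a power of two via max_log2) and the per-packet path remains two calls: seed, then check. A usage sketch for a single thread, with a made-up flow hash:

/* Usage sketch (single thread; the hash value is made up). */
throttle_t t = { 0 };
f64 now = vlib_time_now (vm);
u64 seed, hash = 0x1234567890abcdefULL;

throttle_init (&t, 1 /* n_threads */, 1024 /* buckets */, 1e-3 /* time */);
seed = throttle_seed (&t, 0 /* thread_index */, now);
if (throttle_check (&t, 0 /* thread_index */, hash, seed))
  /* hash already seen in the current interval: suppress the work */ ;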
diff --git a/src/vnet/vnet.h b/src/vnet/vnet.h
index 24afe633af2..54988aec667 100644
--- a/src/vnet/vnet.h
+++ b/src/vnet/vnet.h
@@ -45,6 +45,7 @@
#include <vppinfra/types.h>
#include <vppinfra/pcap.h>
+#include <vnet/error.h>
#include <vnet/buffer.h>
#include <vnet/config.h>
#include <vnet/interface.h>
@@ -70,6 +71,7 @@ typedef struct
u32 pcap_sw_if_index;
pcap_main_t pcap_main;
u32 filter_classify_table_index;
+ vlib_is_packet_traced_fn_t *current_filter_function;
vlib_error_t pcap_error_index;
} vnet_pcap_t;
diff --git a/src/vnet/vxlan-gbp/decap.c b/src/vnet/vxlan-gbp/decap.c
deleted file mode 100644
index 927c778b211..00000000000
--- a/src/vnet/vxlan-gbp/decap.c
+++ /dev/null
@@ -1,1050 +0,0 @@
-/*
- * decap.c: vxlan gbp tunnel decap packet processing
- *
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vlib/vlib.h>
-
-#include <vnet/vxlan-gbp/vxlan_gbp.h>
-
-typedef struct
-{
- u32 next_index;
- u32 tunnel_index;
- u32 error;
- u32 vni;
- u16 sclass;
- u8 flags;
-} vxlan_gbp_rx_trace_t;
-
-static u8 *
-format_vxlan_gbp_rx_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- vxlan_gbp_rx_trace_t *t = va_arg (*args, vxlan_gbp_rx_trace_t *);
-
- if (t->tunnel_index == ~0)
- return format (s,
- "VXLAN_GBP decap error - tunnel for vni %d does not exist",
- t->vni);
- return format (s,
- "VXLAN_GBP decap from vxlan_gbp_tunnel%d vni %d sclass %d"
- " flags %U next %d error %d",
- t->tunnel_index, t->vni, t->sclass,
- format_vxlan_gbp_header_gpflags, t->flags,
- t->next_index, t->error);
-}
-
-always_inline u32
-buf_fib_index (vlib_buffer_t * b, u32 is_ip4)
-{
- u32 sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX];
- if (sw_if_index != (u32) ~ 0)
- return sw_if_index;
-
- u32 *fib_index_by_sw_if_index = is_ip4 ?
- ip4_main.fib_index_by_sw_if_index : ip6_main.fib_index_by_sw_if_index;
- sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
-
- return vec_elt (fib_index_by_sw_if_index, sw_if_index);
-}
-
-typedef vxlan4_gbp_tunnel_key_t last_tunnel_cache4;
-
-always_inline vxlan_gbp_tunnel_t *
-vxlan4_gbp_find_tunnel (vxlan_gbp_main_t * vxm, last_tunnel_cache4 * cache,
- u32 fib_index, ip4_header_t * ip4_0,
- vxlan_gbp_header_t * vxlan_gbp0)
-{
- /*
- * Check unicast first since that's where most of the traffic comes from
- * Make sure VXLAN_GBP tunnel exist according to packet SIP, DIP and VNI
- */
- vxlan4_gbp_tunnel_key_t key4;
- int rv;
-
- key4.key[1] = (((u64) fib_index << 32) |
- (vxlan_gbp0->vni_reserved &
- clib_host_to_net_u32 (0xffffff00)));
- key4.key[0] =
- (((u64) ip4_0->dst_address.as_u32 << 32) | ip4_0->src_address.as_u32);
-
- if (PREDICT_FALSE (key4.key[0] != cache->key[0] ||
- key4.key[1] != cache->key[1]))
- {
- rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_gbp_tunnel_by_key,
- &key4);
- if (PREDICT_FALSE (rv == 0))
- {
- *cache = key4;
- return (pool_elt_at_index (vxm->tunnels, cache->value));
- }
- }
- else
- {
- return (pool_elt_at_index (vxm->tunnels, cache->value));
- }
-
- /* No unicast match - try multicast */
- if (PREDICT_TRUE (!ip4_address_is_multicast (&ip4_0->dst_address)))
- return (NULL);
-
- key4.key[0] = ip4_0->dst_address.as_u32;
- /* Make sure mcast VXLAN_GBP tunnel exist by packet DIP and VNI */
- rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_gbp_tunnel_by_key, &key4);
-
- if (PREDICT_FALSE (rv != 0))
- return (NULL);
-
- return (pool_elt_at_index (vxm->tunnels, key4.value));
-}
-
-typedef vxlan6_gbp_tunnel_key_t last_tunnel_cache6;
-
-always_inline vxlan_gbp_tunnel_t *
-vxlan6_gbp_find_tunnel (vxlan_gbp_main_t * vxm, last_tunnel_cache6 * cache,
- u32 fib_index, ip6_header_t * ip6_0,
- vxlan_gbp_header_t * vxlan_gbp0)
-{
- /* Make sure VXLAN_GBP tunnel exist according to packet SIP and VNI */
- vxlan6_gbp_tunnel_key_t key6 = {
- .key = {
- [0] = ip6_0->src_address.as_u64[0],
- [1] = ip6_0->src_address.as_u64[1],
- [2] = ((((u64) fib_index) << 32) |
- (vxlan_gbp0->vni_reserved &
- clib_host_to_net_u32 (0xffffff00))),
- }
- };
- int rv;
-
- if (PREDICT_FALSE
- (clib_bihash_key_compare_24_8 (key6.key, cache->key) == 0))
- {
- rv = clib_bihash_search_inline_24_8 (&vxm->vxlan6_gbp_tunnel_by_key,
- &key6);
- if (PREDICT_FALSE (rv != 0))
- return NULL;
-
- *cache = key6;
- }
- vxlan_gbp_tunnel_t *t0 = pool_elt_at_index (vxm->tunnels, cache->value);
-
- /* Validate VXLAN_GBP tunnel SIP against packet DIP */
- if (PREDICT_FALSE
- (!ip6_address_is_equal (&ip6_0->dst_address, &t0->src.ip6)))
- {
- /* try multicast */
- if (PREDICT_TRUE (!ip6_address_is_multicast (&ip6_0->dst_address)))
- return 0;
-
- /* Make sure mcast VXLAN_GBP tunnel exist by packet DIP and VNI */
- key6.key[0] = ip6_0->dst_address.as_u64[0];
- key6.key[1] = ip6_0->dst_address.as_u64[1];
- rv = clib_bihash_search_inline_24_8 (&vxm->vxlan6_gbp_tunnel_by_key,
- &key6);
- if (PREDICT_FALSE (rv != 0))
- return 0;
-
- }
-
- return t0;
-}
-
-always_inline vxlan_gbp_input_next_t
-vxlan_gbp_tunnel_get_next (const vxlan_gbp_tunnel_t * t, vlib_buffer_t * b0)
-{
- if (VXLAN_GBP_TUNNEL_MODE_L2 == t->mode)
- return (VXLAN_GBP_INPUT_NEXT_L2_INPUT);
- else
- {
- ethernet_header_t *e0;
- u16 type0;
-
- e0 = vlib_buffer_get_current (b0);
- vlib_buffer_advance (b0, sizeof (*e0));
- type0 = clib_net_to_host_u16 (e0->type);
- switch (type0)
- {
- case ETHERNET_TYPE_IP4:
- return (VXLAN_GBP_INPUT_NEXT_IP4_INPUT);
- case ETHERNET_TYPE_IP6:
- return (VXLAN_GBP_INPUT_NEXT_IP6_INPUT);
- }
- }
- return (VXLAN_GBP_INPUT_NEXT_DROP);
-}
-
-always_inline uword
-vxlan_gbp_input (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame, u8 is_ip4)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- vnet_main_t *vnm = vxm->vnet_main;
- vnet_interface_main_t *im = &vnm->interface_main;
- vlib_combined_counter_main_t *rx_counter =
- im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX;
- vlib_combined_counter_main_t *drop_counter =
- im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_DROP;
- last_tunnel_cache4 last4;
- last_tunnel_cache6 last6;
- u32 pkts_decapsulated = 0;
- u32 thread_index = vlib_get_thread_index ();
-
- if (is_ip4)
- clib_memset (&last4, 0xff, sizeof last4);
- else
- clib_memset (&last6, 0xff, sizeof last6);
-
- u32 next_index = node->cached_next_index;
-
- u32 *from = vlib_frame_vector_args (from_frame);
- u32 n_left_from = from_frame->n_vectors;
-
- while (n_left_from > 0)
- {
- u32 *to_next, n_left_to_next;
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- /* Prefetch next iteration. */
- {
- vlib_buffer_t *p2, *p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
-
- CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- }
-
- u32 bi0 = to_next[0] = from[0];
- u32 bi1 = to_next[1] = from[1];
- from += 2;
- to_next += 2;
- n_left_to_next -= 2;
- n_left_from -= 2;
-
- vlib_buffer_t *b0, *b1;
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
-
- /* udp leaves current_data pointing at the vxlan_gbp header */
- void *cur0 = vlib_buffer_get_current (b0);
- void *cur1 = vlib_buffer_get_current (b1);
- vxlan_gbp_header_t *vxlan_gbp0 = cur0;
- vxlan_gbp_header_t *vxlan_gbp1 = cur1;
-
- ip4_header_t *ip4_0, *ip4_1;
- ip6_header_t *ip6_0, *ip6_1;
- if (is_ip4)
- {
- ip4_0 = cur0 - sizeof (udp_header_t) - sizeof (ip4_header_t);
- ip4_1 = cur1 - sizeof (udp_header_t) - sizeof (ip4_header_t);
- }
- else
- {
- ip6_0 = cur0 - sizeof (udp_header_t) - sizeof (ip6_header_t);
- ip6_1 = cur1 - sizeof (udp_header_t) - sizeof (ip6_header_t);
- }
-
- u32 fi0 = buf_fib_index (b0, is_ip4);
- u32 fi1 = buf_fib_index (b1, is_ip4);
-
- vxlan_gbp_tunnel_t *t0, *t1;
- if (is_ip4)
- {
- t0 =
- vxlan4_gbp_find_tunnel (vxm, &last4, fi0, ip4_0, vxlan_gbp0);
- t1 =
- vxlan4_gbp_find_tunnel (vxm, &last4, fi1, ip4_1, vxlan_gbp1);
- }
- else
- {
- t0 =
- vxlan6_gbp_find_tunnel (vxm, &last6, fi0, ip6_0, vxlan_gbp0);
- t1 =
- vxlan6_gbp_find_tunnel (vxm, &last6, fi1, ip6_1, vxlan_gbp1);
- }
-
- u32 len0 = vlib_buffer_length_in_chain (vm, b0);
- u32 len1 = vlib_buffer_length_in_chain (vm, b1);
-
- vxlan_gbp_input_next_t next0, next1;
- u8 error0 = 0, error1 = 0;
- u8 flags0 = vxlan_gbp_get_flags (vxlan_gbp0);
- u8 flags1 = vxlan_gbp_get_flags (vxlan_gbp1);
- /* Required to make the l2 tag push / pop code work on l2 subifs */
- /* pop vxlan_gbp */
- vlib_buffer_advance (b0, sizeof *vxlan_gbp0);
- vlib_buffer_advance (b1, sizeof *vxlan_gbp1);
-
- u8 i_and_g0 = ((flags0 & VXLAN_GBP_FLAGS_GI) == VXLAN_GBP_FLAGS_GI);
- u8 i_and_g1 = ((flags1 & VXLAN_GBP_FLAGS_GI) == VXLAN_GBP_FLAGS_GI);
-
- /* Validate VXLAN_GBP tunnel encap-fib index against packet */
- if (PREDICT_FALSE (t0 == NULL || !i_and_g0))
- {
- if (t0 != NULL && !i_and_g0)
- {
- error0 = VXLAN_GBP_ERROR_BAD_FLAGS;
- vlib_increment_combined_counter
- (drop_counter, thread_index, t0->sw_if_index, 1, len0);
- next0 = VXLAN_GBP_INPUT_NEXT_DROP;
- }
- else
- {
- error0 = VXLAN_GBP_ERROR_NO_SUCH_TUNNEL;
- next0 = VXLAN_GBP_INPUT_NEXT_PUNT;
- if (is_ip4)
- b0->punt_reason =
- vxm->punt_no_such_tunnel[FIB_PROTOCOL_IP4];
- else
- b0->punt_reason =
- vxm->punt_no_such_tunnel[FIB_PROTOCOL_IP6];
- }
- b0->error = node->errors[error0];
- }
- else
- {
- next0 = vxlan_gbp_tunnel_get_next (t0, b0);
-
- /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = t0->sw_if_index;
- vlib_increment_combined_counter
- (rx_counter, thread_index, t0->sw_if_index, 1, len0);
- pkts_decapsulated++;
- }
-
- vnet_buffer2 (b0)->gbp.flags = (vxlan_gbp_get_gpflags (vxlan_gbp0) |
- VXLAN_GBP_GPFLAGS_R);
- vnet_buffer2 (b0)->gbp.sclass = vxlan_gbp_get_sclass (vxlan_gbp0);
-
-
- if (PREDICT_FALSE (t1 == NULL || !i_and_g1))
- {
- if (t1 != NULL && !i_and_g1)
- {
- error1 = VXLAN_GBP_ERROR_BAD_FLAGS;
- vlib_increment_combined_counter
- (drop_counter, thread_index, t1->sw_if_index, 1, len1);
- next1 = VXLAN_GBP_INPUT_NEXT_DROP;
- }
- else
- {
- error1 = VXLAN_GBP_ERROR_NO_SUCH_TUNNEL;
- next1 = VXLAN_GBP_INPUT_NEXT_PUNT;
- if (is_ip4)
- b1->punt_reason =
- vxm->punt_no_such_tunnel[FIB_PROTOCOL_IP4];
- else
- b1->punt_reason =
- vxm->punt_no_such_tunnel[FIB_PROTOCOL_IP6];
- }
- b1->error = node->errors[error1];
- }
- else
- {
- next1 = vxlan_gbp_tunnel_get_next (t1, b1);
-
- /* Set packet input sw_if_index to unicast VXLAN_GBP tunnel for learning */
- vnet_buffer (b1)->sw_if_index[VLIB_RX] = t1->sw_if_index;
- pkts_decapsulated++;
-
- vlib_increment_combined_counter
- (rx_counter, thread_index, t1->sw_if_index, 1, len1);
- }
-
- vnet_buffer2 (b1)->gbp.flags = (vxlan_gbp_get_gpflags (vxlan_gbp1) |
- VXLAN_GBP_GPFLAGS_R);
-
- vnet_buffer2 (b1)->gbp.sclass = vxlan_gbp_get_sclass (vxlan_gbp1);
-
- vnet_update_l2_len (b0);
- vnet_update_l2_len (b1);
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_gbp_rx_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->next_index = next0;
- tr->error = error0;
- tr->tunnel_index = t0 == 0 ? ~0 : t0 - vxm->tunnels;
- tr->vni = vxlan_gbp_get_vni (vxlan_gbp0);
- tr->sclass = vxlan_gbp_get_sclass (vxlan_gbp0);
- tr->flags = vxlan_gbp_get_gpflags (vxlan_gbp0);
- }
- if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_gbp_rx_trace_t *tr =
- vlib_add_trace (vm, node, b1, sizeof (*tr));
- tr->next_index = next1;
- tr->error = error1;
- tr->tunnel_index = t1 == 0 ? ~0 : t1 - vxm->tunnels;
- tr->vni = vxlan_gbp_get_vni (vxlan_gbp1);
- tr->sclass = vxlan_gbp_get_sclass (vxlan_gbp1);
- tr->flags = vxlan_gbp_get_gpflags (vxlan_gbp1);
- }
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0 = to_next[0] = from[0];
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
-
- /* udp leaves current_data pointing at the vxlan_gbp header */
- void *cur0 = vlib_buffer_get_current (b0);
- vxlan_gbp_header_t *vxlan_gbp0 = cur0;
- ip4_header_t *ip4_0;
- ip6_header_t *ip6_0;
- if (is_ip4)
- ip4_0 = cur0 - sizeof (udp_header_t) - sizeof (ip4_header_t);
- else
- ip6_0 = cur0 - sizeof (udp_header_t) - sizeof (ip6_header_t);
-
- u32 fi0 = buf_fib_index (b0, is_ip4);
-
- vxlan_gbp_tunnel_t *t0;
- if (is_ip4)
- t0 = vxlan4_gbp_find_tunnel (vxm, &last4, fi0, ip4_0, vxlan_gbp0);
- else
- t0 = vxlan6_gbp_find_tunnel (vxm, &last6, fi0, ip6_0, vxlan_gbp0);
-
- uword len0 = vlib_buffer_length_in_chain (vm, b0);
-
- vxlan_gbp_input_next_t next0;
- u8 error0 = 0;
- u8 flags0 = vxlan_gbp_get_flags (vxlan_gbp0);
-
- /* pop (ip, udp, vxlan_gbp) */
- vlib_buffer_advance (b0, sizeof (*vxlan_gbp0));
-
- u8 i_and_g0 = ((flags0 & VXLAN_GBP_FLAGS_GI) == VXLAN_GBP_FLAGS_GI);
-
- /* Validate VXLAN_GBP tunnel encap-fib index against packet */
- if (PREDICT_FALSE (t0 == NULL || !i_and_g0))
- {
- if (t0 != NULL && !i_and_g0)
- {
- error0 = VXLAN_GBP_ERROR_BAD_FLAGS;
- vlib_increment_combined_counter
- (drop_counter, thread_index, t0->sw_if_index, 1, len0);
- next0 = VXLAN_GBP_INPUT_NEXT_DROP;
- }
- else
- {
- error0 = VXLAN_GBP_ERROR_NO_SUCH_TUNNEL;
- next0 = VXLAN_GBP_INPUT_NEXT_PUNT;
- if (is_ip4)
- b0->punt_reason =
- vxm->punt_no_such_tunnel[FIB_PROTOCOL_IP4];
- else
- b0->punt_reason =
- vxm->punt_no_such_tunnel[FIB_PROTOCOL_IP6];
- }
- b0->error = node->errors[error0];
- }
- else
- {
- next0 = vxlan_gbp_tunnel_get_next (t0, b0);
- /* Set packet input sw_if_index to unicast VXLAN_GBP tunnel for learning */
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = t0->sw_if_index;
- pkts_decapsulated++;
-
- vlib_increment_combined_counter
- (rx_counter, thread_index, t0->sw_if_index, 1, len0);
- }
- vnet_buffer2 (b0)->gbp.flags = (vxlan_gbp_get_gpflags (vxlan_gbp0) |
- VXLAN_GBP_GPFLAGS_R);
-
- vnet_buffer2 (b0)->gbp.sclass = vxlan_gbp_get_sclass (vxlan_gbp0);
-
- /* Required to make the l2 tag push / pop code work on l2 subifs */
- vnet_update_l2_len (b0);
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_gbp_rx_trace_t *tr
- = vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->next_index = next0;
- tr->error = error0;
- tr->tunnel_index = t0 == 0 ? ~0 : t0 - vxm->tunnels;
- tr->vni = vxlan_gbp_get_vni (vxlan_gbp0);
- tr->sclass = vxlan_gbp_get_sclass (vxlan_gbp0);
- tr->flags = vxlan_gbp_get_gpflags (vxlan_gbp0);
- }
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
- /* Do we still need this now that tunnel tx stats is kept? */
- u32 node_idx =
- is_ip4 ? vxlan4_gbp_input_node.index : vxlan6_gbp_input_node.index;
- vlib_node_increment_counter (vm, node_idx, VXLAN_GBP_ERROR_DECAPSULATED,
- pkts_decapsulated);
-
- return from_frame->n_vectors;
-}
-
-VLIB_NODE_FN (vxlan4_gbp_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return vxlan_gbp_input (vm, node, from_frame, /* is_ip4 */ 1);
-}
-
-VLIB_NODE_FN (vxlan6_gbp_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return vxlan_gbp_input (vm, node, from_frame, /* is_ip4 */ 0);
-}
-
-static char *vxlan_gbp_error_strings[] = {
-#define vxlan_gbp_error(n,s) s,
-#include <vnet/vxlan-gbp/vxlan_gbp_error.def>
-#undef vxlan_gbp_error
-#undef _
-};
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (vxlan4_gbp_input_node) =
-{
- .name = "vxlan4-gbp-input",
- .vector_size = sizeof (u32),
- .n_errors = VXLAN_GBP_N_ERROR,
- .error_strings = vxlan_gbp_error_strings,
- .n_next_nodes = VXLAN_GBP_INPUT_N_NEXT,
- .format_trace = format_vxlan_gbp_rx_trace,
- .next_nodes = {
-#define _(s,n) [VXLAN_GBP_INPUT_NEXT_##s] = n,
- foreach_vxlan_gbp_input_next
-#undef _
- },
-};
-
-VLIB_REGISTER_NODE (vxlan6_gbp_input_node) =
-{
- .name = "vxlan6-gbp-input",
- .vector_size = sizeof (u32),
- .n_errors = VXLAN_GBP_N_ERROR,
- .error_strings = vxlan_gbp_error_strings,
- .n_next_nodes = VXLAN_GBP_INPUT_N_NEXT,
- .next_nodes = {
-#define _(s,n) [VXLAN_GBP_INPUT_NEXT_##s] = n,
- foreach_vxlan_gbp_input_next
-#undef _
- },
- .format_trace = format_vxlan_gbp_rx_trace,
-};
-/* *INDENT-ON* */
-
-typedef enum
-{
- IP_VXLAN_GBP_BYPASS_NEXT_DROP,
- IP_VXLAN_GBP_BYPASS_NEXT_VXLAN_GBP,
- IP_VXLAN_GBP_BYPASS_N_NEXT,
-} ip_vxlan_gbp_bypass_next_t;
-
-always_inline uword
-ip_vxlan_gbp_bypass_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, u32 is_ip4)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- u32 *from, *to_next, n_left_from, n_left_to_next, next_index;
- vlib_node_runtime_t *error_node =
- vlib_node_get_runtime (vm, ip4_input_node.index);
- ip4_address_t addr4; /* last IPv4 address matching a local VTEP address */
- ip6_address_t addr6; /* last IPv6 address matching a local VTEP address */
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
-
- if (node->flags & VLIB_NODE_FLAG_TRACE)
- ip4_forward_next_trace (vm, node, frame, VLIB_TX);
-
- if (is_ip4)
- addr4.data_u32 = ~0;
- else
- ip6_address_set_zero (&addr6);
-
- while (n_left_from > 0)
- {
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- vlib_buffer_t *b0, *b1;
- ip4_header_t *ip40, *ip41;
- ip6_header_t *ip60, *ip61;
- udp_header_t *udp0, *udp1;
- u32 bi0, ip_len0, udp_len0, flags0, next0;
- u32 bi1, ip_len1, udp_len1, flags1, next1;
- i32 len_diff0, len_diff1;
- u8 error0, good_udp0, proto0;
- u8 error1, good_udp1, proto1;
-
- /* Prefetch next iteration. */
- {
- vlib_buffer_t *p2, *p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
-
- CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- }
-
- bi0 = to_next[0] = from[0];
- bi1 = to_next[1] = from[1];
- from += 2;
- n_left_from -= 2;
- to_next += 2;
- n_left_to_next -= 2;
-
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
- if (is_ip4)
- {
- ip40 = vlib_buffer_get_current (b0);
- ip41 = vlib_buffer_get_current (b1);
- }
- else
- {
- ip60 = vlib_buffer_get_current (b0);
- ip61 = vlib_buffer_get_current (b1);
- }
-
- /* Setup packet for next IP feature */
- vnet_feature_next (&next0, b0);
- vnet_feature_next (&next1, b1);
-
- if (is_ip4)
- {
- /* Treat IP frag packets as "experimental" protocol for now
- until support of IP frag reassembly is implemented */
- proto0 = ip4_is_fragment (ip40) ? 0xfe : ip40->protocol;
- proto1 = ip4_is_fragment (ip41) ? 0xfe : ip41->protocol;
- }
- else
- {
- proto0 = ip60->protocol;
- proto1 = ip61->protocol;
- }
-
- /* Process packet 0 */
- if (proto0 != IP_PROTOCOL_UDP)
- goto exit0; /* not UDP packet */
-
- if (is_ip4)
- udp0 = ip4_next_header (ip40);
- else
- udp0 = ip6_next_header (ip60);
-
- if (udp0->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_vxlan_gbp))
- goto exit0; /* not VXLAN_GBP packet */
-
- /* Validate DIP against VTEPs */
- if (is_ip4)
- {
- if (addr4.as_u32 != ip40->dst_address.as_u32)
- {
- if (!hash_get (vxm->vtep4, ip40->dst_address.as_u32))
- goto exit0; /* no local VTEP for VXLAN_GBP packet */
- addr4 = ip40->dst_address;
- }
- }
- else
- {
- if (!ip6_address_is_equal (&addr6, &ip60->dst_address))
- {
- if (!hash_get_mem (vxm->vtep6, &ip60->dst_address))
- goto exit0; /* no local VTEP for VXLAN_GBP packet */
- addr6 = ip60->dst_address;
- }
- }
-
- flags0 = b0->flags;
- good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
-
- /* Don't verify UDP checksum for packets with explicit zero checksum. */
- good_udp0 |= udp0->checksum == 0;
-
- /* Verify UDP length */
- if (is_ip4)
- ip_len0 = clib_net_to_host_u16 (ip40->length);
- else
- ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
- udp_len0 = clib_net_to_host_u16 (udp0->length);
- len_diff0 = ip_len0 - udp_len0;
-
- /* Verify UDP checksum */
- if (PREDICT_FALSE (!good_udp0))
- {
- if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
- {
- if (is_ip4)
- flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
- else
- flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
- good_udp0 =
- (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
- }
- }
-
- if (is_ip4)
- {
- error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
- }
- else
- {
- error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
- }
-
- next0 = error0 ?
- IP_VXLAN_GBP_BYPASS_NEXT_DROP :
- IP_VXLAN_GBP_BYPASS_NEXT_VXLAN_GBP;
- b0->error = error0 ? error_node->errors[error0] : 0;
-
- /* vxlan-gbp-input node expect current at VXLAN_GBP header */
- if (is_ip4)
- vlib_buffer_advance (b0,
- sizeof (ip4_header_t) +
- sizeof (udp_header_t));
- else
- vlib_buffer_advance (b0,
- sizeof (ip6_header_t) +
- sizeof (udp_header_t));
-
- exit0:
- /* Process packet 1 */
- if (proto1 != IP_PROTOCOL_UDP)
- goto exit1; /* not UDP packet */
-
- if (is_ip4)
- udp1 = ip4_next_header (ip41);
- else
- udp1 = ip6_next_header (ip61);
-
- if (udp1->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_vxlan_gbp))
- goto exit1; /* not VXLAN_GBP packet */
-
- /* Validate DIP against VTEPs */
- if (is_ip4)
- {
- if (addr4.as_u32 != ip41->dst_address.as_u32)
- {
- if (!hash_get (vxm->vtep4, ip41->dst_address.as_u32))
- goto exit1; /* no local VTEP for VXLAN_GBP packet */
- addr4 = ip41->dst_address;
- }
- }
- else
- {
- if (!ip6_address_is_equal (&addr6, &ip61->dst_address))
- {
- if (!hash_get_mem (vxm->vtep6, &ip61->dst_address))
- goto exit1; /* no local VTEP for VXLAN_GBP packet */
- addr6 = ip61->dst_address;
- }
- }
-
- flags1 = b1->flags;
- good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
-
- /* Don't verify UDP checksum for packets with explicit zero checksum. */
- good_udp1 |= udp1->checksum == 0;
-
- /* Verify UDP length */
- if (is_ip4)
- ip_len1 = clib_net_to_host_u16 (ip41->length);
- else
- ip_len1 = clib_net_to_host_u16 (ip61->payload_length);
- udp_len1 = clib_net_to_host_u16 (udp1->length);
- len_diff1 = ip_len1 - udp_len1;
-
- /* Verify UDP checksum */
- if (PREDICT_FALSE (!good_udp1))
- {
- if ((flags1 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
- {
- if (is_ip4)
- flags1 = ip4_tcp_udp_validate_checksum (vm, b1);
- else
- flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, b1);
- good_udp1 =
- (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
- }
- }
-
- if (is_ip4)
- {
- error1 = good_udp1 ? 0 : IP4_ERROR_UDP_CHECKSUM;
- error1 = (len_diff1 >= 0) ? error1 : IP4_ERROR_UDP_LENGTH;
- }
- else
- {
- error1 = good_udp1 ? 0 : IP6_ERROR_UDP_CHECKSUM;
- error1 = (len_diff1 >= 0) ? error1 : IP6_ERROR_UDP_LENGTH;
- }
-
- next1 = error1 ?
- IP_VXLAN_GBP_BYPASS_NEXT_DROP :
- IP_VXLAN_GBP_BYPASS_NEXT_VXLAN_GBP;
- b1->error = error1 ? error_node->errors[error1] : 0;
-
- /* vxlan-gbp-input node expects current to be at the VXLAN_GBP header */
- if (is_ip4)
- vlib_buffer_advance (b1,
- sizeof (ip4_header_t) +
- sizeof (udp_header_t));
- else
- vlib_buffer_advance (b1,
- sizeof (ip6_header_t) +
- sizeof (udp_header_t));
-
- exit1:
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- vlib_buffer_t *b0;
- ip4_header_t *ip40;
- ip6_header_t *ip60;
- udp_header_t *udp0;
- u32 bi0, ip_len0, udp_len0, flags0, next0;
- i32 len_diff0;
- u8 error0, good_udp0, proto0;
-
- bi0 = to_next[0] = from[0];
- from += 1;
- n_left_from -= 1;
- to_next += 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- if (is_ip4)
- ip40 = vlib_buffer_get_current (b0);
- else
- ip60 = vlib_buffer_get_current (b0);
-
- /* Setup packet for next IP feature */
- vnet_feature_next (&next0, b0);
-
- if (is_ip4)
- /* Treat IP4 frag packets as "experimental" protocol for now
- until support of IP frag reassembly is implemented */
- proto0 = ip4_is_fragment (ip40) ? 0xfe : ip40->protocol;
- else
- proto0 = ip60->protocol;
-
- if (proto0 != IP_PROTOCOL_UDP)
- goto exit; /* not UDP packet */
-
- if (is_ip4)
- udp0 = ip4_next_header (ip40);
- else
- udp0 = ip6_next_header (ip60);
-
- if (udp0->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_vxlan_gbp))
- goto exit; /* not VXLAN_GBP packet */
-
- /* Validate DIP against VTEPs */
- if (is_ip4)
- {
- if (addr4.as_u32 != ip40->dst_address.as_u32)
- {
- if (!hash_get (vxm->vtep4, ip40->dst_address.as_u32))
- goto exit; /* no local VTEP for VXLAN_GBP packet */
- addr4 = ip40->dst_address;
- }
- }
- else
- {
- if (!ip6_address_is_equal (&addr6, &ip60->dst_address))
- {
- if (!hash_get_mem (vxm->vtep6, &ip60->dst_address))
- goto exit; /* no local VTEP for VXLAN_GBP packet */
- addr6 = ip60->dst_address;
- }
- }
-
- flags0 = b0->flags;
- good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
-
- /* Don't verify UDP checksum for packets with explicit zero checksum. */
- good_udp0 |= udp0->checksum == 0;
-
- /* Verify UDP length */
- if (is_ip4)
- ip_len0 = clib_net_to_host_u16 (ip40->length);
- else
- ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
- udp_len0 = clib_net_to_host_u16 (udp0->length);
- len_diff0 = ip_len0 - udp_len0;
-
- /* Verify UDP checksum */
- if (PREDICT_FALSE (!good_udp0))
- {
- if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
- {
- if (is_ip4)
- flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
- else
- flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
- good_udp0 =
- (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
- }
- }
-
- if (is_ip4)
- {
- error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
- }
- else
- {
- error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
- }
-
- next0 = error0 ?
- IP_VXLAN_GBP_BYPASS_NEXT_DROP :
- IP_VXLAN_GBP_BYPASS_NEXT_VXLAN_GBP;
- b0->error = error0 ? error_node->errors[error0] : 0;
-
- /* vxlan-gbp-input node expects current to be at the VXLAN_GBP header */
- if (is_ip4)
- vlib_buffer_advance (b0,
- sizeof (ip4_header_t) +
- sizeof (udp_header_t));
- else
- vlib_buffer_advance (b0,
- sizeof (ip6_header_t) +
- sizeof (udp_header_t));
-
- exit:
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
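Condensed, the bypass gate above lets a packet proceed to vxlan-gbp-input only when it is UDP destined to the vxlan-gbp port, addressed to a local VTEP, carries a plausible UDP length, and has a good (or explicitly zero) checksum, with the length error taking precedence. A minimal sketch of the ip4 error selection, reusing the same error constants; the helper itself is hypothetical and not part of the node:

    /* Hypothetical condensation of the ip4 error selection above. */
    static inline u8
    ip4_vxlan_gbp_bypass_error (u16 ip_len, u16 udp_len, int good_udp)
    {
      if ((i32) ip_len - (i32) udp_len < 0)
        return IP4_ERROR_UDP_LENGTH;   /* length error wins, as above */
      if (!good_udp)
        return IP4_ERROR_UDP_CHECKSUM;
      return 0;                        /* continue to vxlan4-gbp-input */
    }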
-
-VLIB_NODE_FN (ip4_vxlan_gbp_bypass_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return ip_vxlan_gbp_bypass_inline (vm, node, frame, /* is_ip4 */ 1);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip4_vxlan_gbp_bypass_node) =
-{
- .name = "ip4-vxlan-gbp-bypass",
- .vector_size = sizeof (u32),
- .n_next_nodes = IP_VXLAN_GBP_BYPASS_N_NEXT,
- .next_nodes = {
- [IP_VXLAN_GBP_BYPASS_NEXT_DROP] = "error-drop",
- [IP_VXLAN_GBP_BYPASS_NEXT_VXLAN_GBP] = "vxlan4-gbp-input",
- },
- .format_buffer = format_ip4_header,
- .format_trace = format_ip4_forward_next_trace,
-};
-/* *INDENT-ON* */
-
-#ifndef CLIB_MARCH_VARIANT
-/* Dummy init function to get us linked in. */
-clib_error_t *
-ip4_vxlan_gbp_bypass_init (vlib_main_t * vm)
-{
- return 0;
-}
-
-VLIB_INIT_FUNCTION (ip4_vxlan_gbp_bypass_init);
-#endif /* CLIB_MARCH_VARIANT */
-
-VLIB_NODE_FN (ip6_vxlan_gbp_bypass_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return ip_vxlan_gbp_bypass_inline (vm, node, frame, /* is_ip4 */ 0);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip6_vxlan_gbp_bypass_node) =
-{
- .name = "ip6-vxlan-gbp-bypass",
- .vector_size = sizeof (u32),
- .n_next_nodes = IP_VXLAN_GBP_BYPASS_N_NEXT,
- .next_nodes = {
- [IP_VXLAN_GBP_BYPASS_NEXT_DROP] = "error-drop",
- [IP_VXLAN_GBP_BYPASS_NEXT_VXLAN_GBP] = "vxlan6-gbp-input",
- },
- .format_buffer = format_ip6_header,
- .format_trace = format_ip6_forward_next_trace,
-};
-/* *INDENT-ON* */
-
-#ifndef CLIB_MARCH_VARIANT
-/* Dummy init function to get us linked in. */
-clib_error_t *
-ip6_vxlan_gbp_bypass_init (vlib_main_t * vm)
-{
- return 0;
-}
-
-VLIB_INIT_FUNCTION (ip6_vxlan_gbp_bypass_init);
-#endif /* CLIB_MARCH_VARIANT */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan-gbp/dir.dox b/src/vnet/vxlan-gbp/dir.dox
deleted file mode 100644
index 6e63c90b17b..00000000000
--- a/src/vnet/vxlan-gbp/dir.dox
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
-@dir
-@brief VXLAN-GBP Code.
-
-This directory contains source code to support VXLAN-GBP.
-
-*/
-/*? %%clicmd:group_label VXLAN-GBP CLI %% ?*/
diff --git a/src/vnet/vxlan-gbp/encap.c b/src/vnet/vxlan-gbp/encap.c
deleted file mode 100644
index 2a4e8a8e312..00000000000
--- a/src/vnet/vxlan-gbp/encap.c
+++ /dev/null
@@ -1,601 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vppinfra/error.h>
-#include <vppinfra/hash.h>
-#include <vnet/vnet.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/interface_output.h>
-#include <vnet/vxlan-gbp/vxlan_gbp.h>
-#include <vnet/qos/qos_types.h>
-#include <vnet/adj/rewrite.h>
-
-/* Statistics (not all errors) */
-#define foreach_vxlan_gbp_encap_error \
-_(ENCAPSULATED, "good packets encapsulated")
-
-static char *vxlan_gbp_encap_error_strings[] = {
-#define _(sym,string) string,
- foreach_vxlan_gbp_encap_error
-#undef _
-};
-
-typedef enum
-{
-#define _(sym,str) VXLAN_GBP_ENCAP_ERROR_##sym,
- foreach_vxlan_gbp_encap_error
-#undef _
- VXLAN_GBP_ENCAP_N_ERROR,
-} vxlan_gbp_encap_error_t;
-
-typedef enum
-{
- VXLAN_GBP_ENCAP_NEXT_DROP,
- VXLAN_GBP_ENCAP_N_NEXT,
-} vxlan_gbp_encap_next_t;
-
-typedef struct
-{
- u32 tunnel_index;
- u32 vni;
- u16 sclass;
- u8 flags;
-} vxlan_gbp_encap_trace_t;
-
-#ifndef CLIB_MARCH_VARIANT
-u8 *
-format_vxlan_gbp_encap_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- vxlan_gbp_encap_trace_t *t = va_arg (*args, vxlan_gbp_encap_trace_t *);
-
- s =
- format (s,
- "VXLAN_GBP encap to vxlan_gbp_tunnel%d vni %d sclass %d flags %U",
- t->tunnel_index, t->vni, t->sclass,
- format_vxlan_gbp_header_gpflags, t->flags);
- return s;
-}
-#endif /* CLIB_MARCH_VARIANT */
-
-always_inline uword
-vxlan_gbp_encap_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame, u8 is_ip4, u8 csum_offload)
-{
- u32 n_left_from, next_index, *from, *to_next;
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- vnet_main_t *vnm = vxm->vnet_main;
- vnet_interface_main_t *im = &vnm->interface_main;
- vlib_combined_counter_main_t *tx_counter =
- im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX;
- u32 pkts_encapsulated = 0;
- u32 thread_index = vlib_get_thread_index ();
- u32 sw_if_index0 = 0, sw_if_index1 = 0;
- u32 next0 = 0, next1 = 0;
- vxlan_gbp_tunnel_t *t0 = NULL, *t1 = NULL;
- index_t dpoi_idx0 = INDEX_INVALID, dpoi_idx1 = INDEX_INVALID;
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
-
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
- vlib_get_buffers (vm, from, bufs, n_left_from);
-
- next_index = node->cached_next_index;
-
- STATIC_ASSERT_SIZEOF (ip6_vxlan_gbp_header_t, 56);
- STATIC_ASSERT_SIZEOF (ip4_vxlan_gbp_header_t, 36);
-
- u8 const underlay_hdr_len = is_ip4 ?
- sizeof (ip4_vxlan_gbp_header_t) : sizeof (ip6_vxlan_gbp_header_t);
- u16 const l3_len = is_ip4 ? sizeof (ip4_header_t) : sizeof (ip6_header_t);
- u32 const csum_flags =
- is_ip4 ? VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L4_HDR_OFFSET_VALID :
- VNET_BUFFER_F_IS_IP6 | VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
- u32 const outer_packet_csum_offload_flags =
- is_ip4 ? VNET_BUFFER_OFFLOAD_F_IP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM :
- VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
- u32 const inner_packet_removed_flags =
- VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_IS_IP6 |
- VNET_BUFFER_F_L2_HDR_OFFSET_VALID | VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- /* Prefetch next iteration. */
- {
- vlib_buffer_t *p2, *p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
-
- CLIB_PREFETCH (b[2]->data - CLIB_CACHE_LINE_BYTES,
- 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (b[3]->data - CLIB_CACHE_LINE_BYTES,
- 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- }
-
- u32 bi0 = to_next[0] = from[0];
- u32 bi1 = to_next[1] = from[1];
- from += 2;
- to_next += 2;
- n_left_to_next -= 2;
- n_left_from -= 2;
-
- u32 or_flags = b[0]->flags | b[1]->flags;
- if (csum_offload && (or_flags & VNET_BUFFER_F_OFFLOAD))
- {
- /* Only calculate the non-GSO packet csum offload */
- if ((b[0]->flags & VNET_BUFFER_F_GSO) == 0)
- {
- vnet_calc_checksums_inline (vm, b[0],
- b[0]->flags &
- VNET_BUFFER_F_IS_IP4,
- b[0]->flags &
- VNET_BUFFER_F_IS_IP6);
- b[0]->flags &= ~inner_packet_removed_flags;
- }
- if ((b[1]->flags & VNET_BUFFER_F_GSO) == 0)
- {
- vnet_calc_checksums_inline (vm, b[1],
- b[1]->flags &
- VNET_BUFFER_F_IS_IP4,
- b[1]->flags &
- VNET_BUFFER_F_IS_IP6);
- b[1]->flags &= ~inner_packet_removed_flags;
- }
- }
-
- u32 flow_hash0 = vnet_l2_compute_flow_hash (b[0]);
- u32 flow_hash1 = vnet_l2_compute_flow_hash (b[1]);
-
- /* Get next node index and adj index from tunnel next_dpo */
- if (sw_if_index0 != vnet_buffer (b[0])->sw_if_index[VLIB_TX])
- {
- sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
- vnet_hw_interface_t *hi0 =
- vnet_get_sup_hw_interface (vnm, sw_if_index0);
- t0 = &vxm->tunnels[hi0->dev_instance];
- /* Note: change to always set next0 if it may be set to drop */
- next0 = t0->next_dpo.dpoi_next_node;
- dpoi_idx0 = t0->next_dpo.dpoi_index;
- }
-
- /* Get next node index and adj index from tunnel next_dpo */
- if (sw_if_index1 != vnet_buffer (b[1])->sw_if_index[VLIB_TX])
- {
- if (sw_if_index0 == vnet_buffer (b[1])->sw_if_index[VLIB_TX])
- {
- sw_if_index1 = sw_if_index0;
- t1 = t0;
- next1 = next0;
- dpoi_idx1 = dpoi_idx0;
- }
- else
- {
- sw_if_index1 = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
- vnet_hw_interface_t *hi1 =
- vnet_get_sup_hw_interface (vnm, sw_if_index1);
- t1 = &vxm->tunnels[hi1->dev_instance];
- /* Note: change to always set next1 if it may be set to drop */
- next1 = t1->next_dpo.dpoi_next_node;
- dpoi_idx1 = t1->next_dpo.dpoi_index;
- }
- }
-
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpoi_idx0;
- vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpoi_idx1;
-
- ASSERT (t0->rewrite_header.data_bytes == underlay_hdr_len);
- ASSERT (t1->rewrite_header.data_bytes == underlay_hdr_len);
- vnet_rewrite_two_headers (*t0, *t1, vlib_buffer_get_current (b[0]),
- vlib_buffer_get_current (b[1]),
- underlay_hdr_len);
-
- vlib_buffer_advance (b[0], -underlay_hdr_len);
- vlib_buffer_advance (b[1], -underlay_hdr_len);
-
- u32 len0 = vlib_buffer_length_in_chain (vm, b[0]);
- u32 len1 = vlib_buffer_length_in_chain (vm, b[1]);
- u16 payload_l0 = clib_host_to_net_u16 (len0 - l3_len);
- u16 payload_l1 = clib_host_to_net_u16 (len1 - l3_len);
-
- void *underlay0 = vlib_buffer_get_current (b[0]);
- void *underlay1 = vlib_buffer_get_current (b[1]);
-
- ip4_header_t *ip4_0, *ip4_1;
- qos_bits_t ip4_0_tos = 0, ip4_1_tos = 0;
- ip6_header_t *ip6_0, *ip6_1;
- udp_header_t *udp0, *udp1;
- vxlan_gbp_header_t *vxlan_gbp0, *vxlan_gbp1;
- u8 *l3_0, *l3_1;
- if (is_ip4)
- {
- ip4_vxlan_gbp_header_t *hdr0 = underlay0;
- ip4_vxlan_gbp_header_t *hdr1 = underlay1;
-
- /* Fix the IP4 checksum and length */
- ip4_0 = &hdr0->ip4;
- ip4_1 = &hdr1->ip4;
- ip4_0->length = clib_host_to_net_u16 (len0);
- ip4_1->length = clib_host_to_net_u16 (len1);
-
- if (PREDICT_FALSE (b[0]->flags & VNET_BUFFER_F_QOS_DATA_VALID))
- {
- ip4_0_tos = vnet_buffer2 (b[0])->qos.bits;
- ip4_0->tos = ip4_0_tos;
- }
- if (PREDICT_FALSE (b[1]->flags & VNET_BUFFER_F_QOS_DATA_VALID))
- {
- ip4_1_tos = vnet_buffer2 (b[1])->qos.bits;
- ip4_1->tos = ip4_1_tos;
- }
-
- l3_0 = (u8 *) ip4_0;
- l3_1 = (u8 *) ip4_1;
- udp0 = &hdr0->udp;
- udp1 = &hdr1->udp;
- vxlan_gbp0 = &hdr0->vxlan_gbp;
- vxlan_gbp1 = &hdr1->vxlan_gbp;
- }
- else /* ipv6 */
- {
- ip6_vxlan_gbp_header_t *hdr0 = underlay0;
- ip6_vxlan_gbp_header_t *hdr1 = underlay1;
-
- /* Fix IP6 payload length */
- ip6_0 = &hdr0->ip6;
- ip6_1 = &hdr1->ip6;
- ip6_0->payload_length = payload_l0;
- ip6_1->payload_length = payload_l1;
-
- l3_0 = (u8 *) ip6_0;
- l3_1 = (u8 *) ip6_1;
- udp0 = &hdr0->udp;
- udp1 = &hdr1->udp;
- vxlan_gbp0 = &hdr0->vxlan_gbp;
- vxlan_gbp1 = &hdr1->vxlan_gbp;
- }
-
- /* Fix UDP length and set source port */
- udp0->length = payload_l0;
- udp0->src_port = flow_hash0;
- udp1->length = payload_l1;
- udp1->src_port = flow_hash1;
-
- /* set source class and gpflags */
- vxlan_gbp0->gpflags = vnet_buffer2 (b[0])->gbp.flags;
- vxlan_gbp1->gpflags = vnet_buffer2 (b[1])->gbp.flags;
- vxlan_gbp0->sclass =
- clib_host_to_net_u16 (vnet_buffer2 (b[0])->gbp.sclass);
- vxlan_gbp1->sclass =
- clib_host_to_net_u16 (vnet_buffer2 (b[1])->gbp.sclass);
-
- if (csum_offload)
- {
- b[0]->flags |= csum_flags;
- vnet_buffer (b[0])->l3_hdr_offset = l3_0 - b[0]->data;
- vnet_buffer (b[0])->l4_hdr_offset = (u8 *) udp0 - b[0]->data;
- vnet_buffer_offload_flags_set (b[0],
- outer_packet_csum_offload_flags);
- b[1]->flags |= csum_flags;
- vnet_buffer (b[1])->l3_hdr_offset = l3_1 - b[1]->data;
- vnet_buffer (b[1])->l4_hdr_offset = (u8 *) udp1 - b[1]->data;
- vnet_buffer_offload_flags_set (b[1],
- outer_packet_csum_offload_flags);
- }
- /* IPv4 UDP checksum only if checksum offload is used */
- else if (is_ip4)
- {
- ip_csum_t sum0 = ip4_0->checksum;
- sum0 = ip_csum_update (sum0, 0, ip4_0->length, ip4_header_t,
- length /* changed member */ );
- if (PREDICT_FALSE (ip4_0_tos))
- {
- sum0 = ip_csum_update (sum0, 0, ip4_0_tos, ip4_header_t,
- tos /* changed member */ );
- }
- ip4_0->checksum = ip_csum_fold (sum0);
- ip_csum_t sum1 = ip4_1->checksum;
- sum1 = ip_csum_update (sum1, 0, ip4_1->length, ip4_header_t,
- length /* changed member */ );
- if (PREDICT_FALSE (ip4_1_tos))
- {
- sum1 = ip_csum_update (sum1, 0, ip4_1_tos, ip4_header_t,
- tos /* changed member */ );
- }
- ip4_1->checksum = ip_csum_fold (sum1);
- }
- /* IPv6 UDP checksum is mandatory */
- else
- {
- int bogus = 0;
-
- udp0->checksum = ip6_tcp_udp_icmp_compute_checksum
- (vm, b[0], ip6_0, &bogus);
- ASSERT (bogus == 0);
- if (udp0->checksum == 0)
- udp0->checksum = 0xffff;
- udp1->checksum = ip6_tcp_udp_icmp_compute_checksum
- (vm, b[1], ip6_1, &bogus);
- ASSERT (bogus == 0);
- if (udp1->checksum == 0)
- udp1->checksum = 0xffff;
- }
-
- /* save inner packet flow_hash for load-balance node */
- vnet_buffer (b[0])->ip.flow_hash = flow_hash0;
- vnet_buffer (b[1])->ip.flow_hash = flow_hash1;
-
- vlib_increment_combined_counter (tx_counter, thread_index,
- sw_if_index0, 1, len0);
- vlib_increment_combined_counter (tx_counter, thread_index,
- sw_if_index1, 1, len1);
- pkts_encapsulated += 2;
-
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_gbp_encap_trace_t *tr =
- vlib_add_trace (vm, node, b[0], sizeof (*tr));
- tr->tunnel_index = t0 - vxm->tunnels;
- tr->vni = t0->vni;
- tr->sclass = vnet_buffer2 (b[0])->gbp.sclass;
- tr->flags = vnet_buffer2 (b[0])->gbp.flags;
- }
-
- if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_gbp_encap_trace_t *tr =
- vlib_add_trace (vm, node, b[1], sizeof (*tr));
- tr->tunnel_index = t1 - vxm->tunnels;
- tr->vni = t1->vni;
- tr->sclass = vnet_buffer2 (b[1])->gbp.sclass;
- tr->flags = vnet_buffer2 (b[1])->gbp.flags;
- }
- b += 2;
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0 = to_next[0] = from[0];
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- if (csum_offload && (b[0]->flags & VNET_BUFFER_F_OFFLOAD))
- {
- /* Only calculate the non-GSO packet csum offload */
- if ((b[0]->flags & VNET_BUFFER_F_GSO) == 0)
- {
- vnet_calc_checksums_inline (vm, b[0],
- b[0]->flags &
- VNET_BUFFER_F_IS_IP4,
- b[0]->flags &
- VNET_BUFFER_F_IS_IP6);
- b[0]->flags &= ~inner_packet_removed_flags;
- }
- }
-
- u32 flow_hash0 = vnet_l2_compute_flow_hash (b[0]);
-
- /* Get next node index and adj index from tunnel next_dpo */
- if (sw_if_index0 != vnet_buffer (b[0])->sw_if_index[VLIB_TX])
- {
- sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
- vnet_hw_interface_t *hi0 =
- vnet_get_sup_hw_interface (vnm, sw_if_index0);
- t0 = &vxm->tunnels[hi0->dev_instance];
- /* Note: change to always set next0 if it may be set to drop */
- next0 = t0->next_dpo.dpoi_next_node;
- dpoi_idx0 = t0->next_dpo.dpoi_index;
- }
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpoi_idx0;
-
- ASSERT (t0->rewrite_header.data_bytes == underlay_hdr_len);
- vnet_rewrite_one_header (*t0, vlib_buffer_get_current (b[0]),
- underlay_hdr_len);
-
- vlib_buffer_advance (b[0], -underlay_hdr_len);
- void *underlay0 = vlib_buffer_get_current (b[0]);
-
- u32 len0 = vlib_buffer_length_in_chain (vm, b[0]);
- u16 payload_l0 = clib_host_to_net_u16 (len0 - l3_len);
-
- vxlan_gbp_header_t *vxlan_gbp0;
- udp_header_t *udp0;
- ip4_header_t *ip4_0;
- qos_bits_t ip4_0_tos = 0;
- ip6_header_t *ip6_0;
- u8 *l3_0;
- if (is_ip4)
- {
- ip4_vxlan_gbp_header_t *hdr = underlay0;
-
- /* Fix the IP4 checksum and length */
- ip4_0 = &hdr->ip4;
- ip4_0->length = clib_host_to_net_u16 (len0);
-
- if (PREDICT_FALSE (b[0]->flags & VNET_BUFFER_F_QOS_DATA_VALID))
- {
- ip4_0_tos = vnet_buffer2 (b[0])->qos.bits;
- ip4_0->tos = ip4_0_tos;
- }
-
- l3_0 = (u8 *) ip4_0;
- udp0 = &hdr->udp;
- vxlan_gbp0 = &hdr->vxlan_gbp;
- }
- else /* ip6 path */
- {
- ip6_vxlan_gbp_header_t *hdr = underlay0;
-
- /* Fix IP6 payload length */
- ip6_0 = &hdr->ip6;
- ip6_0->payload_length = payload_l0;
-
- l3_0 = (u8 *) ip6_0;
- udp0 = &hdr->udp;
- vxlan_gbp0 = &hdr->vxlan_gbp;
- }
-
- /* Fix UDP length and set source port */
- udp0->length = payload_l0;
- udp0->src_port = flow_hash0;
-
- /* set source class and gpflags */
- vxlan_gbp0->gpflags = vnet_buffer2 (b[0])->gbp.flags;
- vxlan_gbp0->sclass =
- clib_host_to_net_u16 (vnet_buffer2 (b[0])->gbp.sclass);
-
- if (csum_offload)
- {
- b[0]->flags |= csum_flags;
- vnet_buffer (b[0])->l3_hdr_offset = l3_0 - b[0]->data;
- vnet_buffer (b[0])->l4_hdr_offset = (u8 *) udp0 - b[0]->data;
- vnet_buffer_offload_flags_set (b[0],
- outer_packet_csum_offload_flags);
- }
- /* IPv4 UDP checksum only if checksum offload is used */
- else if (is_ip4)
- {
- ip_csum_t sum0 = ip4_0->checksum;
- sum0 = ip_csum_update (sum0, 0, ip4_0->length, ip4_header_t,
- length /* changed member */ );
- if (PREDICT_FALSE (ip4_0_tos))
- {
- sum0 = ip_csum_update (sum0, 0, ip4_0_tos, ip4_header_t,
- tos /* changed member */ );
- }
- ip4_0->checksum = ip_csum_fold (sum0);
- }
- /* IPv6 UDP checksum is mandatory */
- else
- {
- int bogus = 0;
-
- udp0->checksum = ip6_tcp_udp_icmp_compute_checksum
- (vm, b[0], ip6_0, &bogus);
- ASSERT (bogus == 0);
- if (udp0->checksum == 0)
- udp0->checksum = 0xffff;
- }
-
- /* save inner packet flow_hash for load-balance node */
- vnet_buffer (b[0])->ip.flow_hash = flow_hash0;
-
- vlib_increment_combined_counter (tx_counter, thread_index,
- sw_if_index0, 1, len0);
- pkts_encapsulated++;
-
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_gbp_encap_trace_t *tr =
- vlib_add_trace (vm, node, b[0], sizeof (*tr));
- tr->tunnel_index = t0 - vxm->tunnels;
- tr->vni = t0->vni;
- tr->sclass = vnet_buffer2 (b[0])->gbp.sclass;
- tr->flags = vnet_buffer2 (b[0])->gbp.flags;
- }
- b += 1;
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- /* Do we still need this now that tunnel tx stats are kept? */
- vlib_node_increment_counter (vm, node->node_index,
- VXLAN_GBP_ENCAP_ERROR_ENCAPSULATED,
- pkts_encapsulated);
-
- return from_frame->n_vectors;
-}
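Both loops share the same outer-header arithmetic: the total packet length goes into the ip4 length field (or, minus the L3 header, into the ip6 payload length), the UDP length always equals the total length minus the L3 header, and in the non-offload ip4 path the header checksum is patched incrementally rather than recomputed. A standalone sketch of the ip4 case under those assumptions; ip4_len_fixup is hypothetical:

    /* Hypothetical restatement of the ip4 length/checksum fixup above. */
    static inline void
    ip4_len_fixup (ip4_header_t * ip4, udp_header_t * udp, u32 total_len)
    {
      u16 old_len = ip4->length;
      ip4->length = clib_host_to_net_u16 (total_len);
      udp->length = clib_host_to_net_u16 (total_len - sizeof (ip4_header_t));
      /* incremental header checksum update, as in the node */
      ip_csum_t sum = ip4->checksum;
      sum = ip_csum_update (sum, old_len, ip4->length, ip4_header_t, length);
      ip4->checksum = ip_csum_fold (sum);
    }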
-
-VLIB_NODE_FN (vxlan4_gbp_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- /* Disable checksum offload, as the setup overhead in the tx node is not
- worthwhile for the ip4 header checksum alone, unless the udp checksum
- is also required */
- return vxlan_gbp_encap_inline (vm, node, from_frame, /* is_ip4 */ 1,
- /* csum_offload */ 0);
-}
-
-VLIB_NODE_FN (vxlan6_gbp_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- /* Enable checksum offload for ip6, as the udp checksum is mandatory */
- return vxlan_gbp_encap_inline (vm, node, from_frame, /* is_ip4 */ 0,
- /* csum_offload */ 1);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (vxlan4_gbp_encap_node) =
-{
- .name = "vxlan4-gbp-encap",
- .vector_size = sizeof (u32),
- .format_trace = format_vxlan_gbp_encap_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN (vxlan_gbp_encap_error_strings),
- .error_strings = vxlan_gbp_encap_error_strings,
- .n_next_nodes = VXLAN_GBP_ENCAP_N_NEXT,
- .next_nodes = {
- [VXLAN_GBP_ENCAP_NEXT_DROP] = "error-drop",
- },
-};
-
-VLIB_REGISTER_NODE (vxlan6_gbp_encap_node) =
-{
- .name = "vxlan6-gbp-encap",
- .vector_size = sizeof (u32),
- .format_trace = format_vxlan_gbp_encap_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN (vxlan_gbp_encap_error_strings),
- .error_strings = vxlan_gbp_encap_error_strings,
- .n_next_nodes = VXLAN_GBP_ENCAP_N_NEXT,
- .next_nodes = {
- [VXLAN_GBP_ENCAP_NEXT_DROP] = "error-drop",
- },
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan-gbp/vxlan_gbp.api b/src/vnet/vxlan-gbp/vxlan_gbp.api
deleted file mode 100644
index 68566697000..00000000000
--- a/src/vnet/vxlan-gbp/vxlan_gbp.api
+++ /dev/null
@@ -1,100 +0,0 @@
-/* Hey Emacs use -*- mode: C -*- */
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-option version = "1.1.1";
-import "vnet/ip/ip_types.api";
-import "vnet/interface_types.api";
-
-enum vxlan_gbp_api_tunnel_mode
-{
- VXLAN_GBP_API_TUNNEL_MODE_L2,
- VXLAN_GBP_API_TUNNEL_MODE_L3,
-};
-
-/** \brief Definition of a VXLAN GBP tunnel
- @param instance - optional unique custom device instance, else ~0.
- @param src - Source IP address
- @param dst - Destination IP address, can be multicast
- @param mcast_sw_if_index - Interface for multicast destination
- @param encap_table_id - Encap route table
- @param vni - The VXLAN Network Identifier, uint24
- @param sw_if_index - Ignored in add message, set in details
-*/
-typedef vxlan_gbp_tunnel
-{
- u32 instance;
- vl_api_address_t src;
- vl_api_address_t dst;
- vl_api_interface_index_t mcast_sw_if_index;
- u32 encap_table_id;
- u32 vni;
- vl_api_interface_index_t sw_if_index;
- vl_api_vxlan_gbp_api_tunnel_mode_t mode;
-};
-
-/** \brief Create or delete a VXLAN-GBP tunnel
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param is_add - Use 1 to create the tunnel, 0 to remove it
-*/
-define vxlan_gbp_tunnel_add_del
-{
- u32 client_index;
- u32 context;
- bool is_add [default=true];
- vl_api_vxlan_gbp_tunnel_t tunnel;
- option in_progress;
-};
-
-define vxlan_gbp_tunnel_add_del_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
- option in_progress;
-};
-
-define vxlan_gbp_tunnel_dump
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index [default=0xffffffff];
- option in_progress;
-};
-
-define vxlan_gbp_tunnel_details
-{
- u32 context;
- vl_api_vxlan_gbp_tunnel_t tunnel;
- option in_progress;
-};
-
-/** \brief Interface set vxlan-gbp-bypass request
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param sw_if_index - interface used to reach neighbor
- @param is_ipv6 - if non-zero, enable ip6-vxlan-gbp-bypass, else ip4-vxlan-gbp-bypass
- @param enable - if non-zero enable, else disable
-*/
-autoreply define sw_interface_set_vxlan_gbp_bypass
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index;
- bool is_ipv6;
- bool enable [default=true];
- option in_progress;
-};
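The messages above are decoded by a handler in vxlan_gbp_api.c (removed elsewhere in this change) and funneled into vnet_vxlan_gbp_tunnel_add_del(). A hypothetical, abbreviated handler sketch showing that mapping; the reply boilerplate and the encap_table_id-to-fib-index resolution are elided:

    /* Sketch only; the real handler lived in vxlan_gbp_api.c. */
    static void
    vl_api_vxlan_gbp_tunnel_add_del_t_handler (
      vl_api_vxlan_gbp_tunnel_add_del_t * mp)
    {
      vnet_vxlan_gbp_tunnel_add_del_args_t a = {
        .is_add = mp->is_add,
        .instance = clib_net_to_host_u32 (mp->tunnel.instance),
        .mcast_sw_if_index = clib_net_to_host_u32 (mp->tunnel.mcast_sw_if_index),
        .vni = clib_net_to_host_u32 (mp->tunnel.vni),
      };
      ip_address_decode (&mp->tunnel.src, &a.src);
      ip_address_decode (&mp->tunnel.dst, &a.dst);
      a.is_ip6 = !ip46_address_is_ip4 (&a.dst);
      /* a.encap_fib_index must still be resolved from encap_table_id */
      u32 sw_if_index = ~0;
      int rv = vnet_vxlan_gbp_tunnel_add_del (&a, &sw_if_index);
      /* ... send vxlan_gbp_tunnel_add_del_reply with rv and sw_if_index ... */
      (void) rv;
    }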
diff --git a/src/vnet/vxlan-gbp/vxlan_gbp.c b/src/vnet/vxlan-gbp/vxlan_gbp.c
deleted file mode 100644
index eb685b8a40c..00000000000
--- a/src/vnet/vxlan-gbp/vxlan_gbp.c
+++ /dev/null
@@ -1,1193 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vnet/vxlan-gbp/vxlan_gbp.h>
-#include <vnet/ip/format.h>
-#include <vnet/ip/punt.h>
-#include <vnet/fib/fib_entry.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/fib/fib_entry_track.h>
-#include <vnet/mfib/mfib_table.h>
-#include <vnet/adj/adj_mcast.h>
-#include <vnet/adj/rewrite.h>
-#include <vnet/interface.h>
-#include <vlib/vlib.h>
-
-/**
- * @file
- * @brief VXLAN GBP.
- *
- * VXLAN GBP provides the features of VXLAN and additionally carries a group policy id.
- */
-static vlib_punt_hdl_t punt_hdl;
-
-vxlan_gbp_main_t vxlan_gbp_main;
-
-u8 *
-format_vxlan_gbp_tunnel_mode (u8 * s, va_list * args)
-{
- vxlan_gbp_tunnel_mode_t mode = va_arg (*args, vxlan_gbp_tunnel_mode_t);
-
- switch (mode)
- {
- case VXLAN_GBP_TUNNEL_MODE_L2:
- s = format (s, "L2");
- break;
- case VXLAN_GBP_TUNNEL_MODE_L3:
- s = format (s, "L3");
- break;
- }
- return (s);
-}
-
-u8 *
-format_vxlan_gbp_tunnel (u8 * s, va_list * args)
-{
- vxlan_gbp_tunnel_t *t = va_arg (*args, vxlan_gbp_tunnel_t *);
-
- s = format (s,
- "[%d] instance %d src %U dst %U vni %d fib-idx %d"
- " sw-if-idx %d mode %U ",
- t->dev_instance, t->user_instance,
- format_ip46_address, &t->src, IP46_TYPE_ANY,
- format_ip46_address, &t->dst, IP46_TYPE_ANY,
- t->vni, t->encap_fib_index, t->sw_if_index,
- format_vxlan_gbp_tunnel_mode, t->mode);
-
- s = format (s, "encap-dpo-idx %d ", t->next_dpo.dpoi_index);
-
- if (PREDICT_FALSE (ip46_address_is_multicast (&t->dst)))
- s = format (s, "mcast-sw-if-idx %d ", t->mcast_sw_if_index);
-
- return s;
-}
-
-static u8 *
-format_vxlan_gbp_name (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- vxlan_gbp_tunnel_t *t;
-
- if (dev_instance == ~0)
- return format (s, "<cached-unused>");
-
- if (dev_instance >= vec_len (vxm->tunnels))
- return format (s, "<improperly-referenced>");
-
- t = pool_elt_at_index (vxm->tunnels, dev_instance);
-
- return format (s, "vxlan_gbp_tunnel%d", t->user_instance);
-}
-
-static clib_error_t *
-vxlan_gbp_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
- u32 flags)
-{
- u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
- VNET_HW_INTERFACE_FLAG_LINK_UP : 0;
- vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
-
- return /* no error */ 0;
-}
-
-/* *INDENT-OFF* */
-VNET_DEVICE_CLASS (vxlan_gbp_device_class, static) = {
- .name = "VXLAN-GBP",
- .format_device_name = format_vxlan_gbp_name,
- .format_tx_trace = format_vxlan_gbp_encap_trace,
- .admin_up_down_function = vxlan_gbp_interface_admin_up_down,
-};
-/* *INDENT-ON* */
-
-static u8 *
-format_vxlan_gbp_header_with_length (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- s = format (s, "unimplemented dev %u", dev_instance);
- return s;
-}
-
-/* *INDENT-OFF* */
-VNET_HW_INTERFACE_CLASS (vxlan_gbp_hw_class) = {
- .name = "VXLAN-GBP",
- .format_header = format_vxlan_gbp_header_with_length,
- .build_rewrite = default_build_rewrite,
-};
-/* *INDENT-ON* */
-
-static void
-vxlan_gbp_tunnel_restack_dpo (vxlan_gbp_tunnel_t * t)
-{
- u8 is_ip4 = ip46_address_is_ip4 (&t->dst);
- dpo_id_t dpo = DPO_INVALID;
- fib_forward_chain_type_t forw_type = is_ip4 ?
- FIB_FORW_CHAIN_TYPE_UNICAST_IP4 : FIB_FORW_CHAIN_TYPE_UNICAST_IP6;
-
- fib_entry_contribute_forwarding (t->fib_entry_index, forw_type, &dpo);
-
- /* vxlan_gbp uses the payload hash as the udp source port,
- * so the packet's hash is unknown at this point;
- * skip single-bucket load-balance dpo's */
- while (DPO_LOAD_BALANCE == dpo.dpoi_type)
- {
- load_balance_t *lb = load_balance_get (dpo.dpoi_index);
- if (lb->lb_n_buckets > 1)
- break;
-
- dpo_copy (&dpo, load_balance_get_bucket_i (lb, 0));
- }
-
- u32 encap_index = is_ip4 ?
- vxlan4_gbp_encap_node.index : vxlan6_gbp_encap_node.index;
- dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
- dpo_reset (&dpo);
-}
-
-static vxlan_gbp_tunnel_t *
-vxlan_gbp_tunnel_from_fib_node (fib_node_t * node)
-{
- ASSERT (FIB_NODE_TYPE_VXLAN_GBP_TUNNEL == node->fn_type);
- return ((vxlan_gbp_tunnel_t *) (((char *) node) -
- STRUCT_OFFSET_OF (vxlan_gbp_tunnel_t,
- node)));
-}
-
-/**
- * Function definition to backwalk a FIB node -
- * Here we will restack the new dpo of VXLAN DIP to encap node.
- */
-static fib_node_back_walk_rc_t
-vxlan_gbp_tunnel_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx)
-{
- vxlan_gbp_tunnel_restack_dpo (vxlan_gbp_tunnel_from_fib_node (node));
- return (FIB_NODE_BACK_WALK_CONTINUE);
-}
-
-/**
- * Function definition to get a FIB node from its index
- */
-static fib_node_t *
-vxlan_gbp_tunnel_fib_node_get (fib_node_index_t index)
-{
- vxlan_gbp_tunnel_t *t;
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
-
- t = pool_elt_at_index (vxm->tunnels, index);
-
- return (&t->node);
-}
-
-/**
- * Function definition to inform the FIB node that its last lock has gone.
- */
-static void
-vxlan_gbp_tunnel_last_lock_gone (fib_node_t * node)
-{
- /*
- * The VXLAN GBP tunnel is a root of the graph. As such
- * it never has children and thus is never locked.
- */
- ASSERT (0);
-}
-
-/*
- * Virtual function table registered by VXLAN GBP tunnels
- * for participation in the FIB object graph.
- */
-const static fib_node_vft_t vxlan_gbp_vft = {
- .fnv_get = vxlan_gbp_tunnel_fib_node_get,
- .fnv_last_lock = vxlan_gbp_tunnel_last_lock_gone,
- .fnv_back_walk = vxlan_gbp_tunnel_back_walk,
-};
-
-
-#define foreach_copy_field \
-_(vni) \
-_(mode) \
-_(mcast_sw_if_index) \
-_(encap_fib_index) \
-_(src) \
-_(dst)
-
-static void
-vxlan_gbp_rewrite (vxlan_gbp_tunnel_t * t, bool is_ip6)
-{
- union
- {
- ip4_vxlan_gbp_header_t h4;
- ip6_vxlan_gbp_header_t h6;
- } h;
- int len = is_ip6 ? sizeof h.h6 : sizeof h.h4;
-
- udp_header_t *udp;
- vxlan_gbp_header_t *vxlan_gbp;
- /* Fixed portion of the (outer) ip header */
-
- clib_memset (&h, 0, sizeof (h));
- if (!is_ip6)
- {
- ip4_header_t *ip = &h.h4.ip4;
- udp = &h.h4.udp, vxlan_gbp = &h.h4.vxlan_gbp;
- ip->ip_version_and_header_length = 0x45;
- ip->ttl = 254;
- ip->protocol = IP_PROTOCOL_UDP;
-
- ip->src_address = t->src.ip4;
- ip->dst_address = t->dst.ip4;
-
- /* we fix up the ip4 header length and checksum after-the-fact */
- ip->checksum = ip4_header_checksum (ip);
- }
- else
- {
- ip6_header_t *ip = &h.h6.ip6;
- udp = &h.h6.udp, vxlan_gbp = &h.h6.vxlan_gbp;
- ip->ip_version_traffic_class_and_flow_label =
- clib_host_to_net_u32 (6 << 28);
- ip->hop_limit = 255;
- ip->protocol = IP_PROTOCOL_UDP;
-
- ip->src_address = t->src.ip6;
- ip->dst_address = t->dst.ip6;
- }
-
- /* UDP header; the src port set here is a placeholder, rewritten
- per-packet with the flow hash at encap time */
- udp->src_port = clib_host_to_net_u16 (47789);
- udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_vxlan_gbp);
-
- /* VXLAN-GBP header */
- vxlan_gbp_set_header (vxlan_gbp, t->vni);
- vnet_rewrite_set_data (*t, &h, len);
-}
-
-static uword
-vtep_addr_ref (ip46_address_t * ip)
-{
- uword *vtep = ip46_address_is_ip4 (ip) ?
- hash_get (vxlan_gbp_main.vtep4, ip->ip4.as_u32) :
- hash_get_mem (vxlan_gbp_main.vtep6, &ip->ip6);
- if (vtep)
- return ++(*vtep);
- ip46_address_is_ip4 (ip) ?
- hash_set (vxlan_gbp_main.vtep4, ip->ip4.as_u32, 1) :
- hash_set_mem_alloc (&vxlan_gbp_main.vtep6, &ip->ip6, 1);
- return 1;
-}
-
-static uword
-vtep_addr_unref (ip46_address_t * ip)
-{
- uword *vtep = ip46_address_is_ip4 (ip) ?
- hash_get (vxlan_gbp_main.vtep4, ip->ip4.as_u32) :
- hash_get_mem (vxlan_gbp_main.vtep6, &ip->ip6);
- ALWAYS_ASSERT (vtep);
- if (--(*vtep) != 0)
- return *vtep;
- ip46_address_is_ip4 (ip) ?
- hash_unset (vxlan_gbp_main.vtep4, ip->ip4.as_u32) :
- hash_unset_mem_free (&vxlan_gbp_main.vtep6, &ip->ip6);
- return 0;
-}
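The two helpers above implement a per-address reference count keyed on the VTEP hash tables: the first ref inserts an entry with count 1, later refs increment it, and the unref that drops the count to zero removes the entry. A small illustration of that contract; the example function and address are hypothetical:

    /* Hypothetical illustration of the refcount contract above. */
    static void
    vtep_refcount_example (void)
    {
      ip46_address_t vtep = { .ip4.as_u32 = clib_host_to_net_u32 (0x0a000301) };
      ASSERT (vtep_addr_ref (&vtep) == 1);   /* first user creates the entry */
      ASSERT (vtep_addr_ref (&vtep) == 2);   /* a second tunnel shares it */
      ASSERT (vtep_addr_unref (&vtep) == 1); /* still referenced */
      ASSERT (vtep_addr_unref (&vtep) == 0); /* last unref removes the entry */
    }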
-
-/* *INDENT-OFF* */
-typedef CLIB_PACKED(union
-{
- struct
- {
- fib_node_index_t mfib_entry_index;
- adj_index_t mcast_adj_index;
- };
- u64 as_u64;
-}) mcast_shared_t;
-/* *INDENT-ON* */
-
-static inline mcast_shared_t
-mcast_shared_get (ip46_address_t * ip)
-{
- ASSERT (ip46_address_is_multicast (ip));
- uword *p = hash_get_mem (vxlan_gbp_main.mcast_shared, ip);
- ALWAYS_ASSERT (p);
- mcast_shared_t ret = {.as_u64 = *p };
- return ret;
-}
-
-static inline void
-mcast_shared_add (ip46_address_t * dst, fib_node_index_t mfei, adj_index_t ai)
-{
- mcast_shared_t new_ep = {
- .mcast_adj_index = ai,
- .mfib_entry_index = mfei,
- };
-
- hash_set_mem_alloc (&vxlan_gbp_main.mcast_shared, dst, new_ep.as_u64);
-}
-
-static inline void
-mcast_shared_remove (ip46_address_t * dst)
-{
- mcast_shared_t ep = mcast_shared_get (dst);
-
- adj_unlock (ep.mcast_adj_index);
- mfib_table_entry_delete_index (ep.mfib_entry_index, MFIB_SOURCE_VXLAN_GBP);
-
- hash_unset_mem_free (&vxlan_gbp_main.mcast_shared, dst);
-}
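mcast_shared_t packs the mfib entry index and the mcast adjacency index into one u64 so the pair can be stored directly as a hash value; the union makes the round-trip explicit. A minimal sketch (the example function and values are hypothetical):

    /* Round-trip through the packed representation used as the hash value. */
    static void
    mcast_shared_pack_example (void)
    {
      mcast_shared_t in = {
        .mfib_entry_index = 42,
        .mcast_adj_index = 7,
      };
      u64 stored = in.as_u64;   /* what hash_set_mem_alloc() stores */
      mcast_shared_t out = { .as_u64 = stored };
      ASSERT (out.mfib_entry_index == 42 && out.mcast_adj_index == 7);
    }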
-
-inline void
-vxlan_gbp_register_udp_ports (void)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
-
- if (vxm->udp_ports_registered == 0)
- {
- udp_register_dst_port (vxm->vlib_main, UDP_DST_PORT_vxlan_gbp,
- vxlan4_gbp_input_node.index, /* is_ip4 */ 1);
- udp_register_dst_port (vxm->vlib_main, UDP_DST_PORT_vxlan6_gbp,
- vxlan6_gbp_input_node.index, /* is_ip4 */ 0);
- }
- /*
- * udp_ports_registered counts the number of vxlan_gbp tunnels
- * holding the UDP port registration
- */
- vxm->udp_ports_registered += 1;
-}
-
-inline void
-vxlan_gbp_unregister_udp_ports (void)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
-
- ASSERT (vxm->udp_ports_registered != 0);
-
- if (vxm->udp_ports_registered == 1)
- {
- udp_unregister_dst_port (vxm->vlib_main, UDP_DST_PORT_vxlan_gbp,
- /* is_ip4 */ 1);
- udp_unregister_dst_port (vxm->vlib_main, UDP_DST_PORT_vxlan6_gbp,
- /* is_ip4 */ 0);
- }
-
- vxm->udp_ports_registered -= 1;
-}
-
-int vnet_vxlan_gbp_tunnel_add_del
- (vnet_vxlan_gbp_tunnel_add_del_args_t * a, u32 * sw_if_indexp)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- vxlan_gbp_tunnel_t *t = 0;
- vnet_main_t *vnm = vxm->vnet_main;
- u64 *p;
- u32 sw_if_index = ~0;
- vxlan4_gbp_tunnel_key_t key4;
- vxlan6_gbp_tunnel_key_t key6;
- u32 is_ip6 = a->is_ip6;
-
- int not_found;
- if (!is_ip6)
- {
- key4.key[0] = ip46_address_is_multicast (&a->dst) ?
- a->dst.ip4.as_u32 :
- a->dst.ip4.as_u32 | (((u64) a->src.ip4.as_u32) << 32);
- key4.key[1] = (((u64) a->encap_fib_index) << 32)
- | clib_host_to_net_u32 (a->vni << 8);
- not_found =
- clib_bihash_search_inline_16_8 (&vxm->vxlan4_gbp_tunnel_by_key,
- &key4);
- p = &key4.value;
- }
- else
- {
- key6.key[0] = a->dst.ip6.as_u64[0];
- key6.key[1] = a->dst.ip6.as_u64[1];
- key6.key[2] = (((u64) a->encap_fib_index) << 32)
- | clib_host_to_net_u32 (a->vni << 8);
- not_found =
- clib_bihash_search_inline_24_8 (&vxm->vxlan6_gbp_tunnel_by_key,
- &key6);
- p = &key6.value;
- }
-
- if (not_found)
- p = 0;
-
- if (a->is_add)
- {
- l2input_main_t *l2im = &l2input_main;
- u32 dev_instance; /* real dev instance tunnel index */
- u32 user_instance; /* requested and actual instance number */
-
- /* adding a tunnel: tunnel must not already exist */
- if (p)
- {
- t = pool_elt_at_index (vxm->tunnels, *p);
- *sw_if_indexp = t->sw_if_index;
- return VNET_API_ERROR_TUNNEL_EXIST;
- }
- pool_get_aligned (vxm->tunnels, t, CLIB_CACHE_LINE_BYTES);
- clib_memset (t, 0, sizeof (*t));
- dev_instance = t - vxm->tunnels;
-
- /* copy from arg structure */
-#define _(x) t->x = a->x;
- foreach_copy_field;
-#undef _
-
- vxlan_gbp_rewrite (t, is_ip6);
- /*
- * Reconcile the real dev_instance and a possible requested instance.
- */
- user_instance = a->instance;
- if (user_instance == ~0)
- user_instance = dev_instance;
- if (hash_get (vxm->instance_used, user_instance))
- {
- pool_put (vxm->tunnels, t);
- return VNET_API_ERROR_INSTANCE_IN_USE;
- }
- hash_set (vxm->instance_used, user_instance, 1);
-
- t->dev_instance = dev_instance; /* actual */
- t->user_instance = user_instance; /* name */
-
- /* copy the key */
- int add_failed;
- if (is_ip6)
- {
- key6.value = (u64) dev_instance;
- add_failed =
- clib_bihash_add_del_24_8 (&vxm->vxlan6_gbp_tunnel_by_key, &key6,
- 1 /*add */ );
- }
- else
- {
- key4.value = (u64) dev_instance;
- add_failed =
- clib_bihash_add_del_16_8 (&vxm->vxlan4_gbp_tunnel_by_key, &key4,
- 1 /*add */ );
- }
-
- if (add_failed)
- {
- pool_put (vxm->tunnels, t);
- return VNET_API_ERROR_INVALID_REGISTRATION;
- }
-
- vxlan_gbp_register_udp_ports ();
-
- t->hw_if_index = vnet_register_interface
- (vnm, vxlan_gbp_device_class.index, dev_instance,
- vxlan_gbp_hw_class.index, dev_instance);
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index);
-
- /* Set vxlan_gbp tunnel output node */
- u32 encap_index = !is_ip6 ?
- vxlan4_gbp_encap_node.index : vxlan6_gbp_encap_node.index;
- vnet_set_interface_output_node (vnm, t->hw_if_index, encap_index);
-
- t->sw_if_index = sw_if_index = hi->sw_if_index;
-
- if (VXLAN_GBP_TUNNEL_MODE_L3 == t->mode)
- {
- ip4_sw_interface_enable_disable (t->sw_if_index, 1);
- ip6_sw_interface_enable_disable (t->sw_if_index, 1);
- }
-
- vec_validate_init_empty (vxm->tunnel_index_by_sw_if_index, sw_if_index,
- ~0);
- vxm->tunnel_index_by_sw_if_index[sw_if_index] = dev_instance;
-
- /* setup l2 input config with l2 feature and bd 0 to drop packet */
- vec_validate (l2im->configs, sw_if_index);
- l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP;
- l2im->configs[sw_if_index].bd_index = 0;
-
- vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
- si->flags &= ~VNET_SW_INTERFACE_FLAG_HIDDEN;
- vnet_sw_interface_set_flags (vnm, sw_if_index,
- VNET_SW_INTERFACE_FLAG_ADMIN_UP);
-
- fib_node_init (&t->node, FIB_NODE_TYPE_VXLAN_GBP_TUNNEL);
- fib_prefix_t tun_dst_pfx;
- vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
-
- fib_prefix_from_ip46_addr (&t->dst, &tun_dst_pfx);
- if (!ip46_address_is_multicast (&t->dst))
- {
- /* Unicast tunnel -
- * source the FIB entry for the tunnel's destination
- * and become a child thereof. The tunnel will then get poked
- * when the forwarding for the entry updates, and the tunnel can
- * re-stack accordingly
- */
- vtep_addr_ref (&t->src);
- t->fib_entry_index = fib_entry_track (t->encap_fib_index,
- &tun_dst_pfx,
- FIB_NODE_TYPE_VXLAN_GBP_TUNNEL,
- dev_instance,
- &t->sibling_index);
- vxlan_gbp_tunnel_restack_dpo (t);
- }
- else
- {
- /* Multicast tunnel -
- * as the same mcast group can be used for multiple mcast tunnels
- * with different VNIs, create the output fib adjacency only if
- * it does not already exist
- */
- fib_protocol_t fp = fib_ip_proto (is_ip6);
-
- if (vtep_addr_ref (&t->dst) == 1)
- {
- fib_node_index_t mfei;
- adj_index_t ai;
- fib_route_path_t path = {
- .frp_proto = fib_proto_to_dpo (fp),
- .frp_addr = zero_addr,
- .frp_sw_if_index = 0xffffffff,
- .frp_fib_index = ~0,
- .frp_weight = 0,
- .frp_flags = FIB_ROUTE_PATH_LOCAL,
- .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
- };
- const mfib_prefix_t mpfx = {
- .fp_proto = fp,
- .fp_len = (is_ip6 ? 128 : 32),
- .fp_grp_addr = tun_dst_pfx.fp_addr,
- };
-
- /*
- * Setup the (*,G) to receive traffic on the mcast group
- * - the forwarding interface is for-us
- * - the accepting interface is that from the API
- */
- mfib_table_entry_path_update (t->encap_fib_index, &mpfx,
- MFIB_SOURCE_VXLAN_GBP,
- MFIB_ENTRY_FLAG_NONE, &path);
-
- path.frp_sw_if_index = a->mcast_sw_if_index;
- path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE;
- path.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT;
- mfei = mfib_table_entry_path_update (
- t->encap_fib_index, &mpfx, MFIB_SOURCE_VXLAN_GBP,
- MFIB_ENTRY_FLAG_NONE, &path);
-
- /*
- * Create the mcast adjacency to send traffic to the group
- */
- ai = adj_mcast_add_or_lock (fp,
- fib_proto_to_link (fp),
- a->mcast_sw_if_index);
-
- /*
- * create a new end-point
- */
- mcast_shared_add (&t->dst, mfei, ai);
- }
-
- dpo_id_t dpo = DPO_INVALID;
- mcast_shared_t ep = mcast_shared_get (&t->dst);
-
- /* Stack shared mcast dst mac addr rewrite on encap */
- dpo_set (&dpo, DPO_ADJACENCY_MCAST,
- fib_proto_to_dpo (fp), ep.mcast_adj_index);
-
- dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
- dpo_reset (&dpo);
- flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER;
- }
-
- vnet_get_sw_interface (vnet_get_main (), sw_if_index)->flood_class =
- flood_class;
- }
- else
- {
- /* deleting a tunnel: tunnel must exist */
- if (!p)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-
- u32 instance = p[0];
- t = pool_elt_at_index (vxm->tunnels, instance);
-
- sw_if_index = t->sw_if_index;
- vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ );
-
- if (VXLAN_GBP_TUNNEL_MODE_L3 == t->mode)
- {
- ip4_sw_interface_enable_disable (t->sw_if_index, 0);
- ip6_sw_interface_enable_disable (t->sw_if_index, 0);
- }
-
- vxm->tunnel_index_by_sw_if_index[sw_if_index] = ~0;
-
- if (!is_ip6)
- clib_bihash_add_del_16_8 (&vxm->vxlan4_gbp_tunnel_by_key, &key4,
- 0 /*del */ );
- else
- clib_bihash_add_del_24_8 (&vxm->vxlan6_gbp_tunnel_by_key, &key6,
- 0 /*del */ );
-
- if (!ip46_address_is_multicast (&t->dst))
- {
- vtep_addr_unref (&t->src);
- fib_entry_untrack (t->fib_entry_index, t->sibling_index);
- }
- else if (vtep_addr_unref (&t->dst) == 0)
- {
- mcast_shared_remove (&t->dst);
- }
-
- vxlan_gbp_unregister_udp_ports ();
- vnet_delete_hw_interface (vnm, t->hw_if_index);
- hash_unset (vxm->instance_used, t->user_instance);
-
- fib_node_deinit (&t->node);
- pool_put (vxm->tunnels, t);
- }
-
- if (sw_if_indexp)
- *sw_if_indexp = sw_if_index;
-
- return 0;
-}
-
-int
-vnet_vxlan_gbp_tunnel_del (u32 sw_if_index)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- vxlan_gbp_tunnel_t *t = 0;
- u32 ti;
-
- if (sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index))
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-
- ti = vxm->tunnel_index_by_sw_if_index[sw_if_index];
- if (~0 != ti)
- {
- t = pool_elt_at_index (vxm->tunnels, ti);
-
- vnet_vxlan_gbp_tunnel_add_del_args_t args = {
- .is_add = 0,
- .is_ip6 = !ip46_address_is_ip4 (&t->src),
- .vni = t->vni,
- .src = t->src,
- .dst = t->dst,
- .instance = ~0,
- };
-
- return (vnet_vxlan_gbp_tunnel_add_del (&args, NULL));
- }
-
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-}
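Both the CLI below and the binary API funnel into vnet_vxlan_gbp_tunnel_add_del(), so it can also be called directly. A minimal sketch creating a unicast ip4 L2 tunnel, reusing the addresses from the CLI examples; the wrapper function is hypothetical:

    /* Sketch: create a unicast vxlan-gbp tunnel without going through the CLI. */
    static int
    create_tunnel_example (u32 * sw_if_indexp)
    {
      vnet_vxlan_gbp_tunnel_add_del_args_t a = {
        .is_add = 1,
        .is_ip6 = 0,
        .instance = ~0,           /* let the pool pick the device instance */
        .vni = 13,
        .mode = VXLAN_GBP_TUNNEL_MODE_L2,
        .encap_fib_index = 0,     /* default table */
        .mcast_sw_if_index = ~0,
        .src.ip4.as_u32 = clib_host_to_net_u32 (0x0a000301), /* 10.0.3.1 */
        .dst.ip4.as_u32 = clib_host_to_net_u32 (0x0a000303), /* 10.0.3.3 */
      };
      /* returns 0 on success; *sw_if_indexp then names the tunnel interface */
      return vnet_vxlan_gbp_tunnel_add_del (&a, sw_if_indexp);
    }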
-
-static uword
-get_decap_next_for_node (u32 node_index, u32 ipv4_set)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- vlib_main_t *vm = vxm->vlib_main;
- uword input_node = (ipv4_set) ? vxlan4_gbp_input_node.index :
- vxlan6_gbp_input_node.index;
-
- return vlib_node_add_next (vm, input_node, node_index);
-}
-
-static uword
-unformat_decap_next (unformat_input_t * input, va_list * args)
-{
- u32 *result = va_arg (*args, u32 *);
- u32 ipv4_set = va_arg (*args, int);
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- vlib_main_t *vm = vxm->vlib_main;
- u32 node_index;
- u32 tmp;
-
- if (unformat (input, "l2"))
- *result = VXLAN_GBP_INPUT_NEXT_L2_INPUT;
- else if (unformat (input, "node %U", unformat_vlib_node, vm, &node_index))
- *result = get_decap_next_for_node (node_index, ipv4_set);
- else if (unformat (input, "%d", &tmp))
- *result = tmp;
- else
- return 0;
- return 1;
-}
-
-static clib_error_t *
-vxlan_gbp_tunnel_add_del_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- ip46_address_t src = ip46_address_initializer, dst =
- ip46_address_initializer;
- vxlan_gbp_tunnel_mode_t mode = VXLAN_GBP_TUNNEL_MODE_L2;
- u8 is_add = 1;
- u8 src_set = 0;
- u8 dst_set = 0;
- u8 grp_set = 0;
- u8 ipv4_set = 0;
- u8 ipv6_set = 0;
- u32 instance = ~0;
- u32 encap_fib_index = 0;
- u32 mcast_sw_if_index = ~0;
- u32 decap_next_index = VXLAN_GBP_INPUT_NEXT_L2_INPUT;
- u32 vni = 0;
- u32 table_id;
- clib_error_t *parse_error = NULL;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "del"))
- {
- is_add = 0;
- }
- else if (unformat (line_input, "instance %d", &instance))
- ;
- else if (unformat (line_input, "src %U",
- unformat_ip46_address, &src, IP46_TYPE_ANY))
- {
- src_set = 1;
- ip46_address_is_ip4 (&src) ? (ipv4_set = 1) : (ipv6_set = 1);
- }
- else if (unformat (line_input, "dst %U",
- unformat_ip46_address, &dst, IP46_TYPE_ANY))
- {
- dst_set = 1;
- ip46_address_is_ip4 (&dst) ? (ipv4_set = 1) : (ipv6_set = 1);
- }
- else if (unformat (line_input, "group %U %U",
- unformat_ip46_address, &dst, IP46_TYPE_ANY,
- unformat_vnet_sw_interface,
- vnet_get_main (), &mcast_sw_if_index))
- {
- grp_set = dst_set = 1;
- ip46_address_is_ip4 (&dst) ? (ipv4_set = 1) : (ipv6_set = 1);
- }
- else if (unformat (line_input, "encap-vrf-id %d", &table_id))
- {
- encap_fib_index =
- fib_table_find (fib_ip_proto (ipv6_set), table_id);
- }
- else if (unformat (line_input, "decap-next %U", unformat_decap_next,
- &decap_next_index, ipv4_set))
- ;
- else if (unformat (line_input, "vni %d", &vni))
- ;
- else
- {
- parse_error = clib_error_return (0, "parse error: '%U'",
- format_unformat_error, line_input);
- break;
- }
- }
-
- unformat_free (line_input);
-
- if (parse_error)
- return parse_error;
-
- if (encap_fib_index == ~0)
- return clib_error_return (0, "nonexistent encap-vrf-id %d", table_id);
-
- if (src_set == 0)
- return clib_error_return (0, "tunnel src address not specified");
-
- if (dst_set == 0)
- return clib_error_return (0, "tunnel dst address not specified");
-
- if (grp_set && !ip46_address_is_multicast (&dst))
- return clib_error_return (0, "tunnel group address not multicast");
-
- if (grp_set == 0 && ip46_address_is_multicast (&dst))
- return clib_error_return (0, "dst address must be unicast");
-
- if (grp_set && mcast_sw_if_index == ~0)
- return clib_error_return (0, "tunnel nonexistent multicast device");
-
- if (ipv4_set && ipv6_set)
- return clib_error_return (0, "both IPv4 and IPv6 addresses specified");
-
- if (ip46_address_cmp (&src, &dst) == 0)
- return clib_error_return (0, "src and dst addresses are identical");
-
- if (decap_next_index == ~0)
- return clib_error_return (0, "next node not found");
-
- if (vni == 0)
- return clib_error_return (0, "vni not specified");
-
- if (vni >> 24)
- return clib_error_return (0, "vni %d out of range", vni);
-
- vnet_vxlan_gbp_tunnel_add_del_args_t a = {
- .is_add = is_add,
- .is_ip6 = ipv6_set,
- .instance = instance,
-#define _(x) .x = x,
- foreach_copy_field
-#undef _
- };
-
- u32 tunnel_sw_if_index;
- int rv = vnet_vxlan_gbp_tunnel_add_del (&a, &tunnel_sw_if_index);
-
- switch (rv)
- {
- case 0:
- if (is_add)
- vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
- vnet_get_main (), tunnel_sw_if_index);
- break;
-
- case VNET_API_ERROR_TUNNEL_EXIST:
- return clib_error_return (0, "tunnel already exists...");
-
- case VNET_API_ERROR_NO_SUCH_ENTRY:
- return clib_error_return (0, "tunnel does not exist...");
-
- case VNET_API_ERROR_INSTANCE_IN_USE:
- return clib_error_return (0, "Instance is in use");
-
- default:
- return clib_error_return
- (0, "vnet_vxlan_gbp_tunnel_add_del returned %d", rv);
- }
-
- return 0;
-}
-
-/*?
- * Add or delete a VXLAN-GBP tunnel.
- *
- * VXLAN provides the features needed to allow L2 bridge domains (BDs)
- * to span multiple servers. This is done by building an L2 overlay on
- * top of an L3 network underlay using VXLAN tunnels.
- *
- * This makes it possible for servers to be co-located in the same data
- * center or be separated geographically as long as they are reachable
- * through the underlay L3 network.
- *
- * You can refer to this kind of L2 overlay bridge domain as a VXLAN
- * (Virtual eXtensible LAN) segment.
- *
- * @cliexpar
- * Example of how to create a VXLAN-GBP tunnel:
- * @cliexcmd{create vxlan-gbp tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 encap-vrf-id 7}
- * Example of how to create a VXLAN-GBP tunnel with a known name, vxlan_gbp_tunnel42:
- * @cliexcmd{create vxlan-gbp tunnel src 10.0.3.1 dst 10.0.3.3 instance 42}
- * Example of how to create a multicast VXLAN-GBP tunnel with a known name, vxlan_gbp_tunnel23:
- * @cliexcmd{create vxlan-gbp tunnel src 10.0.3.1 group 239.1.1.1 GigabitEthernet0/8/0 instance 23}
- * Example of how to delete a VXLAN-GBP tunnel:
- * @cliexcmd{create vxlan-gbp tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 del}
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (create_vxlan_gbp_tunnel_command, static) = {
- .path = "create vxlan-gbp tunnel",
- .short_help =
- "create vxlan-gbp tunnel src <local-vtep-addr>"
- " {dst <remote-vtep-addr>|group <mcast-vtep-addr> <intf-name>} vni <nn>"
- " [instance <id>]"
- " [encap-vrf-id <nn>] [decap-next [l2|node <name>]] [del]",
- .function = vxlan_gbp_tunnel_add_del_command_fn,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-show_vxlan_gbp_tunnel_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- vxlan_gbp_tunnel_t *t;
- int raw = 0;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "raw"))
- raw = 1;
- else
- return clib_error_return (0, "parse error: '%U'",
- format_unformat_error, input);
- }
-
- if (pool_elts (vxm->tunnels) == 0)
- vlib_cli_output (vm, "No vxlan-gbp tunnels configured...");
-
-/* *INDENT-OFF* */
- pool_foreach (t, vxm->tunnels)
- {
- vlib_cli_output (vm, "%U", format_vxlan_gbp_tunnel, t);
- }
-/* *INDENT-ON* */
-
- if (raw)
- {
- vlib_cli_output (vm, "Raw IPv4 Hash Table:\n%U\n",
- format_bihash_16_8, &vxm->vxlan4_gbp_tunnel_by_key,
- 1 /* verbose */ );
- vlib_cli_output (vm, "Raw IPv6 Hash Table:\n%U\n",
- format_bihash_24_8, &vxm->vxlan6_gbp_tunnel_by_key,
- 1 /* verbose */ );
- }
-
- return 0;
-}
-
-/*?
- * Display all the VXLAN-GBP tunnel entries.
- *
- * @cliexpar
- * Example of how to display the VXLAN-GBP tunnel entries:
- * @cliexstart{show vxlan-gbp tunnel}
- * [0] src 10.0.3.1 dst 10.0.3.3 vni 13 encap_fib_index 0 sw_if_index 5 decap_next l2
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_vxlan_gbp_tunnel_command, static) = {
- .path = "show vxlan-gbp tunnel",
- .short_help = "show vxlan-gbp tunnel [raw]",
- .function = show_vxlan_gbp_tunnel_command_fn,
-};
-/* *INDENT-ON* */
-
-
-void
-vnet_int_vxlan_gbp_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable)
-{
- if (is_ip6)
- vnet_feature_enable_disable ("ip6-unicast", "ip6-vxlan-gbp-bypass",
- sw_if_index, is_enable, 0, 0);
- else
- vnet_feature_enable_disable ("ip4-unicast", "ip4-vxlan-gbp-bypass",
- sw_if_index, is_enable, 0, 0);
-}
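This function is also the programmatic equivalent of the two CLI commands below; a one-call usage sketch (the wrapper is hypothetical):

    /* Hypothetical caller: enable the ip4 bypass feature on one interface. */
    static void
    enable_ip4_bypass_example (u32 sw_if_index)
    {
      vnet_int_vxlan_gbp_bypass_mode (sw_if_index, 0 /* is_ip6 */,
                                      1 /* is_enable */);
    }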
-
-
-static clib_error_t *
-set_ip_vxlan_gbp_bypass (u32 is_ip6,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- vnet_main_t *vnm = vnet_get_main ();
- clib_error_t *error = 0;
- u32 sw_if_index, is_enable;
-
- sw_if_index = ~0;
- is_enable = 1;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat_user
- (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index))
- ;
- else if (unformat (line_input, "del"))
- is_enable = 0;
- else
- {
- error = unformat_parse_error (line_input);
- goto done;
- }
- }
-
- if (~0 == sw_if_index)
- {
- error = clib_error_return (0, "unknown interface `%U'",
- format_unformat_error, line_input);
- goto done;
- }
-
- vnet_int_vxlan_gbp_bypass_mode (sw_if_index, is_ip6, is_enable);
-
-done:
- unformat_free (line_input);
-
- return error;
-}
-
-static clib_error_t *
-set_ip4_vxlan_gbp_bypass (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- return set_ip_vxlan_gbp_bypass (0, input, cmd);
-}
-
-/*?
- * This command adds the 'ip4-vxlan-gbp-bypass' graph node for a given
- * interface. With the IPv4 vxlan-gbp-bypass graph node enabled on an
- * interface, the node checks for and validates incoming vxlan-gbp packets
- * and bypasses the ip4-lookup, ip4-local and ip4-udp-lookup nodes to speed
- * up vxlan-gbp packet forwarding. The extra overhead this node adds for
- * non-vxlan-gbp packets is kept to a minimum.
- *
- * @cliexpar
- * @parblock
- * Example of graph node before ip4-vxlan-gbp-bypass is enabled:
- * @cliexstart{show vlib graph ip4-vxlan-gbp-bypass}
- * Name Next Previous
- * ip4-vxlan-gbp-bypass error-drop [0]
- * vxlan4-gbp-input [1]
- * ip4-lookup [2]
- * @cliexend
- *
- * Example of how to enable ip4-vxlan-gbp-bypass on an interface:
- * @cliexcmd{set interface ip vxlan-gbp-bypass GigabitEthernet2/0/0}
- *
- * Example of graph node after ip4-vxlan-gbp-bypass is enabled:
- * @cliexstart{show vlib graph ip4-vxlan-gbp-bypass}
- * Name Next Previous
- * ip4-vxlan-gbp-bypass error-drop [0] ip4-input
- * vxlan4-gbp-input [1] ip4-input-no-checksum
- * ip4-lookup [2]
- * @cliexend
- *
- * Example of how to display the feature enabled on an interface:
- * @cliexstart{show ip interface features GigabitEthernet2/0/0}
- * IP feature paths configured on GigabitEthernet2/0/0...
- * ...
- * ipv4 unicast:
- * ip4-vxlan-gbp-bypass
- * ip4-lookup
- * ...
- * @cliexend
- *
- * Example of how to disable ip4-vxlan-gbp-bypass on an interface:
- * @cliexcmd{set interface ip vxlan-gbp-bypass GigabitEthernet2/0/0 del}
- * @endparblock
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_interface_ip_vxlan_gbp_bypass_command, static) = {
- .path = "set interface ip vxlan-gbp-bypass",
- .function = set_ip4_vxlan_gbp_bypass,
- .short_help = "set interface ip vxlan-gbp-bypass <interface> [del]",
-};
-/* *INDENT-ON* */
-
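The bypass feature toggled by these CLI commands is also exposed programmatically through vnet_int_vxlan_gbp_bypass_mode() (defined above). A minimal sketch, assuming a valid sw_if_index obtained elsewhere; the helper name is illustrative:

    /* Enable VXLAN-GBP bypass for both address families on one interface,
     * mirroring "set interface ip vxlan-gbp-bypass <if>" and the ip6
     * variant registered below. */
    static void
    example_enable_vxlan_gbp_bypass (u32 sw_if_index)
    {
      vnet_int_vxlan_gbp_bypass_mode (sw_if_index, 0 /* is_ip6 */, 1 /* enable */);
      vnet_int_vxlan_gbp_bypass_mode (sw_if_index, 1 /* is_ip6 */, 1 /* enable */);
    }
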
-static clib_error_t *
-set_ip6_vxlan_gbp_bypass (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- return set_ip_vxlan_gbp_bypass (1, input, cmd);
-}
-
-/*?
- * This command adds the 'ip6-vxlan-gbp-bypass' graph node for a given
- * interface. By adding the IPv6 vxlan-gbp-bypass graph node to an interface,
- * the node checks for and validates input vxlan_gbp packets and bypasses the
- * ip6-lookup, ip6-local, ip6-udp-lookup nodes to speed up vxlan_gbp packet
- * forwarding. This node causes extra overhead for non-vxlan_gbp packets,
- * which is kept to a minimum.
- *
- * @cliexpar
- * @parblock
- * Example of graph node before ip6-vxlan-gbp-bypass is enabled:
- * @cliexstart{show vlib graph ip6-vxlan-gbp-bypass}
- * Name Next Previous
- * ip6-vxlan-gbp-bypass error-drop [0]
- * vxlan6-gbp-input [1]
- * ip6-lookup [2]
- * @cliexend
- *
- * Example of how to enable ip6-vxlan-gbp-bypass on an interface:
- * @cliexcmd{set interface ip6 vxlan-gbp-bypass GigabitEthernet2/0/0}
- *
- * Example of graph node after ip6-vxlan-gbp-bypass is enabled:
- * @cliexstart{show vlib graph ip6-vxlan-gbp-bypass}
- * Name Next Previous
- * ip6-vxlan-gbp-bypass error-drop [0] ip6-input
- * vxlan6-gbp-input [1] ip4-input-no-checksum
- * ip6-lookup [2]
- * @cliexend
- *
- * Example of how to display the feature enabled on an interface:
- * @cliexstart{show ip interface features GigabitEthernet2/0/0}
- * IP feature paths configured on GigabitEthernet2/0/0...
- * ...
- * ipv6 unicast:
- * ip6-vxlan-gbp-bypass
- * ip6-lookup
- * ...
- * @cliexend
- *
- * Example of how to disable ip6-vxlan-gbp-bypass on an interface:
- * @cliexcmd{set interface ip6 vxlan-gbp-bypass GigabitEthernet2/0/0 del}
- * @endparblock
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_interface_ip6_vxlan_gbp_bypass_command, static) = {
- .path = "set interface ip6 vxlan-gbp-bypass",
- .function = set_ip6_vxlan_gbp_bypass,
- .short_help = "set interface ip6 vxlan-gbp-bypass <interface> [del]",
-};
-/* *INDENT-ON* */
-
-#define VXLAN_GBP_HASH_NUM_BUCKETS (2 * 1024)
-#define VXLAN_GBP_HASH_MEMORY_SIZE (1 << 20)
-
-clib_error_t *
-vxlan_gbp_init (vlib_main_t * vm)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
-
- vxm->vnet_main = vnet_get_main ();
- vxm->vlib_main = vm;
-
- /* initialize the ip4 and ip6 tunnel lookup hashes */
- clib_bihash_init_16_8 (&vxm->vxlan4_gbp_tunnel_by_key, "vxlan4-gbp",
- VXLAN_GBP_HASH_NUM_BUCKETS,
- VXLAN_GBP_HASH_MEMORY_SIZE);
- clib_bihash_init_24_8 (&vxm->vxlan6_gbp_tunnel_by_key, "vxlan6-gbp",
- VXLAN_GBP_HASH_NUM_BUCKETS,
- VXLAN_GBP_HASH_MEMORY_SIZE);
- vxm->vtep6 = hash_create_mem (0, sizeof (ip6_address_t), sizeof (uword));
- vxm->mcast_shared = hash_create_mem (0,
- sizeof (ip46_address_t),
- sizeof (mcast_shared_t));
-
- fib_node_register_type (FIB_NODE_TYPE_VXLAN_GBP_TUNNEL, &vxlan_gbp_vft);
-
- punt_hdl = vlib_punt_client_register ("vxlan-gbp");
-
- vlib_punt_reason_alloc (punt_hdl, "VXLAN-GBP-no-such-v4-tunnel", NULL, NULL,
- &vxm->punt_no_such_tunnel[FIB_PROTOCOL_IP4],
- VNET_PUNT_REASON_F_IP4_PACKET,
- format_vnet_punt_reason_flags);
- vlib_punt_reason_alloc (punt_hdl, "VXLAN-GBP-no-such-v6-tunnel", NULL, NULL,
- &vxm->punt_no_such_tunnel[FIB_PROTOCOL_IP6],
- VNET_PUNT_REASON_F_IP6_PACKET,
- format_vnet_punt_reason_flags);
-
- return (0);
-}
-
-/* *INDENT-OFF* */
-VLIB_INIT_FUNCTION (vxlan_gbp_init) =
-{
- .runs_after = VLIB_INITS("punt_init"),
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan-gbp/vxlan_gbp.h b/src/vnet/vxlan-gbp/vxlan_gbp.h
deleted file mode 100644
index fe93587cb00..00000000000
--- a/src/vnet/vxlan-gbp/vxlan_gbp.h
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef included_vnet_vxlan_gbp_h
-#define included_vnet_vxlan_gbp_h
-
-#include <vppinfra/error.h>
-#include <vppinfra/hash.h>
-#include <vppinfra/bihash_16_8.h>
-#include <vppinfra/bihash_24_8.h>
-#include <vnet/vnet.h>
-#include <vnet/ip/ip.h>
-#include <vnet/l2/l2_input.h>
-#include <vnet/l2/l2_output.h>
-#include <vnet/l2/l2_bd.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/vxlan-gbp/vxlan_gbp_packet.h>
-#include <vnet/ip/ip4_packet.h>
-#include <vnet/ip/ip6_packet.h>
-#include <vnet/udp/udp_local.h>
-#include <vnet/udp/udp_packet.h>
-#include <vnet/dpo/dpo.h>
-#include <vnet/adj/adj_types.h>
-
-/* *INDENT-OFF* */
-typedef CLIB_PACKED (struct {
- ip4_header_t ip4; /* 20 bytes */
- udp_header_t udp; /* 8 bytes */
- vxlan_gbp_header_t vxlan_gbp; /* 8 bytes */
-}) ip4_vxlan_gbp_header_t;
-
-typedef CLIB_PACKED (struct {
- ip6_header_t ip6; /* 40 bytes */
- udp_header_t udp; /* 8 bytes */
- vxlan_gbp_header_t vxlan_gbp; /* 8 bytes */
-}) ip6_vxlan_gbp_header_t;
-/* *INDENT-ON* */
-
-/*
-* Key fields: remote ip, vni on incoming VXLAN packet
-* all fields in NET byte order
-*/
-typedef clib_bihash_kv_16_8_t vxlan4_gbp_tunnel_key_t;
-
-/*
-* Key fields: remote ip, vni and fib index on incoming VXLAN packet
-* ip, vni fields in NET byte order
-* fib index field in host byte order
-*/
-typedef clib_bihash_kv_24_8_t vxlan6_gbp_tunnel_key_t;
-
-typedef enum vxlan_gbp_tunnel_mode_t_
-{
- VXLAN_GBP_TUNNEL_MODE_L2,
- VXLAN_GBP_TUNNEL_MODE_L3,
-} vxlan_gbp_tunnel_mode_t;
-
-extern u8 *format_vxlan_gbp_tunnel_mode (u8 * s, va_list * args);
-
-typedef struct
-{
- /* Required for pool_get_aligned */
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-
- /* FIB DPO for IP forwarding of VXLAN encap packet */
- dpo_id_t next_dpo;
-
- /* flags */
- u16 flags;
-
- /* vxlan VNI in HOST byte order */
- u32 vni;
-
- /* tunnel src and dst addresses */
- ip46_address_t src;
- ip46_address_t dst;
-
- /* mcast packet output intfc index (used only if dst is mcast) */
- u32 mcast_sw_if_index;
-
- /* The FIB index for src/dst addresses */
- u32 encap_fib_index;
-
- /* vnet intfc index */
- u32 sw_if_index;
- u32 hw_if_index;
-
- /** Next node after VxLAN-GBP encap */
- uword encap_next_node;
-
- /**
- * Tunnel mode.
- * L2 tunnels decap to L2 path, L3 tunnels to the L3 path
- */
- vxlan_gbp_tunnel_mode_t mode;
-
- /**
- * Linkage into the FIB object graph
- */
- fib_node_t node;
-
- /*
- * The FIB entry used, depending on whether the VXLAN-GBP tunnel is unicast
- * or mcast, for sending unicast VXLAN-GBP encap packets or receiving mcast
- * VXLAN-GBP packets
- */
- fib_node_index_t fib_entry_index;
- adj_index_t mcast_adj_index;
-
- /**
- * The tunnel is a child of the FIB entry for its destination. This is
- * so it receives updates when the forwarding information for that entry
- * changes.
- * The tunnel's sibling index on the FIB entry's dependency list.
- */
- u32 sibling_index;
-
- u32 dev_instance; /* Real device instance in tunnel vector */
- u32 user_instance; /* Instance name being shown to user */
-
-
- VNET_DECLARE_REWRITE;
-} vxlan_gbp_tunnel_t;
-
-#define foreach_vxlan_gbp_input_next \
- _(DROP, "error-drop") \
- _(PUNT, "punt-dispatch") \
- _(L2_INPUT, "l2-input") \
- _(IP4_INPUT, "ip4-input") \
- _(IP6_INPUT, "ip6-input")
-
-typedef enum
-{
-#define _(s,n) VXLAN_GBP_INPUT_NEXT_##s,
- foreach_vxlan_gbp_input_next
-#undef _
- VXLAN_GBP_INPUT_N_NEXT,
-} vxlan_gbp_input_next_t;
-
-typedef enum
-{
-#define vxlan_gbp_error(n,s) VXLAN_GBP_ERROR_##n,
-#include <vnet/vxlan-gbp/vxlan_gbp_error.def>
-#undef vxlan_gbp_error
- VXLAN_GBP_N_ERROR,
-} vxlan_gbp_input_error_t;
-
-/**
- * Callback function for packets that do not match a configured tunnel
- */
-typedef vxlan_gbp_input_next_t (*vxlan_bgp_no_tunnel_t) (vlib_buffer_t * b,
- u32 thread_index,
- u8 is_ip6);
-
-typedef struct
-{
- /* vector of encap tunnel instances */
- vxlan_gbp_tunnel_t *tunnels;
-
- /* lookup tunnel by key */
- clib_bihash_16_8_t vxlan4_gbp_tunnel_by_key; /* keyed on ipv4.dst + fib + vni */
- clib_bihash_24_8_t vxlan6_gbp_tunnel_by_key; /* keyed on ipv6.dst + fib + vni */
-
- /* local VTEP IPs ref count used by vxlan-bypass node to check if
- received VXLAN packet DIP matches any local VTEP address */
- uword *vtep4; /* local ip4 VTEPs keyed on their ip4 addr */
- uword *vtep6; /* local ip6 VTEPs keyed on their ip6 addr */
-
- /* mcast shared info */
- uword *mcast_shared; /* keyed on mcast ip46 addr */
-
- /* Mapping from sw_if_index to tunnel index */
- u32 *tunnel_index_by_sw_if_index;
-
- /* On demand udp port registration */
- u32 udp_ports_registered;
-
- /* convenience */
- vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
-
- /* Record used instances */
- uword *instance_used;
-
- /**
- * Punt reasons for no such tunnel
- */
- vlib_punt_reason_t punt_no_such_tunnel[FIB_PROTOCOL_IP_MAX];
-} vxlan_gbp_main_t;
-
-extern vxlan_gbp_main_t vxlan_gbp_main;
-
-extern vlib_node_registration_t vxlan4_gbp_input_node;
-extern vlib_node_registration_t vxlan6_gbp_input_node;
-extern vlib_node_registration_t vxlan4_gbp_encap_node;
-extern vlib_node_registration_t vxlan6_gbp_encap_node;
-extern void vxlan_gbp_register_udp_ports (void);
-extern void vxlan_gbp_unregister_udp_ports (void);
-
-u8 *format_vxlan_gbp_encap_trace (u8 * s, va_list * args);
-
-typedef struct
-{
- u8 is_add;
- u8 is_ip6;
- u32 instance;
- vxlan_gbp_tunnel_mode_t mode;
- ip46_address_t src, dst;
- u32 mcast_sw_if_index;
- u32 encap_fib_index;
- u32 vni;
-} vnet_vxlan_gbp_tunnel_add_del_args_t;
-
-int vnet_vxlan_gbp_tunnel_add_del
- (vnet_vxlan_gbp_tunnel_add_del_args_t * a, u32 * sw_if_indexp);
-int vnet_vxlan_gbp_tunnel_del (u32 sw_if_indexp);
-
-void vnet_int_vxlan_gbp_bypass_mode (u32 sw_if_index, u8 is_ip6,
- u8 is_enable);
-
-always_inline u32
-vxlan_gbp_tunnel_by_sw_if_index (u32 sw_if_index)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
-
- if (sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index))
- return ~0;
-
- return (vxm->tunnel_index_by_sw_if_index[sw_if_index]);
-}
-
-#endif /* included_vnet_vxlan_gbp_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
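As a usage note for the header above: tunnels are created through vnet_vxlan_gbp_tunnel_add_del() with the args structure it declares. A minimal sketch, assuming an L2 unicast tunnel with illustrative addresses, VNI and fib index; the helper name is hypothetical and ~0 for instance is assumed to mean auto-assign:

    #include <vnet/vxlan-gbp/vxlan_gbp.h>

    static int
    example_create_vxlan_gbp_tunnel (u32 *sw_if_indexp)
    {
      ip4_address_t src4 = { .as_u32 = clib_host_to_net_u32 (0x0a000301) }; /* 10.0.3.1 */
      ip4_address_t dst4 = { .as_u32 = clib_host_to_net_u32 (0x0a000303) }; /* 10.0.3.3 */
      vnet_vxlan_gbp_tunnel_add_del_args_t a = {
        .is_add = 1,
        .is_ip6 = 0,
        .instance = ~0,            /* assumed: ~0 lets the code pick one */
        .mode = VXLAN_GBP_TUNNEL_MODE_L2,
        .mcast_sw_if_index = ~0,   /* unicast dst: no mcast interface needed */
        .encap_fib_index = 0,      /* illustrative: default table */
        .vni = 13,
      };

      ip46_address_set_ip4 (&a.src, &src4);
      ip46_address_set_ip4 (&a.dst, &dst4);

      return vnet_vxlan_gbp_tunnel_add_del (&a, sw_if_indexp);
    }
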
diff --git a/src/vnet/vxlan-gbp/vxlan_gbp_api.c b/src/vnet/vxlan-gbp/vxlan_gbp_api.c
deleted file mode 100644
index a3f2246f463..00000000000
--- a/src/vnet/vxlan-gbp/vxlan_gbp_api.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- *------------------------------------------------------------------
- * vxlan_gbp_api.c - vxlan gbp api
- *
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vnet/vnet.h>
-#include <vlibmemory/api.h>
-
-#include <vnet/interface.h>
-#include <vnet/api_errno.h>
-#include <vnet/feature/feature.h>
-#include <vnet/vxlan-gbp/vxlan_gbp.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/ip/ip_types_api.h>
-#include <vnet/format_fns.h>
-
-#include <vxlan-gbp/vxlan_gbp.api_enum.h>
-#include <vxlan-gbp/vxlan_gbp.api_types.h>
-
-#define REPLY_MSG_ID_BASE msg_id_base
-#include <vlibapi/api_helper_macros.h>
-
-static u16 msg_id_base;
-
-static void
- vl_api_sw_interface_set_vxlan_gbp_bypass_t_handler
- (vl_api_sw_interface_set_vxlan_gbp_bypass_t * mp)
-{
- vl_api_sw_interface_set_vxlan_gbp_bypass_reply_t *rmp;
- int rv = 0;
- u32 sw_if_index = ntohl (mp->sw_if_index);
-
- VALIDATE_SW_IF_INDEX (mp);
-
- vnet_int_vxlan_gbp_bypass_mode (sw_if_index, mp->is_ipv6, mp->enable);
- BAD_SW_IF_INDEX_LABEL;
-
- REPLY_MACRO (VL_API_SW_INTERFACE_SET_VXLAN_GBP_BYPASS_REPLY);
-}
-
-static int
-vxlan_gbp_tunnel_mode_decode (vl_api_vxlan_gbp_api_tunnel_mode_t in,
- vxlan_gbp_tunnel_mode_t * out)
-{
- in = clib_net_to_host_u32 (in);
-
- switch (in)
- {
- case VXLAN_GBP_API_TUNNEL_MODE_L2:
- *out = VXLAN_GBP_TUNNEL_MODE_L2;
- return (0);
- case VXLAN_GBP_API_TUNNEL_MODE_L3:
- *out = VXLAN_GBP_TUNNEL_MODE_L3;
- return (0);
- }
- return (VNET_API_ERROR_INVALID_VALUE);
-}
-
-static void vl_api_vxlan_gbp_tunnel_add_del_t_handler
- (vl_api_vxlan_gbp_tunnel_add_del_t * mp)
-{
- vl_api_vxlan_gbp_tunnel_add_del_reply_t *rmp;
- vxlan_gbp_tunnel_mode_t mode;
- ip46_address_t src, dst;
- ip46_type_t itype;
- int rv = 0;
- u32 sw_if_index = ~0;
- u32 fib_index;
-
- itype = ip_address_decode (&mp->tunnel.src, &src);
- itype = ip_address_decode (&mp->tunnel.dst, &dst);
-
- fib_index = fib_table_find (fib_proto_from_ip46 (itype),
- ntohl (mp->tunnel.encap_table_id));
- if (fib_index == ~0)
- {
- rv = VNET_API_ERROR_NO_SUCH_FIB;
- goto out;
- }
-
- rv = vxlan_gbp_tunnel_mode_decode (mp->tunnel.mode, &mode);
-
- if (rv)
- goto out;
-
- vnet_vxlan_gbp_tunnel_add_del_args_t a = {
- .is_add = mp->is_add,
- .is_ip6 = (itype == IP46_TYPE_IP6),
- .instance = ntohl (mp->tunnel.instance),
- .mcast_sw_if_index = ntohl (mp->tunnel.mcast_sw_if_index),
- .encap_fib_index = fib_index,
- .vni = ntohl (mp->tunnel.vni),
- .dst = dst,
- .src = src,
- .mode = mode,
- };
-
- /* Check src & dst are different */
- if (ip46_address_cmp (&a.dst, &a.src) == 0)
- {
- rv = VNET_API_ERROR_SAME_SRC_DST;
- goto out;
- }
- if (ip46_address_is_multicast (&a.dst) &&
- !vnet_sw_if_index_is_api_valid (a.mcast_sw_if_index))
- {
- rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
- goto out;
- }
-
- rv = vnet_vxlan_gbp_tunnel_add_del (&a, &sw_if_index);
-
-out:
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_VXLAN_GBP_TUNNEL_ADD_DEL_REPLY,
- ({
- rmp->sw_if_index = ntohl (sw_if_index);
- }));
- /* *INDENT-ON* */
-}
-
-static void send_vxlan_gbp_tunnel_details
- (vxlan_gbp_tunnel_t * t, vl_api_registration_t * reg, u32 context)
-{
- vl_api_vxlan_gbp_tunnel_details_t *rmp;
- ip46_type_t itype = (ip46_address_is_ip4 (&t->dst) ?
- IP46_TYPE_IP4 : IP46_TYPE_IP6);
-
- rmp = vl_msg_api_alloc (sizeof (*rmp));
- clib_memset (rmp, 0, sizeof (*rmp));
- rmp->_vl_msg_id =
- ntohs (VL_API_VXLAN_GBP_TUNNEL_DETAILS + REPLY_MSG_ID_BASE);
-
- ip_address_encode (&t->src, itype, &rmp->tunnel.src);
- ip_address_encode (&t->dst, itype, &rmp->tunnel.dst);
- rmp->tunnel.encap_table_id =
- fib_table_get_table_id (t->encap_fib_index, fib_proto_from_ip46 (itype));
-
- rmp->tunnel.instance = htonl (t->user_instance);
- rmp->tunnel.mcast_sw_if_index = htonl (t->mcast_sw_if_index);
- rmp->tunnel.vni = htonl (t->vni);
- rmp->tunnel.sw_if_index = htonl (t->sw_if_index);
- rmp->context = context;
-
- vl_api_send_msg (reg, (u8 *) rmp);
-}
-
-static void vl_api_vxlan_gbp_tunnel_dump_t_handler
- (vl_api_vxlan_gbp_tunnel_dump_t * mp)
-{
- vl_api_registration_t *reg;
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- vxlan_gbp_tunnel_t *t;
- u32 sw_if_index;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- sw_if_index = ntohl (mp->sw_if_index);
-
- if (~0 == sw_if_index)
- {
- /* *INDENT-OFF* */
- pool_foreach (t, vxm->tunnels)
- {
- send_vxlan_gbp_tunnel_details(t, reg, mp->context);
- }
- /* *INDENT-ON* */
- }
- else
- {
- if ((sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index)) ||
- (~0 == vxm->tunnel_index_by_sw_if_index[sw_if_index]))
- {
- return;
- }
- t = &vxm->tunnels[vxm->tunnel_index_by_sw_if_index[sw_if_index]];
- send_vxlan_gbp_tunnel_details (t, reg, mp->context);
- }
-}
-
-#include <vxlan-gbp/vxlan_gbp.api.c>
-static clib_error_t *
-vxlan_gbp_api_hookup (vlib_main_t * vm)
-{
- /*
- * Set up the (msg_name, crc, message-id) table
- */
- msg_id_base = setup_message_id_table ();
-
- return 0;
-}
-
-VLIB_API_INIT_FUNCTION (vxlan_gbp_api_hookup);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan-gbp/vxlan_gbp_error.def b/src/vnet/vxlan-gbp/vxlan_gbp_error.def
deleted file mode 100644
index 43ad4dac064..00000000000
--- a/src/vnet/vxlan-gbp/vxlan_gbp_error.def
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-vxlan_gbp_error (DECAPSULATED, "good packets decapsulated")
-vxlan_gbp_error (NO_SUCH_TUNNEL, "no such tunnel packets")
-vxlan_gbp_error (BAD_FLAGS, "packets with bad flags field in vxlan gbp header")
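These entries are consumed via the x-macro pattern: vxlan_gbp.h (deleted earlier in this diff) redefines vxlan_gbp_error() to build the error enum, and the input node typically redefines it again to build the matching string table, as the vxlan code later in this diff does. A sketch of that second expansion, assuming the usual VPP convention:

    /* Build the error string table from the same .def file; each
     * vxlan_gbp_error (n, s) entry contributes its string s. */
    static char *vxlan_gbp_error_strings[] = {
    #define vxlan_gbp_error(n,s) s,
    #include <vnet/vxlan-gbp/vxlan_gbp_error.def>
    #undef vxlan_gbp_error
    };

    /* After preprocessing this is equivalent to:
     *   { "good packets decapsulated",
     *     "no such tunnel packets",
     *     "packets with bad flags field in vxlan gbp header" };
     * while the enum in vxlan_gbp.h yields VXLAN_GBP_ERROR_DECAPSULATED,
     * VXLAN_GBP_ERROR_NO_SUCH_TUNNEL and VXLAN_GBP_ERROR_BAD_FLAGS. */
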
diff --git a/src/vnet/vxlan-gbp/vxlan_gbp_packet.c b/src/vnet/vxlan-gbp/vxlan_gbp_packet.c
deleted file mode 100644
index 01c7a19bfb9..00000000000
--- a/src/vnet/vxlan-gbp/vxlan_gbp_packet.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vnet/vxlan-gbp/vxlan_gbp_packet.h>
-
-u8 *
-format_vxlan_gbp_header_flags (u8 * s, va_list * args)
-{
- vxlan_gbp_flags_t flags = va_arg (*args, int);
-
- if (VXLAN_GBP_FLAGS_NONE == flags)
- {
- s = format (s, "None");
- }
-#define _(n,f) { \
- if (VXLAN_GBP_FLAGS_##f & flags) \
- s = format (s, #f); \
- }
- foreach_vxlan_gbp_flags
-#undef _
- return (s);
-}
-
-u8 *
-format_vxlan_gbp_header_gpflags (u8 * s, va_list * args)
-{
- vxlan_gbp_gpflags_t flags = va_arg (*args, int);
-
- if (VXLAN_GBP_GPFLAGS_NONE == flags)
- {
- s = format (s, "None");
- }
-#define _(n,f) { \
- if (VXLAN_GBP_GPFLAGS_##f & flags) \
- s = format (s, #f); \
- }
- foreach_vxlan_gbp_gpflags
-#undef _
- return (s);
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan-gbp/vxlan_gbp_packet.h b/src/vnet/vxlan-gbp/vxlan_gbp_packet.h
deleted file mode 100644
index e655b333b89..00000000000
--- a/src/vnet/vxlan-gbp/vxlan_gbp_packet.h
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __included_vxlan_gbp_packet_h__
-#define __included_vxlan_gbp_packet_h__ 1
-
-#include <vlib/vlib.h>
-
-/*
- * From draft-smith-vxlan-group-policy-04.txt
- *
- * 0 1 2 3
- * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |G|R|R|R|I|R|R|R|R|D|E|S|A|R|R|R| Group Policy ID |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | VXLAN Network Identifier (VNI) | Reserved |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- * G bit: Bit 0 of the initial word is defined as the G (Group Based
- * Policy Extension) bit.
- *
- * I bit: the I flag MUST be set to 1 for a valid
- * VXLAN Network ID (VNI).
- *
- * D bit: Bit 9 of the initial word is defined as the Don't Learn bit.
- * When set, this bit indicates that the egress VTEP MUST NOT learn the
- * source address of the encapsulated frame.
- *
- * E bit: Bit 10 of the initial word is defined as the bounce packet
- * bit. When set, this bit indicates that the packet is bounced and
- * must be dropped.
- *
- * S bit: Bit 11 of the initial word is defined as the source policy
- * applied bit.
- *
- * A bit: Bit 12 of the initial word is defined as the A (Policy
- * Applied) bit. This bit is only defined as the A bit when the G bit
- * is set to 1.
- *
- * A = 1 indicates that the group policy has already been applied to
- * this packet. Policies MUST NOT be applied by devices when the A
- * bit is set.
- *
- * A = 0 indicates that the group policy has not been applied to this
- * packet. Group policies MUST be applied by devices when the A bit
- * is set to 0 and the destination Group has been determined.
- * Devices that apply the Group policy MUST set the A bit to 1 after
- * the policy has been applied.
- *
- * Group Policy ID: 16-bit identifier that indicates the source TSI
- * Group membership being encapsulated by VXLAN. Its value is the
- * source class id.
- *
- * FOR INTERNAL USE ONLY
- * R bit: Bit 13 of the initial word is defined as the reflection bit.
- * It is set on packet rx and checked on tx; the packet is dropped if
- * set. This prevents packets received on an iVXLAN tunnel from being
- * reflected back to another.
- */
-
-typedef struct
-{
- union
- {
- struct
- {
- union
- {
- struct
- {
- u8 flag_g_i;
- u8 gpflags;
- };
- u16 flags;
- };
- u16 sclass;
- };
- u32 flags_sclass_as_u32;
- };
- u32 vni_reserved;
-} vxlan_gbp_header_t;
-
-#define foreach_vxlan_gbp_flags \
- _ (0x80, G) \
- _ (0x08, I)
-
-typedef enum
-{
- VXLAN_GBP_FLAGS_NONE = 0,
-#define _(n,f) VXLAN_GBP_FLAGS_##f = n,
- foreach_vxlan_gbp_flags
-#undef _
-} __attribute__ ((packed)) vxlan_gbp_flags_t;
-
-#define VXLAN_GBP_FLAGS_GI (VXLAN_GBP_FLAGS_G|VXLAN_GBP_FLAGS_I)
-
-#define foreach_vxlan_gbp_gpflags \
-_ (0x40, D) \
-_ (0x20, E) \
-_ (0x10, S) \
-_ (0x08, A) \
-_ (0x04, R)
-
-typedef enum
-{
- VXLAN_GBP_GPFLAGS_NONE = 0,
-#define _(n,f) VXLAN_GBP_GPFLAGS_##f = n,
- foreach_vxlan_gbp_gpflags
-#undef _
-} __attribute__ ((packed)) vxlan_gbp_gpflags_t;
-
-static inline u32
-vxlan_gbp_get_vni (vxlan_gbp_header_t * h)
-{
- u32 vni_reserved_host_byte_order;
-
- vni_reserved_host_byte_order = clib_net_to_host_u32 (h->vni_reserved);
- return vni_reserved_host_byte_order >> 8;
-}
-
-static inline u16
-vxlan_gbp_get_sclass (vxlan_gbp_header_t * h)
-{
- u16 sclass_host_byte_order;
-
- sclass_host_byte_order = clib_net_to_host_u16 (h->sclass);
- return sclass_host_byte_order;
-}
-
-static inline vxlan_gbp_gpflags_t
-vxlan_gbp_get_gpflags (vxlan_gbp_header_t * h)
-{
- return h->gpflags;
-}
-
-static inline vxlan_gbp_flags_t
-vxlan_gbp_get_flags (vxlan_gbp_header_t * h)
-{
- return h->flag_g_i;
-}
-
-static inline void
-vxlan_gbp_set_header (vxlan_gbp_header_t * h, u32 vni)
-{
- h->vni_reserved = clib_host_to_net_u32 (vni << 8);
- h->flags_sclass_as_u32 = 0;
- h->flag_g_i = VXLAN_GBP_FLAGS_I | VXLAN_GBP_FLAGS_G;
-}
-
-extern u8 *format_vxlan_gbp_header_flags (u8 * s, va_list * args);
-extern u8 *format_vxlan_gbp_header_gpflags (u8 * s, va_list * args);
-
-#endif /* __included_vxlan_gbp_packet_h__ */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
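Tying the accessors above together, a minimal encode/decode round trip; the VNI value is illustrative and the function name is hypothetical:

    #include <vnet/vxlan-gbp/vxlan_gbp_packet.h>

    static void
    example_vxlan_gbp_header_roundtrip (void)
    {
      vxlan_gbp_header_t h;

      /* Sets G|I in flag_g_i, zeroes gpflags/sclass, and stores
       * VNI << 8 in network byte order. */
      vxlan_gbp_set_header (&h, 13);

      ASSERT (vxlan_gbp_get_vni (&h) == 13);
      ASSERT (vxlan_gbp_get_flags (&h) == VXLAN_GBP_FLAGS_GI);
      ASSERT (vxlan_gbp_get_gpflags (&h) == VXLAN_GBP_GPFLAGS_NONE);
      ASSERT (vxlan_gbp_get_sclass (&h) == 0);
    }
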
diff --git a/src/vnet/vxlan-gpe/decap.c b/src/vnet/vxlan-gpe/decap.c
index 62513614389..d4c7424630d 100644
--- a/src/vnet/vxlan-gpe/decap.c
+++ b/src/vnet/vxlan-gpe/decap.c
@@ -622,7 +622,6 @@ static char *vxlan_gpe_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan4_gpe_input_node) = {
.name = "vxlan4-gpe-input",
/* Takes a vector of packets. */
@@ -642,9 +641,7 @@ VLIB_REGISTER_NODE (vxlan4_gpe_input_node) = {
.format_trace = format_vxlan_gpe_rx_trace,
// $$$$ .unformat_buffer = unformat_vxlan_gpe_header,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan6_gpe_input_node) = {
.name = "vxlan6-gpe-input",
/* Takes a vector of packets. */
@@ -664,7 +661,6 @@ VLIB_REGISTER_NODE (vxlan6_gpe_input_node) = {
.format_trace = format_vxlan_gpe_rx_trace,
// $$$$ .unformat_buffer = unformat_vxlan_gpe_header,
};
-/* *INDENT-ON* */
typedef enum
{
@@ -1105,7 +1101,6 @@ VLIB_NODE_FN (ip4_vxlan_gpe_bypass_node) (vlib_main_t * vm,
return ip_vxlan_gpe_bypass_inline (vm, node, frame, /* is_ip4 */ 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_vxlan_gpe_bypass_node) = {
.name = "ip4-vxlan-gpe-bypass",
.vector_size = sizeof (u32),
@@ -1119,7 +1114,6 @@ VLIB_REGISTER_NODE (ip4_vxlan_gpe_bypass_node) = {
.format_buffer = format_ip4_header,
.format_trace = format_ip4_forward_next_trace,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
/* Dummy init function to get us linked in. */
@@ -1139,7 +1133,6 @@ VLIB_NODE_FN (ip6_vxlan_gpe_bypass_node) (vlib_main_t * vm,
return ip_vxlan_gpe_bypass_inline (vm, node, frame, /* is_ip4 */ 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_vxlan_gpe_bypass_node) = {
.name = "ip6-vxlan-gpe-bypass",
.vector_size = sizeof (u32),
@@ -1153,7 +1146,6 @@ VLIB_REGISTER_NODE (ip6_vxlan_gpe_bypass_node) = {
.format_buffer = format_ip6_header,
.format_trace = format_ip6_forward_next_trace,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
/* Dummy init function to get us linked in. */
diff --git a/src/vnet/vxlan-gpe/encap.c b/src/vnet/vxlan-gpe/encap.c
index daa0381c4bb..a769861577d 100644
--- a/src/vnet/vxlan-gpe/encap.c
+++ b/src/vnet/vxlan-gpe/encap.c
@@ -88,13 +88,15 @@ format_vxlan_gpe_encap_trace (u8 * s, va_list * args)
*
*/
always_inline void
-vxlan_gpe_encap_one_inline (vxlan_gpe_main_t * ngm, vlib_buffer_t * b0,
- vxlan_gpe_tunnel_t * t0, u32 * next0, u8 is_v4)
+vxlan_gpe_encap_one_inline (vxlan_gpe_main_t *ngm, vlib_buffer_t *b0,
+ vxlan_gpe_tunnel_t *t0, u32 *next0,
+ ip_address_family_t af)
{
ASSERT (sizeof (ip4_vxlan_gpe_header_t) == 36);
ASSERT (sizeof (ip6_vxlan_gpe_header_t) == 56);
- ip_udp_encap_one (ngm->vlib_main, b0, t0->rewrite, t0->rewrite_size, is_v4);
+ ip_udp_encap_one (ngm->vlib_main, b0, t0->rewrite, t0->rewrite_size, af,
+ N_AF, UDP_ENCAP_FIXUP_NONE);
next0[0] = t0->encap_next_node;
}
@@ -112,16 +114,18 @@ vxlan_gpe_encap_one_inline (vxlan_gpe_main_t * ngm, vlib_buffer_t * b0,
*
*/
always_inline void
-vxlan_gpe_encap_two_inline (vxlan_gpe_main_t * ngm, vlib_buffer_t * b0,
- vlib_buffer_t * b1, vxlan_gpe_tunnel_t * t0,
- vxlan_gpe_tunnel_t * t1, u32 * next0,
- u32 * next1, u8 is_v4)
+vxlan_gpe_encap_two_inline (vxlan_gpe_main_t *ngm, vlib_buffer_t *b0,
+ vlib_buffer_t *b1, vxlan_gpe_tunnel_t *t0,
+ vxlan_gpe_tunnel_t *t1, u32 *next0, u32 *next1,
+ ip_address_family_t af)
{
ASSERT (sizeof (ip4_vxlan_gpe_header_t) == 36);
ASSERT (sizeof (ip6_vxlan_gpe_header_t) == 56);
- ip_udp_encap_one (ngm->vlib_main, b0, t0->rewrite, t0->rewrite_size, is_v4);
- ip_udp_encap_one (ngm->vlib_main, b1, t1->rewrite, t1->rewrite_size, is_v4);
+ ip_udp_encap_one (ngm->vlib_main, b0, t0->rewrite, t0->rewrite_size, af,
+ N_AF, UDP_ENCAP_FIXUP_NONE);
+ ip_udp_encap_one (ngm->vlib_main, b1, t1->rewrite, t1->rewrite_size, af,
+ N_AF, UDP_ENCAP_FIXUP_NONE);
next0[0] = next1[0] = t0->encap_next_node;
}
@@ -170,7 +174,7 @@ vxlan_gpe_encap (vlib_main_t * vm,
u32 sw_if_index0 = ~0, sw_if_index1 = ~0, len0, len1;
vnet_hw_interface_t *hi0, *hi1;
vxlan_gpe_tunnel_t *t0 = NULL, *t1 = NULL;
- u8 is_ip4_0 = 0, is_ip4_1 = 0;
+ ip_address_family_t af_0 = AF_IP4, af_1 = AF_IP4;
vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
@@ -201,7 +205,7 @@ vxlan_gpe_encap (vlib_main_t * vm,
n_left_to_next -= 2;
n_left_from -= 2;
- /* get the flag "is_ip4" */
+ /* get "af_0" */
if (sw_if_index0 != vnet_buffer (b[0])->sw_if_index[VLIB_TX])
{
sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
@@ -210,10 +214,10 @@ vxlan_gpe_encap (vlib_main_t * vm,
vnet_buffer (b[0])->sw_if_index
[VLIB_TX]);
t0 = pool_elt_at_index (ngm->tunnels, hi0->dev_instance);
- is_ip4_0 = (t0->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
+ af_0 = (t0->flags & VXLAN_GPE_TUNNEL_IS_IPV4 ? AF_IP4 : AF_IP6);
}
- /* get the flag "is_ip4" */
+ /* get "af_1" */
if (sw_if_index1 != vnet_buffer (b[1])->sw_if_index[VLIB_TX])
{
if (sw_if_index0 == vnet_buffer (b[1])->sw_if_index[VLIB_TX])
@@ -221,7 +225,7 @@ vxlan_gpe_encap (vlib_main_t * vm,
sw_if_index1 = sw_if_index0;
hi1 = hi0;
t1 = t0;
- is_ip4_1 = is_ip4_0;
+ af_1 = af_0;
}
else
{
@@ -231,19 +235,20 @@ vxlan_gpe_encap (vlib_main_t * vm,
vnet_buffer (b[1])->sw_if_index
[VLIB_TX]);
t1 = pool_elt_at_index (ngm->tunnels, hi1->dev_instance);
- is_ip4_1 = (t1->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
+ af_1 =
+ (t1->flags & VXLAN_GPE_TUNNEL_IS_IPV4 ? AF_IP4 : AF_IP6);
}
}
- if (PREDICT_TRUE (is_ip4_0 == is_ip4_1))
+ if (PREDICT_TRUE (af_0 == af_1))
{
vxlan_gpe_encap_two_inline (ngm, b[0], b[1], t0, t1, &next0,
- &next1, is_ip4_0);
+ &next1, af_0);
}
else
{
- vxlan_gpe_encap_one_inline (ngm, b[0], t0, &next0, is_ip4_0);
- vxlan_gpe_encap_one_inline (ngm, b[1], t1, &next1, is_ip4_1);
+ vxlan_gpe_encap_one_inline (ngm, b[0], t0, &next0, af_0);
+ vxlan_gpe_encap_one_inline (ngm, b[1], t1, &next1, af_1);
}
/* Reset to look up tunnel partner in the configured FIB */
@@ -325,7 +330,7 @@ vxlan_gpe_encap (vlib_main_t * vm,
n_left_from -= 1;
n_left_to_next -= 1;
- /* get the flag "is_ip4" */
+ /* get "af_0" */
if (sw_if_index0 != vnet_buffer (b[0])->sw_if_index[VLIB_TX])
{
sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
@@ -336,10 +341,10 @@ vxlan_gpe_encap (vlib_main_t * vm,
t0 = pool_elt_at_index (ngm->tunnels, hi0->dev_instance);
- is_ip4_0 = (t0->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
+ af_0 = (t0->flags & VXLAN_GPE_TUNNEL_IS_IPV4 ? AF_IP4 : AF_IP6);
}
- vxlan_gpe_encap_one_inline (ngm, b[0], t0, &next0, is_ip4_0);
+ vxlan_gpe_encap_one_inline (ngm, b[0], t0, &next0, af_0);
/* Reset to look up tunnel partner in the configured FIB */
vnet_buffer (b[0])->sw_if_index[VLIB_TX] = t0->encap_fib_index;
@@ -399,7 +404,6 @@ vxlan_gpe_encap (vlib_main_t * vm,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan_gpe_encap_node) = {
.function = vxlan_gpe_encap,
.name = "vxlan-gpe-encap",
@@ -418,7 +422,6 @@ VLIB_REGISTER_NODE (vxlan_gpe_encap_node) = {
[VXLAN_GPE_ENCAP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.c b/src/vnet/vxlan-gpe/vxlan_gpe.c
index 8ef94d44337..5a5262ea9db 100644
--- a/src/vnet/vxlan-gpe/vxlan_gpe.c
+++ b/src/vnet/vxlan-gpe/vxlan_gpe.c
@@ -144,14 +144,12 @@ vxlan_gpe_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
return 0;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (vxlan_gpe_device_class,static) = {
.name = "VXLAN_GPE",
.format_device_name = format_vxlan_gpe_name,
.format_tx_trace = format_vxlan_gpe_encap_trace,
.admin_up_down_function = vxlan_gpe_interface_admin_up_down,
};
-/* *INDENT-ON* */
/**
@@ -171,13 +169,11 @@ format_vxlan_gpe_header_with_length (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (vxlan_gpe_hw_class) = {
.name = "VXLAN_GPE",
.format_header = format_vxlan_gpe_header_with_length,
.build_rewrite = default_build_rewrite,
};
-/* *INDENT-ON* */
static void
vxlan_gpe_tunnel_restack_dpo (vxlan_gpe_tunnel_t * t)
@@ -388,7 +384,6 @@ vxlan6_gpe_rewrite (vxlan_gpe_tunnel_t * t, u32 extension_size,
return (0);
}
-/* *INDENT-OFF* */
typedef CLIB_PACKED(union {
struct {
fib_node_index_t mfib_entry_index;
@@ -396,7 +391,6 @@ typedef CLIB_PACKED(union {
};
u64 as_u64;
}) mcast_shared_t;
-/* *INDENT-ON* */
static inline mcast_shared_t
mcast_shared_get (ip46_address_t * ip)
@@ -496,7 +490,6 @@ int vnet_vxlan_gpe_add_del_tunnel
clib_memset (t, 0, sizeof (*t));
/* copy from arg structure */
-/* *INDENT-OFF* */
#define _(x) t->x = a->x;
foreach_gpe_copy_field;
if (!a->is_ip6)
@@ -504,7 +497,6 @@ int vnet_vxlan_gpe_add_del_tunnel
else
foreach_copy_ipv6
#undef _
-/* *INDENT-ON* */
if (!a->is_ip6)
t->flags |= VXLAN_GPE_TUNNEL_IS_IPV4;
@@ -544,7 +536,7 @@ int vnet_vxlan_gpe_add_del_tunnel
vnet_interface_main_t *im = &vnm->interface_main;
hw_if_index = ngm->free_vxlan_gpe_tunnel_hw_if_indices
[vec_len (ngm->free_vxlan_gpe_tunnel_hw_if_indices) - 1];
- _vec_len (ngm->free_vxlan_gpe_tunnel_hw_if_indices) -= 1;
+ vec_dec_len (ngm->free_vxlan_gpe_tunnel_hw_if_indices, 1);
hi = vnet_get_hw_interface (vnm, hw_if_index);
hi->dev_instance = t - ngm->tunnels;
@@ -594,7 +586,8 @@ int vnet_vxlan_gpe_add_del_tunnel
fib_prefix_t tun_remote_pfx;
vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
- fib_prefix_from_ip46_addr (&t->remote, &tun_remote_pfx);
+ fib_protocol_t fp = fib_ip_proto (is_ip6);
+ fib_prefix_from_ip46_addr (fp, &t->remote, &tun_remote_pfx);
if (!ip46_address_is_multicast (&t->remote))
{
/* Unicast tunnel -
@@ -618,8 +611,6 @@ int vnet_vxlan_gpe_add_del_tunnel
* with different VNIs, create the output fib adjacency only if
* it does not already exist
*/
- fib_protocol_t fp = fib_ip_proto (is_ip6);
-
if (vtep_addr_ref (&ngm->vtep_table,
t->encap_fib_index, &t->remote) == 1)
{
@@ -919,7 +910,6 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm,
a->is_add = is_add;
a->is_ip6 = ipv6_set;
-/* *INDENT-OFF* */
#define _(x) a->x = x;
foreach_gpe_copy_field;
if (ipv4_set)
@@ -927,7 +917,6 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm,
else
foreach_copy_ipv6
#undef _
-/* *INDENT-ON* */
rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index);
@@ -980,7 +969,6 @@ done:
* Example of how to delete a VXLAN-GPE Tunnel:
* @cliexcmd{create vxlan-gpe tunnel local 10.0.3.1 remote 10.0.3.3 vni 13 del}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_vxlan_gpe_tunnel_command, static) = {
.path = "create vxlan-gpe tunnel",
.short_help =
@@ -990,7 +978,6 @@ VLIB_CLI_COMMAND (create_vxlan_gpe_tunnel_command, static) = {
" [encap-vrf-id <nn>] [decap-vrf-id <nn>] [del]\n",
.function = vxlan_gpe_add_del_tunnel_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief CLI function for showing VXLAN GPE tunnels
@@ -1013,12 +1000,10 @@ show_vxlan_gpe_tunnel_command_fn (vlib_main_t * vm,
if (pool_elts (ngm->tunnels) == 0)
vlib_cli_output (vm, "No vxlan-gpe tunnels configured.");
- /* *INDENT-OFF* */
pool_foreach (t, ngm->tunnels)
{
vlib_cli_output (vm, "%U", format_vxlan_gpe_tunnel, t);
}
- /* *INDENT-ON* */
return 0;
}
@@ -1032,12 +1017,10 @@ show_vxlan_gpe_tunnel_command_fn (vlib_main_t * vm,
* [0] local 10.0.3.1 remote 10.0.3.3 vni 13 encap_fib_index 0 sw_if_index 5 decap_next l2
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_vxlan_gpe_tunnel_command, static) = {
.path = "show vxlan-gpe",
.function = show_vxlan_gpe_tunnel_command_fn,
};
-/* *INDENT-ON* */
void
vnet_int_vxlan_gpe_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable)
@@ -1145,13 +1128,11 @@ set_ip4_vxlan_gpe_bypass (vlib_main_t * vm,
* @cliexcmd{set interface ip vxlan-gpe-bypass GigabitEthernet2/0/0 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_vxlan_gpe_bypass_command, static) = {
.path = "set interface ip vxlan-gpe-bypass",
.function = set_ip4_vxlan_gpe_bypass,
.short_help = "set interface ip vxlan-gpe-bypass <interface> [del]",
};
-/* *INDENT-ON* */
static clib_error_t *
set_ip6_vxlan_gpe_bypass (vlib_main_t * vm,
@@ -1203,15 +1184,12 @@ set_ip6_vxlan_gpe_bypass (vlib_main_t * vm,
* @cliexcmd{set interface ip6 vxlan-gpe-bypass GigabitEthernet2/0/0 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip6_vxlan_gpe_bypass_command, static) = {
.path = "set interface ip6 vxlan-gpe-bypass",
.function = set_ip6_vxlan_gpe_bypass,
.short_help = "set interface ip6 vxlan-gpe-bypass <interface> [del]",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_vxlan_gpe_bypass, static) =
{
.arc_name = "ip4-unicast",
@@ -1225,7 +1203,6 @@ VNET_FEATURE_INIT (ip6_vxlan_gpe_bypass, static) =
.node_name = "ip6-vxlan-gpe-bypass",
.runs_before = VNET_FEATURES ("ip6-lookup"),
};
-/* *INDENT-ON* */
/**
* @brief Feature init function for VXLAN GPE
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.h b/src/vnet/vxlan-gpe/vxlan_gpe.h
index 5d21ee66d67..aabaafeee6f 100644
--- a/src/vnet/vxlan-gpe/vxlan_gpe.h
+++ b/src/vnet/vxlan-gpe/vxlan_gpe.h
@@ -40,7 +40,6 @@
* @brief VXLAN GPE header struct
*
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
/** 20 bytes */
ip4_header_t ip4;
@@ -49,9 +48,7 @@ typedef CLIB_PACKED (struct {
/** 8 bytes */
vxlan_gpe_header_t vxlan;
}) ip4_vxlan_gpe_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
/** 40 bytes */
ip6_header_t ip6;
@@ -60,7 +57,6 @@ typedef CLIB_PACKED (struct {
/** 8 bytes */
vxlan_gpe_header_t vxlan;
}) ip6_vxlan_gpe_header_t;
-/* *INDENT-ON* */
/**
* @brief Key struct for IPv4 VXLAN GPE tunnel.
@@ -68,7 +64,6 @@ typedef CLIB_PACKED (struct {
* all fields in NET byte order
* VNI shifted 8 bits
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct {
union {
struct {
@@ -81,7 +76,6 @@ typedef CLIB_PACKED(struct {
u64 as_u64[2];
};
}) vxlan4_gpe_tunnel_key_t;
-/* *INDENT-ON* */
/**
* @brief Key struct for IPv6 VXLAN GPE tunnel.
@@ -89,14 +83,12 @@ typedef CLIB_PACKED(struct {
* all fields in NET byte order
* VNI shifted 8 bits
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct {
ip6_address_t local;
ip6_address_t remote;
u32 vni;
u32 port;
}) vxlan6_gpe_tunnel_key_t;
-/* *INDENT-ON* */
typedef union
{
@@ -237,9 +229,7 @@ typedef struct
vnet_main_t *vnet_main;
/* cache for last 8 vxlan_gpe tunnel */
-#ifdef CLIB_HAVE_VEC512
vtep4_cache_t vtep4_u512;
-#endif
/** List of next nodes for the decap indexed on protocol */
uword decap_next_node_list[VXLAN_GPE_PROTOCOL_MAX];
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_api.c b/src/vnet/vxlan-gpe/vxlan_gpe_api.c
index 9423b2745be..cc74e1f58d4 100644
--- a/src/vnet/vxlan-gpe/vxlan_gpe_api.c
+++ b/src/vnet/vxlan-gpe/vxlan_gpe_api.c
@@ -114,12 +114,10 @@ static void
rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_VXLAN_GPE_ADD_DEL_TUNNEL_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -242,12 +240,10 @@ static void vl_api_vxlan_gpe_tunnel_dump_t_handler
if (~0 == sw_if_index)
{
- /* *INDENT-OFF* */
pool_foreach (t, vgm->tunnels)
{
send_vxlan_gpe_tunnel_details (t, reg, mp->context);
}
- /* *INDENT-ON* */
}
else
{
@@ -342,8 +338,8 @@ vxlan_gpe_api_hookup (vlib_main_t * vm)
{
api_main_t *am = vlibapi_get_main ();
- am->api_trace_cfg[VL_API_VXLAN_GPE_ADD_DEL_TUNNEL].size +=
- 17 * sizeof (u32);
+ vl_api_increase_msg_trace_size (am, VL_API_VXLAN_GPE_ADD_DEL_TUNNEL,
+ 17 * sizeof (u32));
/*
* Set up the (msg_name, crc, message-id) table
diff --git a/src/vnet/vxlan/FEATURE.yaml b/src/vnet/vxlan/FEATURE.yaml
deleted file mode 100644
index dc7d21b010e..00000000000
--- a/src/vnet/vxlan/FEATURE.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
----
-name: Virtual eXtensible LAN
-maintainer: John Lo <loj@cisco.com>
-features:
- - VXLAN tunnel for support of L2 overlay/virtual networks (RFC-7348)
- - Support either IPv4 or IPv6 underlay network VTEPs
- - Flooding via headend replication if all VXLAN tunnels in BD are unicast ones
- - Multicast VXLAN tunnel can be added to BD to flood via IP multicast
- - VXLAN encap with flow-hashed source port for better underlay IP load balance
- - VXLAN decap optimization via vxlan-bypass IP feature on underlay interfaces
- - VXLAN decap HW offload using flow director with DPDK on Intel Fortville NICs
-description: "Virtual eXtensible LAN (VXLAN) tunnels support L2 overlay networks that span L3 networks"
-state: production
-properties: [API, CLI, MULTITHREAD]
diff --git a/src/vnet/vxlan/decap.c b/src/vnet/vxlan/decap.c
deleted file mode 100644
index 729293fb3e5..00000000000
--- a/src/vnet/vxlan/decap.c
+++ /dev/null
@@ -1,1330 +0,0 @@
-/*
- * decap.c: vxlan tunnel decap packet processing
- *
- * Copyright (c) 2013 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vlib/vlib.h>
-#include <vnet/vxlan/vxlan.h>
-#include <vnet/udp/udp_local.h>
-
-#ifndef CLIB_MARCH_VARIANT
-vlib_node_registration_t vxlan4_input_node;
-vlib_node_registration_t vxlan6_input_node;
-#endif
-
-typedef struct
-{
- u32 next_index;
- u32 tunnel_index;
- u32 error;
- u32 vni;
-} vxlan_rx_trace_t;
-
-static u8 *
-format_vxlan_rx_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- vxlan_rx_trace_t *t = va_arg (*args, vxlan_rx_trace_t *);
-
- if (t->tunnel_index == ~0)
- return format (s, "VXLAN decap error - tunnel for vni %d does not exist",
- t->vni);
- return format (s, "VXLAN decap from vxlan_tunnel%d vni %d next %d error %d",
- t->tunnel_index, t->vni, t->next_index, t->error);
-}
-
-typedef vxlan4_tunnel_key_t last_tunnel_cache4;
-
-static const vxlan_decap_info_t decap_not_found = {
- .sw_if_index = ~0,
- .next_index = VXLAN_INPUT_NEXT_DROP,
- .error = VXLAN_ERROR_NO_SUCH_TUNNEL
-};
-
-static const vxlan_decap_info_t decap_bad_flags = {
- .sw_if_index = ~0,
- .next_index = VXLAN_INPUT_NEXT_DROP,
- .error = VXLAN_ERROR_BAD_FLAGS
-};
-
-always_inline vxlan_decap_info_t
-vxlan4_find_tunnel (vxlan_main_t * vxm, last_tunnel_cache4 * cache,
- u32 fib_index, ip4_header_t * ip4_0,
- vxlan_header_t * vxlan0, u32 * stats_sw_if_index)
-{
- if (PREDICT_FALSE (vxlan0->flags != VXLAN_FLAGS_I))
- return decap_bad_flags;
-
- /* Make sure the VXLAN tunnel exists according to packet S/D IP, UDP port,
- * VRF, and VNI */
- u32 dst = ip4_0->dst_address.as_u32;
- u32 src = ip4_0->src_address.as_u32;
- udp_header_t *udp = ip4_next_header (ip4_0);
- vxlan4_tunnel_key_t key4 = {
- .key[0] = ((u64) dst << 32) | src,
- .key[1] = ((u64) udp->dst_port << 48) | ((u64) fib_index << 32) |
- vxlan0->vni_reserved,
- };
-
- if (PREDICT_TRUE
- (key4.key[0] == cache->key[0] && key4.key[1] == cache->key[1]))
- {
- /* cache hit */
- vxlan_decap_info_t di = {.as_u64 = cache->value };
- *stats_sw_if_index = di.sw_if_index;
- return di;
- }
-
- int rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4);
- if (PREDICT_TRUE (rv == 0))
- {
- *cache = key4;
- vxlan_decap_info_t di = {.as_u64 = key4.value };
- *stats_sw_if_index = di.sw_if_index;
- return di;
- }
-
- /* try multicast */
- if (PREDICT_TRUE (!ip4_address_is_multicast (&ip4_0->dst_address)))
- return decap_not_found;
-
- /* search for mcast decap info by mcast address */
- key4.key[0] = dst;
- rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4);
- if (rv != 0)
- return decap_not_found;
-
- /* search for unicast tunnel using the mcast tunnel local(src) ip */
- vxlan_decap_info_t mdi = {.as_u64 = key4.value };
- key4.key[0] = ((u64) mdi.local_ip.as_u32 << 32) | src;
- rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4);
- if (PREDICT_FALSE (rv != 0))
- return decap_not_found;
-
- /* mcast traffic does not update the cache */
- *stats_sw_if_index = mdi.sw_if_index;
- vxlan_decap_info_t di = {.as_u64 = key4.value };
- return di;
-}
-
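Restating the unicast key packing used by vxlan4_find_tunnel() above in isolation (the helper is illustrative; address, port and VNI fields stay in network byte order, only fib_index is host order):

    static inline void
    example_vxlan4_key (vxlan4_tunnel_key_t *key4, const ip4_header_t *ip4,
                        const udp_header_t *udp, const vxlan_header_t *vxlan,
                        u32 fib_index)
    {
      /* key[0]: dst address in the high 32 bits, src address in the low 32 */
      key4->key[0] =
        ((u64) ip4->dst_address.as_u32 << 32) | ip4->src_address.as_u32;
      /* key[1]: dst UDP port (bits 48..63) | fib index (32..47) | vni_reserved */
      key4->key[1] = ((u64) udp->dst_port << 48) | ((u64) fib_index << 32) |
                     vxlan->vni_reserved;
    }
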
-typedef vxlan6_tunnel_key_t last_tunnel_cache6;
-
-always_inline vxlan_decap_info_t
-vxlan6_find_tunnel (vxlan_main_t * vxm, last_tunnel_cache6 * cache,
- u32 fib_index, ip6_header_t * ip6_0,
- vxlan_header_t * vxlan0, u32 * stats_sw_if_index)
-{
- if (PREDICT_FALSE (vxlan0->flags != VXLAN_FLAGS_I))
- return decap_bad_flags;
-
- /* Make sure the VXLAN tunnel exists according to packet SIP, UDP port,
- * VRF, and VNI */
- udp_header_t *udp = ip6_next_header (ip6_0);
- vxlan6_tunnel_key_t key6 = {
- .key[0] = ip6_0->src_address.as_u64[0],
- .key[1] = ip6_0->src_address.as_u64[1],
- .key[2] = ((u64) udp->dst_port << 48) | ((u64) fib_index << 32) |
- vxlan0->vni_reserved,
- };
-
- if (PREDICT_FALSE
- (clib_bihash_key_compare_24_8 (key6.key, cache->key) == 0))
- {
- int rv =
- clib_bihash_search_inline_24_8 (&vxm->vxlan6_tunnel_by_key, &key6);
- if (PREDICT_FALSE (rv != 0))
- return decap_not_found;
-
- *cache = key6;
- }
- vxlan_tunnel_t *t0 = pool_elt_at_index (vxm->tunnels, cache->value);
-
- /* Validate VXLAN tunnel SIP against packet DIP */
- if (PREDICT_TRUE (ip6_address_is_equal (&ip6_0->dst_address, &t0->src.ip6)))
- *stats_sw_if_index = t0->sw_if_index;
- else
- {
- /* try multicast */
- if (PREDICT_TRUE (!ip6_address_is_multicast (&ip6_0->dst_address)))
- return decap_not_found;
-
- /* Make sure the mcast VXLAN tunnel exists by packet DIP and VNI */
- key6.key[0] = ip6_0->dst_address.as_u64[0];
- key6.key[1] = ip6_0->dst_address.as_u64[1];
- int rv =
- clib_bihash_search_inline_24_8 (&vxm->vxlan6_tunnel_by_key, &key6);
- if (PREDICT_FALSE (rv != 0))
- return decap_not_found;
-
- vxlan_tunnel_t *mcast_t0 = pool_elt_at_index (vxm->tunnels, key6.value);
- *stats_sw_if_index = mcast_t0->sw_if_index;
- }
-
- vxlan_decap_info_t di = {
- .sw_if_index = t0->sw_if_index,
- .next_index = t0->decap_next_index,
- };
- return di;
-}
-
-always_inline uword
-vxlan_input (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame, u32 is_ip4)
-{
- vxlan_main_t *vxm = &vxlan_main;
- vnet_main_t *vnm = vxm->vnet_main;
- vnet_interface_main_t *im = &vnm->interface_main;
- vlib_combined_counter_main_t *rx_counter =
- im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX;
- last_tunnel_cache4 last4;
- last_tunnel_cache6 last6;
- u32 pkts_dropped = 0;
- u32 thread_index = vlib_get_thread_index ();
-
- if (is_ip4)
- clib_memset (&last4, 0xff, sizeof last4);
- else
- clib_memset (&last6, 0xff, sizeof last6);
-
- u32 *from = vlib_frame_vector_args (from_frame);
- u32 n_left_from = from_frame->n_vectors;
-
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
- vlib_get_buffers (vm, from, bufs, n_left_from);
-
- u32 stats_if0 = ~0, stats_if1 = ~0;
- u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
- while (n_left_from >= 4)
- {
- /* Prefetch next iteration. */
- vlib_prefetch_buffer_header (b[2], LOAD);
- vlib_prefetch_buffer_header (b[3], LOAD);
-
- /* udp leaves current_data pointing at the vxlan header */
- void *cur0 = vlib_buffer_get_current (b[0]);
- void *cur1 = vlib_buffer_get_current (b[1]);
- vxlan_header_t *vxlan0 = cur0;
- vxlan_header_t *vxlan1 = cur1;
-
-
- ip4_header_t *ip4_0, *ip4_1;
- ip6_header_t *ip6_0, *ip6_1;
- if (is_ip4)
- {
- ip4_0 = cur0 - sizeof (udp_header_t) - sizeof (ip4_header_t);
- ip4_1 = cur1 - sizeof (udp_header_t) - sizeof (ip4_header_t);
- }
- else
- {
- ip6_0 = cur0 - sizeof (udp_header_t) - sizeof (ip6_header_t);
- ip6_1 = cur1 - sizeof (udp_header_t) - sizeof (ip6_header_t);
- }
-
- /* pop vxlan */
- vlib_buffer_advance (b[0], sizeof *vxlan0);
- vlib_buffer_advance (b[1], sizeof *vxlan1);
-
- u32 fi0 = vlib_buffer_get_ip_fib_index (b[0], is_ip4);
- u32 fi1 = vlib_buffer_get_ip_fib_index (b[1], is_ip4);
-
- vxlan_decap_info_t di0 = is_ip4 ?
- vxlan4_find_tunnel (vxm, &last4, fi0, ip4_0, vxlan0, &stats_if0) :
- vxlan6_find_tunnel (vxm, &last6, fi0, ip6_0, vxlan0, &stats_if0);
- vxlan_decap_info_t di1 = is_ip4 ?
- vxlan4_find_tunnel (vxm, &last4, fi1, ip4_1, vxlan1, &stats_if1) :
- vxlan6_find_tunnel (vxm, &last6, fi1, ip6_1, vxlan1, &stats_if1);
-
- /* Prefetch next iteration. */
- clib_prefetch_load (b[2]->data);
- clib_prefetch_load (b[3]->data);
-
- u32 len0 = vlib_buffer_length_in_chain (vm, b[0]);
- u32 len1 = vlib_buffer_length_in_chain (vm, b[1]);
-
- next[0] = di0.next_index;
- next[1] = di1.next_index;
-
- u8 any_error = di0.error | di1.error;
- if (PREDICT_TRUE (any_error == 0))
- {
- /* Required to make the l2 tag push / pop code work on l2 subifs */
- vnet_update_l2_len (b[0]);
- vnet_update_l2_len (b[1]);
- /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
- vnet_buffer (b[0])->sw_if_index[VLIB_RX] = di0.sw_if_index;
- vnet_buffer (b[1])->sw_if_index[VLIB_RX] = di1.sw_if_index;
- vlib_increment_combined_counter (rx_counter, thread_index,
- stats_if0, 1, len0);
- vlib_increment_combined_counter (rx_counter, thread_index,
- stats_if1, 1, len1);
- }
- else
- {
- if (di0.error == 0)
- {
- vnet_update_l2_len (b[0]);
- vnet_buffer (b[0])->sw_if_index[VLIB_RX] = di0.sw_if_index;
- vlib_increment_combined_counter (rx_counter, thread_index,
- stats_if0, 1, len0);
- }
- else
- {
- b[0]->error = node->errors[di0.error];
- pkts_dropped++;
- }
-
- if (di1.error == 0)
- {
- vnet_update_l2_len (b[1]);
- vnet_buffer (b[1])->sw_if_index[VLIB_RX] = di1.sw_if_index;
- vlib_increment_combined_counter (rx_counter, thread_index,
- stats_if1, 1, len1);
- }
- else
- {
- b[1]->error = node->errors[di1.error];
- pkts_dropped++;
- }
- }
-
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_rx_trace_t *tr =
- vlib_add_trace (vm, node, b[0], sizeof (*tr));
- tr->next_index = next[0];
- tr->error = di0.error;
- tr->tunnel_index = di0.sw_if_index == ~0 ?
- ~0 : vxm->tunnel_index_by_sw_if_index[di0.sw_if_index];
- tr->vni = vnet_get_vni (vxlan0);
- }
- if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_rx_trace_t *tr =
- vlib_add_trace (vm, node, b[1], sizeof (*tr));
- tr->next_index = next[1];
- tr->error = di1.error;
- tr->tunnel_index = di1.sw_if_index == ~0 ?
- ~0 : vxm->tunnel_index_by_sw_if_index[di1.sw_if_index];
- tr->vni = vnet_get_vni (vxlan1);
- }
- b += 2;
- next += 2;
- n_left_from -= 2;
- }
-
- while (n_left_from > 0)
- {
- /* udp leaves current_data pointing at the vxlan header */
- void *cur0 = vlib_buffer_get_current (b[0]);
- vxlan_header_t *vxlan0 = cur0;
- ip4_header_t *ip4_0;
- ip6_header_t *ip6_0;
- if (is_ip4)
- ip4_0 = cur0 - sizeof (udp_header_t) - sizeof (ip4_header_t);
- else
- ip6_0 = cur0 - sizeof (udp_header_t) - sizeof (ip6_header_t);
-
- /* pop (ip, udp, vxlan) */
- vlib_buffer_advance (b[0], sizeof (*vxlan0));
-
- u32 fi0 = vlib_buffer_get_ip_fib_index (b[0], is_ip4);
-
- vxlan_decap_info_t di0 = is_ip4 ?
- vxlan4_find_tunnel (vxm, &last4, fi0, ip4_0, vxlan0, &stats_if0) :
- vxlan6_find_tunnel (vxm, &last6, fi0, ip6_0, vxlan0, &stats_if0);
-
- uword len0 = vlib_buffer_length_in_chain (vm, b[0]);
-
- next[0] = di0.next_index;
-
- /* Validate VXLAN tunnel encap-fib index against packet */
- if (di0.error == 0)
- {
- /* Required to make the l2 tag push / pop code work on l2 subifs */
- vnet_update_l2_len (b[0]);
-
- /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
- vnet_buffer (b[0])->sw_if_index[VLIB_RX] = di0.sw_if_index;
-
- vlib_increment_combined_counter (rx_counter, thread_index,
- stats_if0, 1, len0);
- }
- else
- {
- b[0]->error = node->errors[di0.error];
- pkts_dropped++;
- }
-
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_rx_trace_t *tr
- = vlib_add_trace (vm, node, b[0], sizeof (*tr));
- tr->next_index = next[0];
- tr->error = di0.error;
- tr->tunnel_index = di0.sw_if_index == ~0 ?
- ~0 : vxm->tunnel_index_by_sw_if_index[di0.sw_if_index];
- tr->vni = vnet_get_vni (vxlan0);
- }
- b += 1;
- next += 1;
- n_left_from -= 1;
- }
- vlib_buffer_enqueue_to_next (vm, node, from, nexts, from_frame->n_vectors);
- /* Do we still need this now that tunnel tx stats is kept? */
- u32 node_idx = is_ip4 ? vxlan4_input_node.index : vxlan6_input_node.index;
- vlib_node_increment_counter (vm, node_idx, VXLAN_ERROR_DECAPSULATED,
- from_frame->n_vectors - pkts_dropped);
-
- return from_frame->n_vectors;
-}
-
-VLIB_NODE_FN (vxlan4_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return vxlan_input (vm, node, from_frame, /* is_ip4 */ 1);
-}
-
-VLIB_NODE_FN (vxlan6_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return vxlan_input (vm, node, from_frame, /* is_ip4 */ 0);
-}
-
-static char *vxlan_error_strings[] = {
-#define vxlan_error(n,s) s,
-#include <vnet/vxlan/vxlan_error.def>
-#undef vxlan_error
-};
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (vxlan4_input_node) =
-{
- .name = "vxlan4-input",
- .vector_size = sizeof (u32),
- .n_errors = VXLAN_N_ERROR,
- .error_strings = vxlan_error_strings,
- .n_next_nodes = VXLAN_INPUT_N_NEXT,
- .format_trace = format_vxlan_rx_trace,
- .next_nodes = {
-#define _(s,n) [VXLAN_INPUT_NEXT_##s] = n,
- foreach_vxlan_input_next
-#undef _
- },
-};
-
-VLIB_REGISTER_NODE (vxlan6_input_node) =
-{
- .name = "vxlan6-input",
- .vector_size = sizeof (u32),
- .n_errors = VXLAN_N_ERROR,
- .error_strings = vxlan_error_strings,
- .n_next_nodes = VXLAN_INPUT_N_NEXT,
- .next_nodes = {
-#define _(s,n) [VXLAN_INPUT_NEXT_##s] = n,
- foreach_vxlan_input_next
-#undef _
- },
- .format_trace = format_vxlan_rx_trace,
-};
-/* *INDENT-ON* */
-
-typedef enum
-{
- IP_VXLAN_BYPASS_NEXT_DROP,
- IP_VXLAN_BYPASS_NEXT_VXLAN,
- IP_VXLAN_BYPASS_N_NEXT,
-} ip_vxlan_bypass_next_t;
-
-always_inline uword
-ip_vxlan_bypass_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, u32 is_ip4)
-{
- vxlan_main_t *vxm = &vxlan_main;
- u32 *from, *to_next, n_left_from, n_left_to_next, next_index;
- vlib_node_runtime_t *error_node =
- vlib_node_get_runtime (vm, ip4_input_node.index);
- vtep4_key_t last_vtep4; /* last IPv4 address / fib index
- matching a local VTEP address */
- vtep6_key_t last_vtep6; /* last IPv6 address / fib index
- matching a local VTEP address */
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
-
- last_tunnel_cache4 last4;
- last_tunnel_cache6 last6;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
-
- vlib_get_buffers (vm, from, bufs, n_left_from);
-
- if (node->flags & VLIB_NODE_FLAG_TRACE)
- ip4_forward_next_trace (vm, node, frame, VLIB_TX);
-
- if (is_ip4)
- {
- vtep4_key_init (&last_vtep4);
- clib_memset (&last4, 0xff, sizeof last4);
- }
- else
- {
- vtep6_key_init (&last_vtep6);
- clib_memset (&last6, 0xff, sizeof last6);
- }
-
- while (n_left_from > 0)
- {
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- vlib_buffer_t *b0, *b1;
- ip4_header_t *ip40, *ip41;
- ip6_header_t *ip60, *ip61;
- udp_header_t *udp0, *udp1;
- vxlan_header_t *vxlan0, *vxlan1;
- u32 bi0, ip_len0, udp_len0, flags0, next0;
- u32 bi1, ip_len1, udp_len1, flags1, next1;
- i32 len_diff0, len_diff1;
- u8 error0, good_udp0, proto0;
- u8 error1, good_udp1, proto1;
- u32 stats_if0 = ~0, stats_if1 = ~0;
-
- /* Prefetch next iteration. */
- {
- vlib_prefetch_buffer_header (b[2], LOAD);
- vlib_prefetch_buffer_header (b[3], LOAD);
-
- CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- }
-
- bi0 = to_next[0] = from[0];
- bi1 = to_next[1] = from[1];
- from += 2;
- n_left_from -= 2;
- to_next += 2;
- n_left_to_next -= 2;
-
- b0 = b[0];
- b1 = b[1];
- b += 2;
- if (is_ip4)
- {
- ip40 = vlib_buffer_get_current (b0);
- ip41 = vlib_buffer_get_current (b1);
- }
- else
- {
- ip60 = vlib_buffer_get_current (b0);
- ip61 = vlib_buffer_get_current (b1);
- }
-
- /* Setup packet for next IP feature */
- vnet_feature_next (&next0, b0);
- vnet_feature_next (&next1, b1);
-
- if (is_ip4)
- {
- /* Treat IP frag packets as "experimental" protocol for now
- until support of IP frag reassembly is implemented */
- proto0 = ip4_is_fragment (ip40) ? 0xfe : ip40->protocol;
- proto1 = ip4_is_fragment (ip41) ? 0xfe : ip41->protocol;
- }
- else
- {
- proto0 = ip60->protocol;
- proto1 = ip61->protocol;
- }
-
- /* Process packet 0 */
- if (proto0 != IP_PROTOCOL_UDP)
- goto exit0; /* not UDP packet */
-
- if (is_ip4)
- udp0 = ip4_next_header (ip40);
- else
- udp0 = ip6_next_header (ip60);
-
- u32 fi0 = vlib_buffer_get_ip_fib_index (b0, is_ip4);
- vxlan0 = vlib_buffer_get_current (b0) + sizeof (udp_header_t) +
- sizeof (ip4_header_t);
-
- vxlan_decap_info_t di0 =
- is_ip4 ?
- vxlan4_find_tunnel (vxm, &last4, fi0, ip40, vxlan0, &stats_if0) :
- vxlan6_find_tunnel (vxm, &last6, fi0, ip60, vxlan0, &stats_if0);
-
- if (PREDICT_FALSE (di0.sw_if_index == ~0))
- goto exit0; /* unknown interface */
-
- /* Validate DIP against VTEPs */
- if (is_ip4)
- {
-#ifdef CLIB_HAVE_VEC512
- if (!vtep4_check_vector (&vxm->vtep_table, b0, ip40, &last_vtep4,
- &vxm->vtep4_u512))
-#else
- if (!vtep4_check (&vxm->vtep_table, b0, ip40, &last_vtep4))
-#endif
- goto exit0; /* no local VTEP for VXLAN packet */
- }
- else
- {
- if (!vtep6_check (&vxm->vtep_table, b0, ip60, &last_vtep6))
- goto exit0; /* no local VTEP for VXLAN packet */
- }
-
- flags0 = b0->flags;
- good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
-
- /* Don't verify UDP checksum for packets with explicit zero checksum. */
- good_udp0 |= udp0->checksum == 0;
-
- /* Verify UDP length */
- if (is_ip4)
- ip_len0 = clib_net_to_host_u16 (ip40->length);
- else
- ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
- udp_len0 = clib_net_to_host_u16 (udp0->length);
- len_diff0 = ip_len0 - udp_len0;
-
- /* Verify UDP checksum */
- if (PREDICT_FALSE (!good_udp0))
- {
- if (is_ip4)
- flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
- else
- flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
- good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
- }
-
- if (is_ip4)
- {
- error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
- }
- else
- {
- error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
- }
-
- next0 = error0 ?
- IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
- b0->error = error0 ? error_node->errors[error0] : 0;
-
- /* vxlan-input node expects current data to be at the VXLAN header */
- if (is_ip4)
- vlib_buffer_advance (b0,
- sizeof (ip4_header_t) +
- sizeof (udp_header_t));
- else
- vlib_buffer_advance (b0,
- sizeof (ip6_header_t) +
- sizeof (udp_header_t));
-
- exit0:
- /* Process packet 1 */
- if (proto1 != IP_PROTOCOL_UDP)
- goto exit1; /* not UDP packet */
-
- if (is_ip4)
- udp1 = ip4_next_header (ip41);
- else
- udp1 = ip6_next_header (ip61);
-
- u32 fi1 = vlib_buffer_get_ip_fib_index (b1, is_ip4);
- vxlan1 = vlib_buffer_get_current (b1) + sizeof (udp_header_t) +
- sizeof (ip4_header_t);
-
- vxlan_decap_info_t di1 =
- is_ip4 ?
- vxlan4_find_tunnel (vxm, &last4, fi1, ip41, vxlan1, &stats_if1) :
- vxlan6_find_tunnel (vxm, &last6, fi1, ip61, vxlan1, &stats_if1);
-
- if (PREDICT_FALSE (di1.sw_if_index == ~0))
- goto exit1; /* unknown interface */
-
- /* Validate DIP against VTEPs */
- if (is_ip4)
- {
-#ifdef CLIB_HAVE_VEC512
- if (!vtep4_check_vector (&vxm->vtep_table, b1, ip41, &last_vtep4,
- &vxm->vtep4_u512))
-#else
- if (!vtep4_check (&vxm->vtep_table, b1, ip41, &last_vtep4))
-#endif
- goto exit1; /* no local VTEP for VXLAN packet */
- }
- else
- {
- if (!vtep6_check (&vxm->vtep_table, b1, ip61, &last_vtep6))
- goto exit1; /* no local VTEP for VXLAN packet */
- }
-
- flags1 = b1->flags;
- good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
-
- /* Don't verify UDP checksum for packets with explicit zero checksum. */
- good_udp1 |= udp1->checksum == 0;
-
- /* Verify UDP length */
- if (is_ip4)
- ip_len1 = clib_net_to_host_u16 (ip41->length);
- else
- ip_len1 = clib_net_to_host_u16 (ip61->payload_length);
- udp_len1 = clib_net_to_host_u16 (udp1->length);
- len_diff1 = ip_len1 - udp_len1;
-
- /* Verify UDP checksum */
- if (PREDICT_FALSE (!good_udp1))
- {
- if (is_ip4)
- flags1 = ip4_tcp_udp_validate_checksum (vm, b1);
- else
- flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, b1);
- good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
- }
-
- if (is_ip4)
- {
- error1 = good_udp1 ? 0 : IP4_ERROR_UDP_CHECKSUM;
- error1 = (len_diff1 >= 0) ? error1 : IP4_ERROR_UDP_LENGTH;
- }
- else
- {
- error1 = good_udp1 ? 0 : IP6_ERROR_UDP_CHECKSUM;
- error1 = (len_diff1 >= 0) ? error1 : IP6_ERROR_UDP_LENGTH;
- }
-
- next1 = error1 ?
- IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
- b1->error = error1 ? error_node->errors[error1] : 0;
-
- /* vxlan-input node expects current data to be at the VXLAN header */
- if (is_ip4)
- vlib_buffer_advance (b1,
- sizeof (ip4_header_t) +
- sizeof (udp_header_t));
- else
- vlib_buffer_advance (b1,
- sizeof (ip6_header_t) +
- sizeof (udp_header_t));
-
- exit1:
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- vlib_buffer_t *b0;
- ip4_header_t *ip40;
- ip6_header_t *ip60;
- udp_header_t *udp0;
- vxlan_header_t *vxlan0;
- u32 bi0, ip_len0, udp_len0, flags0, next0;
- i32 len_diff0;
- u8 error0, good_udp0, proto0;
- u32 stats_if0 = ~0;
-
- bi0 = to_next[0] = from[0];
- from += 1;
- n_left_from -= 1;
- to_next += 1;
- n_left_to_next -= 1;
-
- b0 = b[0];
- b++;
- if (is_ip4)
- ip40 = vlib_buffer_get_current (b0);
- else
- ip60 = vlib_buffer_get_current (b0);
-
- /* Setup packet for next IP feature */
- vnet_feature_next (&next0, b0);
-
- if (is_ip4)
- /* Treat IP4 fragments as "experimental" protocol (0xfe) for now,
- until IP fragment reassembly is supported */
- proto0 = ip4_is_fragment (ip40) ? 0xfe : ip40->protocol;
- else
- proto0 = ip60->protocol;
-
- if (proto0 != IP_PROTOCOL_UDP)
- goto exit; /* not UDP packet */
-
- if (is_ip4)
- udp0 = ip4_next_header (ip40);
- else
- udp0 = ip6_next_header (ip60);
-
- u32 fi0 = vlib_buffer_get_ip_fib_index (b0, is_ip4);
- vxlan0 = vlib_buffer_get_current (b0) + sizeof (udp_header_t) +
- sizeof (ip4_header_t);
-
- vxlan_decap_info_t di0 =
- is_ip4 ?
- vxlan4_find_tunnel (vxm, &last4, fi0, ip40, vxlan0, &stats_if0) :
- vxlan6_find_tunnel (vxm, &last6, fi0, ip60, vxlan0, &stats_if0);
-
- if (PREDICT_FALSE (di0.sw_if_index == ~0))
- goto exit; /* unknown interface */
-
- /* Validate DIP against VTEPs */
- if (is_ip4)
- {
-#ifdef CLIB_HAVE_VEC512
- if (!vtep4_check_vector (&vxm->vtep_table, b0, ip40, &last_vtep4,
- &vxm->vtep4_u512))
-#else
- if (!vtep4_check (&vxm->vtep_table, b0, ip40, &last_vtep4))
-#endif
- goto exit; /* no local VTEP for VXLAN packet */
- }
- else
- {
- if (!vtep6_check (&vxm->vtep_table, b0, ip60, &last_vtep6))
- goto exit; /* no local VTEP for VXLAN packet */
- }
-
- flags0 = b0->flags;
- good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
-
- /* Don't verify UDP checksum for packets with explicit zero checksum. */
- good_udp0 |= udp0->checksum == 0;
-
- /* Verify UDP length */
- if (is_ip4)
- ip_len0 = clib_net_to_host_u16 (ip40->length);
- else
- ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
- udp_len0 = clib_net_to_host_u16 (udp0->length);
- len_diff0 = ip_len0 - udp_len0;
-
- /* Verify UDP checksum */
- if (PREDICT_FALSE (!good_udp0))
- {
- if (is_ip4)
- flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
- else
- flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
- good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
- }
-
- if (is_ip4)
- {
- error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
- }
- else
- {
- error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
- }
-
- next0 = error0 ?
- IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
- b0->error = error0 ? error_node->errors[error0] : 0;
-
- /* vxlan-input node expects current data to be at the VXLAN header */
- if (is_ip4)
- vlib_buffer_advance (b0,
- sizeof (ip4_header_t) +
- sizeof (udp_header_t));
- else
- vlib_buffer_advance (b0,
- sizeof (ip6_header_t) +
- sizeof (udp_header_t));
-
- exit:
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (ip4_vxlan_bypass_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return ip_vxlan_bypass_inline (vm, node, frame, /* is_ip4 */ 1);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip4_vxlan_bypass_node) =
-{
- .name = "ip4-vxlan-bypass",
- .vector_size = sizeof (u32),
- .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT,
- .next_nodes = {
- [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop",
- [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan4-input",
- },
- .format_buffer = format_ip4_header,
- .format_trace = format_ip4_forward_next_trace,
-};
-
-/* *INDENT-ON* */
-
-/* Dummy init function to get us linked in. */
-static clib_error_t *
-ip4_vxlan_bypass_init (vlib_main_t * vm)
-{
- return 0;
-}
-
-VLIB_INIT_FUNCTION (ip4_vxlan_bypass_init);
-
-VLIB_NODE_FN (ip6_vxlan_bypass_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return ip_vxlan_bypass_inline (vm, node, frame, /* is_ip4 */ 0);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip6_vxlan_bypass_node) =
-{
- .name = "ip6-vxlan-bypass",
- .vector_size = sizeof (u32),
- .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT,
- .next_nodes = {
- [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop",
- [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan6-input",
- },
- .format_buffer = format_ip6_header,
- .format_trace = format_ip6_forward_next_trace,
-};
-
-/* *INDENT-ON* */
-
-/* Dummy init function to get us linked in. */
-static clib_error_t *
-ip6_vxlan_bypass_init (vlib_main_t * vm)
-{
- return 0;
-}
-
-VLIB_INIT_FUNCTION (ip6_vxlan_bypass_init);
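-
-/* Sketch (not part of the original file): the bypass nodes above are
- * attached per-interface as features on the ip4-unicast / ip6-unicast
- * arcs, e.g.
- *
- *   vnet_feature_enable_disable ("ip4-unicast", "ip4-vxlan-bypass",
- *                                sw_if_index, 1, 0, 0);
- *
- * and the ip6 variant via "ip6-unicast" / "ip6-vxlan-bypass".  The
- * sw_if_index here is illustrative. */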
-
-#define foreach_vxlan_flow_input_next \
-_(DROP, "error-drop") \
-_(L2_INPUT, "l2-input")
-
-typedef enum
-{
-#define _(s,n) VXLAN_FLOW_NEXT_##s,
- foreach_vxlan_flow_input_next
-#undef _
- VXLAN_FLOW_N_NEXT,
-} vxlan_flow_input_next_t;
-
-#define foreach_vxlan_flow_error \
- _(NONE, "no error") \
- _(IP_CHECKSUM_ERROR, "Rx ip checksum errors") \
- _(IP_HEADER_ERROR, "Rx ip header errors") \
- _(UDP_CHECKSUM_ERROR, "Rx udp checksum errors") \
- _(UDP_LENGTH_ERROR, "Rx udp length errors")
-
-typedef enum
-{
-#define _(f,s) VXLAN_FLOW_ERROR_##f,
- foreach_vxlan_flow_error
-#undef _
- VXLAN_FLOW_N_ERROR,
-} vxlan_flow_error_t;
-
-static char *vxlan_flow_error_strings[] = {
-#define _(n,s) s,
- foreach_vxlan_flow_error
-#undef _
-};
-
-static_always_inline u8
-vxlan_validate_udp_csum (vlib_main_t * vm, vlib_buffer_t * b)
-{
- u32 flags = b->flags;
- enum
- { offset =
- sizeof (ip4_header_t) + sizeof (udp_header_t) + sizeof (vxlan_header_t),
- };
-
- /* Verify UDP checksum */
- if ((flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
- {
- vlib_buffer_advance (b, -offset);
- flags = ip4_tcp_udp_validate_checksum (vm, b);
- vlib_buffer_advance (b, offset);
- }
-
- return (flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
-}
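-
-/* Note on the rewind/advance pair above: ip4_tcp_udp_validate_checksum()
- * expects current_data at the outer ip4 header, while this node runs with
- * current_data already at the inner payload, 'offset' bytes further on. */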
-
-static_always_inline u8
-vxlan_check_udp_csum (vlib_main_t * vm, vlib_buffer_t * b)
-{
- ip4_vxlan_header_t *hdr = vlib_buffer_get_current (b) - sizeof *hdr;
- udp_header_t *udp = &hdr->udp;
- /* Don't verify UDP checksum for packets with explicit zero checksum. */
- u8 good_csum = (b->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0 ||
- udp->checksum == 0;
-
- return !good_csum;
-}
-
-static_always_inline u8
-vxlan_check_ip (vlib_buffer_t * b, u16 payload_len)
-{
- ip4_vxlan_header_t *hdr = vlib_buffer_get_current (b) - sizeof *hdr;
- u16 ip_len = clib_net_to_host_u16 (hdr->ip4.length);
- u16 expected = payload_len + sizeof *hdr;
- return ip_len > expected || hdr->ip4.ttl == 0
- || hdr->ip4.ip_version_and_header_length != 0x45;
-}
-
-static_always_inline u8
-vxlan_check_ip_udp_len (vlib_buffer_t * b)
-{
- ip4_vxlan_header_t *hdr = vlib_buffer_get_current (b) - sizeof *hdr;
- u16 ip_len = clib_net_to_host_u16 (hdr->ip4.length);
- u16 udp_len = clib_net_to_host_u16 (hdr->udp.length);
- return udp_len > ip_len;
-}
-
-static_always_inline u8
-vxlan_err_code (u8 ip_err0, u8 udp_err0, u8 csum_err0)
-{
- u8 error0 = VXLAN_FLOW_ERROR_NONE;
- if (ip_err0)
- error0 = VXLAN_FLOW_ERROR_IP_HEADER_ERROR;
- if (udp_err0)
- error0 = VXLAN_FLOW_ERROR_UDP_LENGTH_ERROR;
- if (csum_err0)
- error0 = VXLAN_FLOW_ERROR_UDP_CHECKSUM_ERROR;
- return error0;
-}
-
-VLIB_NODE_FN (vxlan4_flow_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * f)
-{
- enum
- { payload_offset = sizeof (ip4_vxlan_header_t) };
-
- vxlan_main_t *vxm = &vxlan_main;
- vnet_interface_main_t *im = &vnet_main.interface_main;
- vlib_combined_counter_main_t *rx_counter[VXLAN_FLOW_N_NEXT] = {
- [VXLAN_FLOW_NEXT_DROP] =
- im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_DROP,
- [VXLAN_FLOW_NEXT_L2_INPUT] =
- im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
- };
- u32 thread_index = vlib_get_thread_index ();
-
- u32 *from = vlib_frame_vector_args (f);
- u32 n_left_from = f->n_vectors;
- u32 next_index = VXLAN_FLOW_NEXT_L2_INPUT;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next, *to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 3 && n_left_to_next > 3)
- {
- u32 bi0 = to_next[0] = from[0];
- u32 bi1 = to_next[1] = from[1];
- u32 bi2 = to_next[2] = from[2];
- u32 bi3 = to_next[3] = from[3];
- from += 4;
- n_left_from -= 4;
- to_next += 4;
- n_left_to_next -= 4;
-
- vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
- vlib_buffer_t *b1 = vlib_get_buffer (vm, bi1);
- vlib_buffer_t *b2 = vlib_get_buffer (vm, bi2);
- vlib_buffer_t *b3 = vlib_get_buffer (vm, bi3);
-
- vlib_buffer_advance (b0, payload_offset);
- vlib_buffer_advance (b1, payload_offset);
- vlib_buffer_advance (b2, payload_offset);
- vlib_buffer_advance (b3, payload_offset);
-
- u16 len0 = vlib_buffer_length_in_chain (vm, b0);
- u16 len1 = vlib_buffer_length_in_chain (vm, b1);
- u16 len2 = vlib_buffer_length_in_chain (vm, b2);
- u16 len3 = vlib_buffer_length_in_chain (vm, b3);
-
- u32 next0 = VXLAN_FLOW_NEXT_L2_INPUT;
- u32 next1 = VXLAN_FLOW_NEXT_L2_INPUT;
- u32 next2 = VXLAN_FLOW_NEXT_L2_INPUT;
- u32 next3 = VXLAN_FLOW_NEXT_L2_INPUT;
-
- u8 ip_err0 = vxlan_check_ip (b0, len0);
- u8 ip_err1 = vxlan_check_ip (b1, len1);
- u8 ip_err2 = vxlan_check_ip (b2, len2);
- u8 ip_err3 = vxlan_check_ip (b3, len3);
- u8 ip_err = ip_err0 | ip_err1 | ip_err2 | ip_err3;
-
- u8 udp_err0 = vxlan_check_ip_udp_len (b0);
- u8 udp_err1 = vxlan_check_ip_udp_len (b1);
- u8 udp_err2 = vxlan_check_ip_udp_len (b2);
- u8 udp_err3 = vxlan_check_ip_udp_len (b3);
- u8 udp_err = udp_err0 | udp_err1 | udp_err2 | udp_err3;
-
- u8 csum_err0 = vxlan_check_udp_csum (vm, b0);
- u8 csum_err1 = vxlan_check_udp_csum (vm, b1);
- u8 csum_err2 = vxlan_check_udp_csum (vm, b2);
- u8 csum_err3 = vxlan_check_udp_csum (vm, b3);
- u8 csum_err = csum_err0 | csum_err1 | csum_err2 | csum_err3;
-
- if (PREDICT_FALSE (csum_err))
- {
- if (csum_err0)
- csum_err0 = !vxlan_validate_udp_csum (vm, b0);
- if (csum_err1)
- csum_err1 = !vxlan_validate_udp_csum (vm, b1);
- if (csum_err2)
- csum_err2 = !vxlan_validate_udp_csum (vm, b2);
- if (csum_err3)
- csum_err3 = !vxlan_validate_udp_csum (vm, b3);
- csum_err = csum_err0 | csum_err1 | csum_err2 | csum_err3;
- }
-
- if (PREDICT_FALSE (ip_err || udp_err || csum_err))
- {
- if (ip_err0 || udp_err0 || csum_err0)
- {
- next0 = VXLAN_FLOW_NEXT_DROP;
- u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0);
- b0->error = node->errors[error0];
- }
- if (ip_err1 || udp_err1 || csum_err1)
- {
- next1 = VXLAN_FLOW_NEXT_DROP;
- u8 error1 = vxlan_err_code (ip_err1, udp_err1, csum_err1);
- b1->error = node->errors[error1];
- }
- if (ip_err2 || udp_err2 || csum_err2)
- {
- next2 = VXLAN_FLOW_NEXT_DROP;
- u8 error2 = vxlan_err_code (ip_err2, udp_err2, csum_err2);
- b2->error = node->errors[error2];
- }
- if (ip_err3 || udp_err3 || csum_err3)
- {
- next3 = VXLAN_FLOW_NEXT_DROP;
- u8 error3 = vxlan_err_code (ip_err3, udp_err3, csum_err3);
- b3->error = node->errors[error3];
- }
- }
-
- vnet_update_l2_len (b0);
- vnet_update_l2_len (b1);
- vnet_update_l2_len (b2);
- vnet_update_l2_len (b3);
-
- ASSERT (b0->flow_id != 0);
- ASSERT (b1->flow_id != 0);
- ASSERT (b2->flow_id != 0);
- ASSERT (b3->flow_id != 0);
-
- u32 t_index0 = b0->flow_id - vxm->flow_id_start;
- u32 t_index1 = b1->flow_id - vxm->flow_id_start;
- u32 t_index2 = b2->flow_id - vxm->flow_id_start;
- u32 t_index3 = b3->flow_id - vxm->flow_id_start;
-
- vxlan_tunnel_t *t0 = &vxm->tunnels[t_index0];
- vxlan_tunnel_t *t1 = &vxm->tunnels[t_index1];
- vxlan_tunnel_t *t2 = &vxm->tunnels[t_index2];
- vxlan_tunnel_t *t3 = &vxm->tunnels[t_index3];
-
- /* flow id consumed */
- b0->flow_id = 0;
- b1->flow_id = 0;
- b2->flow_id = 0;
- b3->flow_id = 0;
-
- u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX] =
- t0->sw_if_index;
- u32 sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX] =
- t1->sw_if_index;
- u32 sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX] =
- t2->sw_if_index;
- u32 sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX] =
- t3->sw_if_index;
-
- vlib_increment_combined_counter (rx_counter[next0], thread_index,
- sw_if_index0, 1, len0);
- vlib_increment_combined_counter (rx_counter[next1], thread_index,
- sw_if_index1, 1, len1);
- vlib_increment_combined_counter (rx_counter[next2], thread_index,
- sw_if_index2, 1, len2);
- vlib_increment_combined_counter (rx_counter[next3], thread_index,
- sw_if_index3, 1, len3);
-
- u32 flags = b0->flags | b1->flags | b2->flags | b3->flags;
-
- if (PREDICT_FALSE (flags & VLIB_BUFFER_IS_TRACED))
- {
- if (b0->flags & VLIB_BUFFER_IS_TRACED)
- {
- vxlan_rx_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof *tr);
- u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0);
- tr->next_index = next0;
- tr->error = error0;
- tr->tunnel_index = t_index0;
- tr->vni = t0->vni;
- }
- if (b1->flags & VLIB_BUFFER_IS_TRACED)
- {
- vxlan_rx_trace_t *tr =
- vlib_add_trace (vm, node, b1, sizeof *tr);
- u8 error1 = vxlan_err_code (ip_err1, udp_err1, csum_err1);
- tr->next_index = next1;
- tr->error = error1;
- tr->tunnel_index = t_index1;
- tr->vni = t1->vni;
- }
- if (b2->flags & VLIB_BUFFER_IS_TRACED)
- {
- vxlan_rx_trace_t *tr =
- vlib_add_trace (vm, node, b2, sizeof *tr);
- u8 error2 = vxlan_err_code (ip_err2, udp_err2, csum_err2);
- tr->next_index = next2;
- tr->error = error2;
- tr->tunnel_index = t_index2;
- tr->vni = t2->vni;
- }
- if (b3->flags & VLIB_BUFFER_IS_TRACED)
- {
- vxlan_rx_trace_t *tr =
- vlib_add_trace (vm, node, b3, sizeof *tr);
- u8 error3 = vxlan_err_code (ip_err3, udp_err3, csum_err3);
- tr->next_index = next3;
- tr->error = error3;
- tr->tunnel_index = t_index3;
- tr->vni = t3->vni;
- }
- }
- vlib_validate_buffer_enqueue_x4
- (vm, node, next_index, to_next, n_left_to_next,
- bi0, bi1, bi2, bi3, next0, next1, next2, next3);
- }
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0 = to_next[0] = from[0];
- from++;
- n_left_from--;
- to_next++;
- n_left_to_next--;
-
- vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
- vlib_buffer_advance (b0, payload_offset);
-
- u16 len0 = vlib_buffer_length_in_chain (vm, b0);
- u32 next0 = VXLAN_FLOW_NEXT_L2_INPUT;
-
- u8 ip_err0 = vxlan_check_ip (b0, len0);
- u8 udp_err0 = vxlan_check_ip_udp_len (b0);
- u8 csum_err0 = vxlan_check_udp_csum (vm, b0);
-
- if (csum_err0)
- csum_err0 = !vxlan_validate_udp_csum (vm, b0);
- if (ip_err0 || udp_err0 || csum_err0)
- {
- next0 = VXLAN_FLOW_NEXT_DROP;
- u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0);
- b0->error = node->errors[error0];
- }
-
- vnet_update_l2_len (b0);
-
- ASSERT (b0->flow_id != 0);
- u32 t_index0 = b0->flow_id - vxm->flow_id_start;
- vxlan_tunnel_t *t0 = &vxm->tunnels[t_index0];
- b0->flow_id = 0;
-
- u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX] =
- t0->sw_if_index;
- vlib_increment_combined_counter (rx_counter[next0], thread_index,
- sw_if_index0, 1, len0);
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_rx_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof *tr);
- u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0);
- tr->next_index = next0;
- tr->error = error0;
- tr->tunnel_index = t_index0;
- tr->vni = t0->vni;
- }
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return f->n_vectors;
-}
-
-/* *INDENT-OFF* */
-#ifndef CLIB_MULTIARCH_VARIANT
-VLIB_REGISTER_NODE (vxlan4_flow_input_node) = {
- .name = "vxlan-flow-input",
- .type = VLIB_NODE_TYPE_INTERNAL,
- .vector_size = sizeof (u32),
-
- .format_trace = format_vxlan_rx_trace,
-
- .n_errors = VXLAN_FLOW_N_ERROR,
- .error_strings = vxlan_flow_error_strings,
-
- .n_next_nodes = VXLAN_FLOW_N_NEXT,
- .next_nodes = {
-#define _(s,n) [VXLAN_FLOW_NEXT_##s] = n,
- foreach_vxlan_flow_input_next
-#undef _
- },
-};
-#endif
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan/dir.dox b/src/vnet/vxlan/dir.dox
deleted file mode 100644
index 31a9e2b6112..00000000000
--- a/src/vnet/vxlan/dir.dox
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
-@dir
-@brief VXLAN Code.
-
-This directory contains source code to support VXLAN.
-
-*/
-/*? %%clicmd:group_label VXLAN CLI %% ?*/
diff --git a/src/vnet/vxlan/encap.c b/src/vnet/vxlan/encap.c
deleted file mode 100644
index 0961a27942d..00000000000
--- a/src/vnet/vxlan/encap.c
+++ /dev/null
@@ -1,540 +0,0 @@
-
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vppinfra/error.h>
-#include <vppinfra/hash.h>
-#include <vnet/vnet.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/interface_output.h>
-#include <vnet/vxlan/vxlan.h>
-#include <vnet/qos/qos_types.h>
-#include <vnet/adj/rewrite.h>
-
-/* Statistics (not all errors) */
-#define foreach_vxlan_encap_error \
-_(ENCAPSULATED, "good packets encapsulated")
-
-static char *vxlan_encap_error_strings[] = {
-#define _(sym,string) string,
- foreach_vxlan_encap_error
-#undef _
-};
-
-typedef enum
-{
-#define _(sym,str) VXLAN_ENCAP_ERROR_##sym,
- foreach_vxlan_encap_error
-#undef _
- VXLAN_ENCAP_N_ERROR,
-} vxlan_encap_error_t;
-
-typedef enum
-{
- VXLAN_ENCAP_NEXT_DROP,
- VXLAN_ENCAP_N_NEXT,
-} vxlan_encap_next_t;
-
-typedef struct
-{
- u32 tunnel_index;
- u32 vni;
-} vxlan_encap_trace_t;
-
-#ifndef CLIB_MARCH_VARIANT
-u8 *
-format_vxlan_encap_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- vxlan_encap_trace_t *t = va_arg (*args, vxlan_encap_trace_t *);
-
- s = format (s, "VXLAN encap to vxlan_tunnel%d vni %d",
- t->tunnel_index, t->vni);
- return s;
-}
-#endif
-
-always_inline uword
-vxlan_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
- vlib_frame_t *from_frame, u8 is_ip4)
-{
- u32 n_left_from, next_index, *from, *to_next;
- vxlan_main_t *vxm = &vxlan_main;
- vnet_main_t *vnm = vxm->vnet_main;
- vnet_interface_main_t *im = &vnm->interface_main;
- vlib_combined_counter_main_t *tx_counter =
- im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX;
- u32 pkts_encapsulated = 0;
- u32 thread_index = vlib_get_thread_index ();
- u32 sw_if_index0 = 0, sw_if_index1 = 0;
- u32 next0 = 0, next1 = 0;
- vxlan_tunnel_t *t0 = NULL, *t1 = NULL;
- index_t dpoi_idx0 = INDEX_INVALID, dpoi_idx1 = INDEX_INVALID;
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
- vlib_buffer_t **b = bufs;
-
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
-
- next_index = node->cached_next_index;
-
- STATIC_ASSERT_SIZEOF (ip6_vxlan_header_t, 56);
- STATIC_ASSERT_SIZEOF (ip4_vxlan_header_t, 36);
-
- u8 const underlay_hdr_len = is_ip4 ?
- sizeof (ip4_vxlan_header_t) : sizeof (ip6_vxlan_header_t);
- u16 const l3_len = is_ip4 ? sizeof (ip4_header_t) : sizeof (ip6_header_t);
- u32 const outer_packet_csum_offload_flags =
- is_ip4 ? (VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
- VNET_BUFFER_OFFLOAD_F_TNL_VXLAN) :
- (VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM |
- VNET_BUFFER_OFFLOAD_F_TNL_VXLAN);
-
- vlib_get_buffers (vm, from, bufs, n_left_from);
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- /* Prefetch next iteration. */
- {
- vlib_prefetch_buffer_header (b[2], LOAD);
- vlib_prefetch_buffer_header (b[3], LOAD);
-
- CLIB_PREFETCH (b[2]->data - CLIB_CACHE_LINE_BYTES,
- 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (b[3]->data - CLIB_CACHE_LINE_BYTES,
- 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- }
-
- u32 bi0 = to_next[0] = from[0];
- u32 bi1 = to_next[1] = from[1];
- from += 2;
- to_next += 2;
- n_left_to_next -= 2;
- n_left_from -= 2;
-
- vlib_buffer_t *b0 = b[0];
- vlib_buffer_t *b1 = b[1];
- b += 2;
-
- u32 flow_hash0 = vnet_l2_compute_flow_hash (b0);
- u32 flow_hash1 = vnet_l2_compute_flow_hash (b1);
-
- /* Get next node index and adj index from tunnel next_dpo */
- if (sw_if_index0 != vnet_buffer (b0)->sw_if_index[VLIB_TX])
- {
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
- vnet_hw_interface_t *hi0 =
- vnet_get_sup_hw_interface (vnm, sw_if_index0);
- t0 = &vxm->tunnels[hi0->dev_instance];
- /* Note: change to always set next0 if it may be set to drop */
- next0 = t0->next_dpo.dpoi_next_node;
- dpoi_idx0 = t0->next_dpo.dpoi_index;
- }
-
- /* Get next node index and adj index from tunnel next_dpo */
- if (sw_if_index1 != vnet_buffer (b1)->sw_if_index[VLIB_TX])
- {
- if (sw_if_index0 == vnet_buffer (b1)->sw_if_index[VLIB_TX])
- {
- sw_if_index1 = sw_if_index0;
- t1 = t0;
- next1 = next0;
- dpoi_idx1 = dpoi_idx0;
- }
- else
- {
- sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
- vnet_hw_interface_t *hi1 =
- vnet_get_sup_hw_interface (vnm, sw_if_index1);
- t1 = &vxm->tunnels[hi1->dev_instance];
- /* Note: change to always set next1 if it may be set to drop */
- next1 = t1->next_dpo.dpoi_next_node;
- dpoi_idx1 = t1->next_dpo.dpoi_index;
- }
- }
-
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpoi_idx0;
- vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpoi_idx1;
-
- ASSERT (t0->rewrite_header.data_bytes == underlay_hdr_len);
- ASSERT (t1->rewrite_header.data_bytes == underlay_hdr_len);
- vnet_rewrite_two_headers (*t0, *t1, vlib_buffer_get_current (b0),
- vlib_buffer_get_current (b1),
- underlay_hdr_len);
-
- vlib_buffer_advance (b0, -underlay_hdr_len);
- vlib_buffer_advance (b1, -underlay_hdr_len);
-
- u32 len0 = vlib_buffer_length_in_chain (vm, b0);
- u32 len1 = vlib_buffer_length_in_chain (vm, b1);
- u16 payload_l0 = clib_host_to_net_u16 (len0 - l3_len);
- u16 payload_l1 = clib_host_to_net_u16 (len1 - l3_len);
-
- void *underlay0 = vlib_buffer_get_current (b0);
- void *underlay1 = vlib_buffer_get_current (b1);
-
- ip4_header_t *ip4_0, *ip4_1;
- qos_bits_t ip4_0_tos = 0, ip4_1_tos = 0;
- ip6_header_t *ip6_0, *ip6_1;
- udp_header_t *udp0, *udp1;
- u8 *l3_0, *l3_1;
- if (is_ip4)
- {
- ip4_vxlan_header_t *hdr0 = underlay0;
- ip4_vxlan_header_t *hdr1 = underlay1;
-
- /* Fix the IP4 checksum and length */
- ip4_0 = &hdr0->ip4;
- ip4_1 = &hdr1->ip4;
- ip4_0->length = clib_host_to_net_u16 (len0);
- ip4_1->length = clib_host_to_net_u16 (len1);
-
- if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_QOS_DATA_VALID))
- {
- ip4_0_tos = vnet_buffer2 (b0)->qos.bits;
- ip4_0->tos = ip4_0_tos;
- }
- if (PREDICT_FALSE (b1->flags & VNET_BUFFER_F_QOS_DATA_VALID))
- {
- ip4_1_tos = vnet_buffer2 (b1)->qos.bits;
- ip4_1->tos = ip4_1_tos;
- }
-
- l3_0 = (u8 *) ip4_0;
- l3_1 = (u8 *) ip4_1;
- udp0 = &hdr0->udp;
- udp1 = &hdr1->udp;
- }
- else /* ipv6 */
- {
- ip6_vxlan_header_t *hdr0 = underlay0;
- ip6_vxlan_header_t *hdr1 = underlay1;
-
- /* Fix IP6 payload length */
- ip6_0 = &hdr0->ip6;
- ip6_1 = &hdr1->ip6;
- ip6_0->payload_length = payload_l0;
- ip6_1->payload_length = payload_l1;
-
- l3_0 = (u8 *) ip6_0;
- l3_1 = (u8 *) ip6_1;
- udp0 = &hdr0->udp;
- udp1 = &hdr1->udp;
- }
-
- /* Fix UDP length and set source port */
- udp0->length = payload_l0;
- udp0->src_port = flow_hash0;
- udp1->length = payload_l1;
- udp1->src_port = flow_hash1;
-
- if (b0->flags & VNET_BUFFER_F_OFFLOAD)
- {
- vnet_buffer2 (b0)->outer_l3_hdr_offset = l3_0 - b0->data;
- vnet_buffer2 (b0)->outer_l4_hdr_offset = (u8 *) udp0 - b0->data;
- vnet_buffer_offload_flags_set (b0,
- outer_packet_csum_offload_flags);
- }
- /* IPv4 checksum only */
- else if (is_ip4)
- {
- ip_csum_t sum0 = ip4_0->checksum;
- sum0 = ip_csum_update (sum0, 0, ip4_0->length, ip4_header_t,
- length /* changed member */);
- if (PREDICT_FALSE (ip4_0_tos))
- {
- sum0 = ip_csum_update (sum0, 0, ip4_0_tos, ip4_header_t,
- tos /* changed member */);
- }
- ip4_0->checksum = ip_csum_fold (sum0);
- }
- /* IPv6 UDP checksum is mandatory */
- else
- {
- int bogus = 0;
-
- udp0->checksum =
- ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip6_0, &bogus);
- ASSERT (bogus == 0);
- if (udp0->checksum == 0)
- udp0->checksum = 0xffff;
- }
-
- if (b1->flags & VNET_BUFFER_F_OFFLOAD)
- {
- vnet_buffer2 (b1)->outer_l3_hdr_offset = l3_1 - b1->data;
- vnet_buffer2 (b1)->outer_l4_hdr_offset = (u8 *) udp1 - b1->data;
- vnet_buffer_offload_flags_set (b1,
- outer_packet_csum_offload_flags);
- }
- /* IPv4 checksum only */
- else if (is_ip4)
- {
- ip_csum_t sum1 = ip4_1->checksum;
- sum1 = ip_csum_update (sum1, 0, ip4_1->length, ip4_header_t,
- length /* changed member */);
- if (PREDICT_FALSE (ip4_1_tos))
- {
- sum1 = ip_csum_update (sum1, 0, ip4_1_tos, ip4_header_t,
- tos /* changed member */);
- }
- ip4_1->checksum = ip_csum_fold (sum1);
- }
- /* IPv6 UDP checksum is mandatory */
- else
- {
- int bogus = 0;
-
- udp1->checksum = ip6_tcp_udp_icmp_compute_checksum
- (vm, b1, ip6_1, &bogus);
- ASSERT (bogus == 0);
- if (udp1->checksum == 0)
- udp1->checksum = 0xffff;
- }
-
- /* save inner packet flow_hash for load-balance node */
- vnet_buffer (b0)->ip.flow_hash = flow_hash0;
- vnet_buffer (b1)->ip.flow_hash = flow_hash1;
-
- if (sw_if_index0 == sw_if_index1)
- {
- vlib_increment_combined_counter (tx_counter, thread_index,
- sw_if_index0, 2, len0 + len1);
- }
- else
- {
- vlib_increment_combined_counter (tx_counter, thread_index,
- sw_if_index0, 1, len0);
- vlib_increment_combined_counter (tx_counter, thread_index,
- sw_if_index1, 1, len1);
- }
- pkts_encapsulated += 2;
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_encap_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->tunnel_index = t0 - vxm->tunnels;
- tr->vni = t0->vni;
- }
-
- if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_encap_trace_t *tr =
- vlib_add_trace (vm, node, b1, sizeof (*tr));
- tr->tunnel_index = t1 - vxm->tunnels;
- tr->vni = t1->vni;
- }
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0 = to_next[0] = from[0];
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- vlib_buffer_t *b0 = b[0];
- b += 1;
-
- u32 flow_hash0 = vnet_l2_compute_flow_hash (b0);
-
- /* Get next node index and adj index from tunnel next_dpo */
- if (sw_if_index0 != vnet_buffer (b0)->sw_if_index[VLIB_TX])
- {
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
- vnet_hw_interface_t *hi0 =
- vnet_get_sup_hw_interface (vnm, sw_if_index0);
- t0 = &vxm->tunnels[hi0->dev_instance];
- /* Note: change to always set next0 if it may be set to drop */
- next0 = t0->next_dpo.dpoi_next_node;
- dpoi_idx0 = t0->next_dpo.dpoi_index;
- }
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpoi_idx0;
-
- ASSERT (t0->rewrite_header.data_bytes == underlay_hdr_len);
- vnet_rewrite_one_header (*t0, vlib_buffer_get_current (b0),
- underlay_hdr_len);
-
- vlib_buffer_advance (b0, -underlay_hdr_len);
- void *underlay0 = vlib_buffer_get_current (b0);
-
- u32 len0 = vlib_buffer_length_in_chain (vm, b0);
- u16 payload_l0 = clib_host_to_net_u16 (len0 - l3_len);
-
- udp_header_t *udp0;
- ip4_header_t *ip4_0;
- qos_bits_t ip4_0_tos = 0;
- ip6_header_t *ip6_0;
- u8 *l3_0;
- if (is_ip4)
- {
- ip4_vxlan_header_t *hdr = underlay0;
-
- /* Fix the IP4 checksum and length */
- ip4_0 = &hdr->ip4;
- ip4_0->length = clib_host_to_net_u16 (len0);
-
- if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_QOS_DATA_VALID))
- {
- ip4_0_tos = vnet_buffer2 (b0)->qos.bits;
- ip4_0->tos = ip4_0_tos;
- }
-
- l3_0 = (u8 *) ip4_0;
- udp0 = &hdr->udp;
- }
- else /* ip6 path */
- {
- ip6_vxlan_header_t *hdr = underlay0;
-
- /* Fix IP6 payload length */
- ip6_0 = &hdr->ip6;
- ip6_0->payload_length = payload_l0;
-
- l3_0 = (u8 *) ip6_0;
- udp0 = &hdr->udp;
- }
-
- /* Fix UDP length and set source port */
- udp0->length = payload_l0;
- udp0->src_port = flow_hash0;
-
- if (b0->flags & VNET_BUFFER_F_OFFLOAD)
- {
- vnet_buffer2 (b0)->outer_l3_hdr_offset = l3_0 - b0->data;
- vnet_buffer2 (b0)->outer_l4_hdr_offset = (u8 *) udp0 - b0->data;
- vnet_buffer_offload_flags_set (b0,
- outer_packet_csum_offload_flags);
- }
- /* IPv4 checksum only */
- else if (is_ip4)
- {
- ip_csum_t sum0 = ip4_0->checksum;
- sum0 = ip_csum_update (sum0, 0, ip4_0->length, ip4_header_t,
- length /* changed member */);
- if (PREDICT_FALSE (ip4_0_tos))
- {
- sum0 = ip_csum_update (sum0, 0, ip4_0_tos, ip4_header_t,
- tos /* changed member */);
- }
- ip4_0->checksum = ip_csum_fold (sum0);
- }
- /* IPv6 UDP checksum is mandatory */
- else
- {
- int bogus = 0;
-
- udp0->checksum = ip6_tcp_udp_icmp_compute_checksum
- (vm, b0, ip6_0, &bogus);
- ASSERT (bogus == 0);
- if (udp0->checksum == 0)
- udp0->checksum = 0xffff;
- }
-
- /* reuse inner packet flow_hash for load-balance node */
- vnet_buffer (b0)->ip.flow_hash = flow_hash0;
-
- vlib_increment_combined_counter (tx_counter, thread_index,
- sw_if_index0, 1, len0);
- pkts_encapsulated++;
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_encap_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->tunnel_index = t0 - vxm->tunnels;
- tr->vni = t0->vni;
- }
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- /* Do we still need this now that tunnel tx stats are kept? */
- vlib_node_increment_counter (vm, node->node_index,
- VXLAN_ENCAP_ERROR_ENCAPSULATED,
- pkts_encapsulated);
-
- return from_frame->n_vectors;
-}
-
-VLIB_NODE_FN (vxlan4_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- /* Disable checksum offload, as the setup overhead in the tx node is not
- worthwhile for the ip4 header checksum alone, unless the udp
- checksum is also required */
- return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 1);
-}
-
-VLIB_NODE_FN (vxlan6_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- /* Enable checksum offload for ip6, as the udp checksum is mandatory */
- return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 0);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (vxlan4_encap_node) = {
- .name = "vxlan4-encap",
- .vector_size = sizeof (u32),
- .format_trace = format_vxlan_encap_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(vxlan_encap_error_strings),
- .error_strings = vxlan_encap_error_strings,
- .n_next_nodes = VXLAN_ENCAP_N_NEXT,
- .next_nodes = {
- [VXLAN_ENCAP_NEXT_DROP] = "error-drop",
- },
-};
-
-VLIB_REGISTER_NODE (vxlan6_encap_node) = {
- .name = "vxlan6-encap",
- .vector_size = sizeof (u32),
- .format_trace = format_vxlan_encap_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(vxlan_encap_error_strings),
- .error_strings = vxlan_encap_error_strings,
- .n_next_nodes = VXLAN_ENCAP_N_NEXT,
- .next_nodes = {
- [VXLAN_ENCAP_NEXT_DROP] = "error-drop",
- },
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan/vxlan.api b/src/vnet/vxlan/vxlan.api
deleted file mode 100644
index b7e678595d8..00000000000
--- a/src/vnet/vxlan/vxlan.api
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Copyright (c) 2015-2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-option version = "2.1.0";
-
-import "vnet/interface_types.api";
-import "vnet/ip/ip_types.api";
-
-/** \brief Create or delete a VXLAN tunnel
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param is_add - Use 1 to create the tunnel, 0 to remove it
- @param instance - optional unique custom device instance, else ~0.
- @param src_address - Source IP address
- @param dst_address - Destination IP address, can be multicast
- @param mcast_sw_if_index - Interface for multicast destination
- @param encap_vrf_id - Encap route table FIB index
- @param decap_next_index - index of decap next graph node
- @param vni - The VXLAN Network Identifier, uint24
-*/
-define vxlan_add_del_tunnel
-{
- u32 client_index;
- u32 context;
- bool is_add [default=true];
- u32 instance; /* If non-~0, specifies a custom dev instance */
- vl_api_address_t src_address;
- vl_api_address_t dst_address;
- vl_api_interface_index_t mcast_sw_if_index;
- u32 encap_vrf_id;
- u32 decap_next_index;
- u32 vni;
-};
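-
-/* Illustrative only: a rough debug-CLI equivalent of this message (the
- * addresses and vni below are made-up values; the exact syntax is defined
- * by the companion CLI code):
- *
- *   create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13
- *   create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 del
- */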
-
-/** \brief Create or delete a VXLAN tunnel
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param is_add - Use 1 to create the tunnel, 0 to remove it
- @param instance - optional unique custom device instance, else ~0.
- @param src_address - Source IP address
- @param dst_address - Destination IP address, can be multicast
- @param src_port - Source UDP port. It is not included in sent packets. Used only for port registration
- @param dst_port - Destination UDP port
- @param mcast_sw_if_index - Interface for multicast destination
- @param encap_vrf_id - Encap route table FIB index
- @param decap_next_index - index of decap next graph node
- @param vni - The VXLAN Network Identifier, uint24
-*/
-define vxlan_add_del_tunnel_v2
-{
- u32 client_index;
- u32 context;
- bool is_add [default=true];
- u32 instance [default=0xffffffff]; /* If non-~0, specifies a custom dev instance */
- vl_api_address_t src_address;
- vl_api_address_t dst_address;
- u16 src_port;
- u16 dst_port;
- vl_api_interface_index_t mcast_sw_if_index;
- u32 encap_vrf_id;
- u32 decap_next_index;
- u32 vni;
-};
-
-/** \brief Create or delete a VXLAN tunnel
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param is_add - Use 1 to create the tunnel, 0 to remove it
- @param instance - optional unique custom device instance, else ~0.
- @param src_address - Source IP address
- @param dst_address - Destination IP address, can be multicast
- @param src_port - Source UDP port. It is not included in sent packets. Used only for port registration
- @param dst_port - Destination UDP port
- @param mcast_sw_if_index - Interface for multicast destination
- @param encap_vrf_id - Encap route table FIB index
- @param decap_next_index - index of decap next graph node
- @param vni - The VXLAN Network Identifier, uint24
- @param is_l3 - if true, create the interface in L3 mode, w/o MAC
-*/
-define vxlan_add_del_tunnel_v3
-{
- u32 client_index;
- u32 context;
- bool is_add [default=true];
- u32 instance [default=0xffffffff]; /* If non-~0, specifies a custom dev instance */
- vl_api_address_t src_address;
- vl_api_address_t dst_address;
- u16 src_port;
- u16 dst_port;
- vl_api_interface_index_t mcast_sw_if_index;
- u32 encap_vrf_id;
- u32 decap_next_index;
- u32 vni;
- bool is_l3 [default=false];
-};
-
-define vxlan_add_del_tunnel_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-define vxlan_add_del_tunnel_v2_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-define vxlan_add_del_tunnel_v3_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-
-define vxlan_tunnel_dump
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index;
-};
-define vxlan_tunnel_v2_dump
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index;
-};
-
-define vxlan_tunnel_details
-{
- u32 context;
- vl_api_interface_index_t sw_if_index;
- u32 instance;
- vl_api_address_t src_address;
- vl_api_address_t dst_address;
- vl_api_interface_index_t mcast_sw_if_index;
- u32 encap_vrf_id;
- u32 decap_next_index;
- u32 vni;
-};
-define vxlan_tunnel_v2_details
-{
- u32 context;
- vl_api_interface_index_t sw_if_index;
- u32 instance;
- vl_api_address_t src_address;
- vl_api_address_t dst_address;
- u16 src_port;
- u16 dst_port;
- vl_api_interface_index_t mcast_sw_if_index;
- u32 encap_vrf_id;
- u32 decap_next_index;
- u32 vni;
-};
-
-/** \brief Interface set vxlan-bypass request
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param sw_if_index - interface used to reach neighbor
- @param is_ipv6 - if non-zero, enable ipv6-vxlan-bypass, else ipv4-vxlan-bypass
- @param enable - if non-zero enable, else disable
-*/
-autoreply define sw_interface_set_vxlan_bypass
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index;
- bool is_ipv6;
- bool enable [default=true];
-};
-
-/** \brief Offload vxlan rx request
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param hw_if_index - rx hw interface
- @param sw_if_index - vxlan interface to offload
- @param enable - if non-zero enable, else disable
-*/
-autoreply define vxlan_offload_rx
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t hw_if_index;
- vl_api_interface_index_t sw_if_index;
- bool enable [default=true];
-};
diff --git a/src/vnet/vxlan/vxlan.c b/src/vnet/vxlan/vxlan.c
deleted file mode 100644
index 8568d5b0a98..00000000000
--- a/src/vnet/vxlan/vxlan.c
+++ /dev/null
@@ -1,1343 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vnet/vxlan/vxlan.h>
-#include <vnet/ip/format.h>
-#include <vnet/fib/fib_entry.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/fib/fib_entry_track.h>
-#include <vnet/mfib/mfib_table.h>
-#include <vnet/adj/adj_mcast.h>
-#include <vnet/adj/rewrite.h>
-#include <vnet/dpo/drop_dpo.h>
-#include <vnet/interface.h>
-#include <vnet/flow/flow.h>
-#include <vnet/udp/udp_local.h>
-#include <vlib/vlib.h>
-
-/**
- * @file
- * @brief VXLAN.
- *
- * VXLAN provides the features needed to allow L2 bridge domains (BDs)
- * to span multiple servers. This is done by building an L2 overlay on
- * top of an L3 network underlay using VXLAN tunnels.
- *
- * This makes it possible for servers to be co-located in the same data
- * center or be separated geographically as long as they are reachable
- * through the underlay L3 network.
- *
- * You can refer to this kind of L2 overlay bridge domain as a VXLAN
- * (Virtual eXtensible LAN) segment.
- */
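-
-/*
- * For orientation, the encapsulation built by this file is an ip4/ip6 +
- * udp underlay followed by the 8-byte VXLAN header of RFC 7348.  A
- * minimal sketch of that header (layout only; the real vxlan_header_t
- * and its accessors are defined alongside this file):
- *
- *   typedef struct
- *   {
- *     u32 flags_reserved;   flags byte first; I-bit 0x08 marks a valid VNI
- *     u32 vni_reserved;     VNI in the upper 24 bits, network byte order
- *   } vxlan_header_t;
- *
- * vnet_set_vni_and_flags(), used by vxlan_rewrite() below, fills in both
- * words when the tunnel's rewrite string is built.
- */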
-
-vxlan_main_t vxlan_main;
-
-static u32
-vxlan_eth_flag_change (vnet_main_t *vnm, vnet_hw_interface_t *hi, u32 flags)
-{
- /* nothing for now */
- return 0;
-}
-
-static u8 *
-format_decap_next (u8 * s, va_list * args)
-{
- u32 next_index = va_arg (*args, u32);
-
- if (next_index == VXLAN_INPUT_NEXT_DROP)
- return format (s, "drop");
- else
- return format (s, "index %d", next_index);
-}
-
-u8 *
-format_vxlan_tunnel (u8 * s, va_list * args)
-{
- vxlan_tunnel_t *t = va_arg (*args, vxlan_tunnel_t *);
-
- s = format (s,
- "[%d] instance %d src %U dst %U src_port %d dst_port %d vni %d "
- "fib-idx %d sw-if-idx %d ",
- t->dev_instance, t->user_instance, format_ip46_address, &t->src,
- IP46_TYPE_ANY, format_ip46_address, &t->dst, IP46_TYPE_ANY,
- t->src_port, t->dst_port, t->vni, t->encap_fib_index,
- t->sw_if_index);
-
- s = format (s, "encap-dpo-idx %d ", t->next_dpo.dpoi_index);
-
- if (PREDICT_FALSE (t->decap_next_index != VXLAN_INPUT_NEXT_L2_INPUT))
- s = format (s, "decap-next-%U ", format_decap_next, t->decap_next_index);
-
- if (PREDICT_FALSE (ip46_address_is_multicast (&t->dst)))
- s = format (s, "mcast-sw-if-idx %d ", t->mcast_sw_if_index);
-
- if (t->flow_index != ~0)
- s = format (s, "flow-index %d [%U]", t->flow_index,
- format_flow_enabled_hw, t->flow_index);
-
- return s;
-}
-
-static u8 *
-format_vxlan_name (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- vxlan_main_t *vxm = &vxlan_main;
- vxlan_tunnel_t *t;
-
- if (dev_instance == ~0)
- return format (s, "<cached-unused>");
-
- if (dev_instance >= vec_len (vxm->tunnels))
- return format (s, "<improperly-referenced>");
-
- t = pool_elt_at_index (vxm->tunnels, dev_instance);
-
- return format (s, "vxlan_tunnel%d", t->user_instance);
-}
-
-static clib_error_t *
-vxlan_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
-{
- u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
- VNET_HW_INTERFACE_FLAG_LINK_UP : 0;
- vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
-
- return /* no error */ 0;
-}
-
-/* *INDENT-OFF* */
-VNET_DEVICE_CLASS (vxlan_device_class, static) = {
- .name = "VXLAN",
- .format_device_name = format_vxlan_name,
- .format_tx_trace = format_vxlan_encap_trace,
- .admin_up_down_function = vxlan_interface_admin_up_down,
-};
-/* *INDENT-ON* */
-
-static u8 *
-format_vxlan_header_with_length (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- s = format (s, "unimplemented dev %u", dev_instance);
- return s;
-}
-
-/* *INDENT-OFF* */
-VNET_HW_INTERFACE_CLASS (vxlan_hw_class) = {
- .name = "VXLAN",
- .format_header = format_vxlan_header_with_length,
- .build_rewrite = default_build_rewrite,
-};
-/* *INDENT-ON* */
-
-static void
-vxlan_tunnel_restack_dpo (vxlan_tunnel_t * t)
-{
- u8 is_ip4 = ip46_address_is_ip4 (&t->dst);
- dpo_id_t dpo = DPO_INVALID;
- fib_forward_chain_type_t forw_type = is_ip4 ?
- FIB_FORW_CHAIN_TYPE_UNICAST_IP4 : FIB_FORW_CHAIN_TYPE_UNICAST_IP6;
-
- fib_entry_contribute_forwarding (t->fib_entry_index, forw_type, &dpo);
-
- /* vxlan uses the payload hash as the udp source port,
- * hence the packet's hash is unknown ahead of time;
- * skip through single-bucket load-balance dpo's */
- while (DPO_LOAD_BALANCE == dpo.dpoi_type)
- {
- const load_balance_t *lb;
- const dpo_id_t *choice;
-
- lb = load_balance_get (dpo.dpoi_index);
- if (lb->lb_n_buckets > 1)
- break;
-
- choice = load_balance_get_bucket_i (lb, 0);
-
- if (DPO_RECEIVE == choice->dpoi_type)
- dpo_copy (&dpo, drop_dpo_get (choice->dpoi_proto));
- else
- dpo_copy (&dpo, choice);
- }
-
- u32 encap_index = is_ip4 ?
- vxlan4_encap_node.index : vxlan6_encap_node.index;
- dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
- dpo_reset (&dpo);
-}
-
-static vxlan_tunnel_t *
-vxlan_tunnel_from_fib_node (fib_node_t * node)
-{
- ASSERT (FIB_NODE_TYPE_VXLAN_TUNNEL == node->fn_type);
- return ((vxlan_tunnel_t *) (((char *) node) -
- STRUCT_OFFSET_OF (vxlan_tunnel_t, node)));
-}
-
-/**
- * Function definition to backwalk a FIB node.
- * Here we restack the encap node on the new DPO of the VXLAN destination IP.
- */
-static fib_node_back_walk_rc_t
-vxlan_tunnel_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx)
-{
- vxlan_tunnel_restack_dpo (vxlan_tunnel_from_fib_node (node));
- return (FIB_NODE_BACK_WALK_CONTINUE);
-}
-
-/**
- * Function definition to get a FIB node from its index
- */
-static fib_node_t *
-vxlan_tunnel_fib_node_get (fib_node_index_t index)
-{
- vxlan_tunnel_t *t;
- vxlan_main_t *vxm = &vxlan_main;
-
- t = pool_elt_at_index (vxm->tunnels, index);
-
- return (&t->node);
-}
-
-/**
- * Function definition to inform the FIB node that its last lock has gone.
- */
-static void
-vxlan_tunnel_last_lock_gone (fib_node_t * node)
-{
- /*
- * The VXLAN tunnel is a root of the graph. As such
- * it never has children and thus is never locked.
- */
- ASSERT (0);
-}
-
-/*
- * Virtual function table registered by VXLAN tunnels
- * for participation in the FIB object graph.
- */
-const static fib_node_vft_t vxlan_vft = {
- .fnv_get = vxlan_tunnel_fib_node_get,
- .fnv_last_lock = vxlan_tunnel_last_lock_gone,
- .fnv_back_walk = vxlan_tunnel_back_walk,
-};
-
-#define foreach_copy_field \
- _ (vni) \
- _ (mcast_sw_if_index) \
- _ (encap_fib_index) \
- _ (decap_next_index) \
- _ (src) \
- _ (dst) \
- _ (src_port) \
- _ (dst_port)
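-
-/* Sketch of the X-macro pattern: each use site defines _() and expands the
- * list, e.g. the copy in vnet_vxlan_add_del_tunnel() below,
- *
- *   #define _(x) t->x = a->x;
- *   foreach_copy_field;
- *   #undef _
- *
- * becomes t->vni = a->vni; t->mcast_sw_if_index = a->mcast_sw_if_index; ...
- * keeping the copied-field list in exactly one place. */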
-
-static void
-vxlan_rewrite (vxlan_tunnel_t * t, bool is_ip6)
-{
- union
- {
- ip4_vxlan_header_t h4;
- ip6_vxlan_header_t h6;
- } h;
- int len = is_ip6 ? sizeof h.h6 : sizeof h.h4;
-
- udp_header_t *udp;
- vxlan_header_t *vxlan;
- /* Fixed portion of the (outer) ip header */
-
- clib_memset (&h, 0, sizeof (h));
- if (!is_ip6)
- {
- ip4_header_t *ip = &h.h4.ip4;
- udp = &h.h4.udp, vxlan = &h.h4.vxlan;
- ip->ip_version_and_header_length = 0x45;
- ip->ttl = 254;
- ip->protocol = IP_PROTOCOL_UDP;
-
- ip->src_address = t->src.ip4;
- ip->dst_address = t->dst.ip4;
-
- /* we fix up the ip4 header length and checksum after-the-fact */
- ip->checksum = ip4_header_checksum (ip);
- }
- else
- {
- ip6_header_t *ip = &h.h6.ip6;
- udp = &h.h6.udp, vxlan = &h.h6.vxlan;
- ip->ip_version_traffic_class_and_flow_label =
- clib_host_to_net_u32 (6 << 28);
- ip->hop_limit = 255;
- ip->protocol = IP_PROTOCOL_UDP;
-
- ip->src_address = t->src.ip6;
- ip->dst_address = t->dst.ip6;
- }
-
- /* UDP header; note the encap node later overwrites src_port with the inner flow hash */
- udp->src_port = clib_host_to_net_u16 (t->src_port);
- udp->dst_port = clib_host_to_net_u16 (t->dst_port);
-
- /* VXLAN header */
- vnet_set_vni_and_flags (vxlan, t->vni);
- vnet_rewrite_set_data (*t, &h, len);
-}
-
-static bool
-vxlan_decap_next_is_valid (vxlan_main_t * vxm, u32 is_ip6,
- u32 decap_next_index)
-{
- vlib_main_t *vm = vxm->vlib_main;
- u32 input_idx = (!is_ip6) ?
- vxlan4_input_node.index : vxlan6_input_node.index;
- vlib_node_runtime_t *r = vlib_node_get_runtime (vm, input_idx);
-
- return decap_next_index < r->n_next_nodes;
-}
-
-/* *INDENT-OFF* */
-typedef CLIB_PACKED(union
-{
- struct
- {
- fib_node_index_t mfib_entry_index;
- adj_index_t mcast_adj_index;
- };
- u64 as_u64;
-}) mcast_shared_t;
-/* *INDENT-ON* */
-
-static inline mcast_shared_t
-mcast_shared_get (ip46_address_t * ip)
-{
- ASSERT (ip46_address_is_multicast (ip));
- uword *p = hash_get_mem (vxlan_main.mcast_shared, ip);
- ALWAYS_ASSERT (p);
- mcast_shared_t ret = {.as_u64 = *p };
- return ret;
-}
-
-static inline void
-mcast_shared_add (ip46_address_t * dst, fib_node_index_t mfei, adj_index_t ai)
-{
- mcast_shared_t new_ep = {
- .mcast_adj_index = ai,
- .mfib_entry_index = mfei,
- };
-
- hash_set_mem_alloc (&vxlan_main.mcast_shared, dst, new_ep.as_u64);
-}
-
-static inline void
-mcast_shared_remove (ip46_address_t * dst)
-{
- mcast_shared_t ep = mcast_shared_get (dst);
-
- adj_unlock (ep.mcast_adj_index);
- mfib_table_entry_delete_index (ep.mfib_entry_index, MFIB_SOURCE_VXLAN);
-
- hash_unset_mem_free (&vxlan_main.mcast_shared, dst);
-}
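-
-/* Sketch: the CLIB_PACKED union lets the (mfib entry, adjacency) pair
- * round-trip through the hash as one u64 value, with no allocation:
- *
- *   mcast_shared_t ep = { .mfib_entry_index = mfei, .mcast_adj_index = ai };
- *   u64 packed = ep.as_u64;                      stored as the hash value
- *   mcast_shared_t back = { .as_u64 = packed };  both fields recovered
- */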
-
-int vnet_vxlan_add_del_tunnel
- (vnet_vxlan_add_del_tunnel_args_t * a, u32 * sw_if_indexp)
-{
- vxlan_main_t *vxm = &vxlan_main;
- vnet_main_t *vnm = vxm->vnet_main;
- vxlan_decap_info_t *p;
- u32 sw_if_index = ~0;
- vxlan4_tunnel_key_t key4;
- vxlan6_tunnel_key_t key6;
- u32 is_ip6 = a->is_ip6;
- vlib_main_t *vm = vlib_get_main ();
- u8 hw_addr[6];
-
- /* Set udp-ports */
- if (a->src_port == 0)
- a->src_port = is_ip6 ? UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan;
-
- if (a->dst_port == 0)
- a->dst_port = is_ip6 ? UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan;
-
- int not_found;
- if (!is_ip6)
- {
- /* ip4 mcast is indexed by mcast addr only */
- key4.key[0] = ip46_address_is_multicast (&a->dst) ?
- a->dst.ip4.as_u32 :
- a->dst.ip4.as_u32 | (((u64) a->src.ip4.as_u32) << 32);
- key4.key[1] = ((u64) clib_host_to_net_u16 (a->src_port) << 48) |
- (((u64) a->encap_fib_index) << 32) |
- clib_host_to_net_u32 (a->vni << 8);
- not_found =
- clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4);
- p = (void *) &key4.value;
- }
- else
- {
- key6.key[0] = a->dst.ip6.as_u64[0];
- key6.key[1] = a->dst.ip6.as_u64[1];
- key6.key[2] = (((u64) clib_host_to_net_u16 (a->src_port) << 48) |
- ((u64) a->encap_fib_index) << 32) |
- clib_host_to_net_u32 (a->vni << 8);
- not_found =
- clib_bihash_search_inline_24_8 (&vxm->vxlan6_tunnel_by_key, &key6);
- p = (void *) &key6.value;
- }
-
- if (not_found)
- p = 0;
-
- if (a->is_add)
- {
- l2input_main_t *l2im = &l2input_main;
- u32 dev_instance; /* real dev instance tunnel index */
- u32 user_instance; /* requested and actual instance number */
-
- /* adding a tunnel: tunnel must not already exist */
- if (p)
- return VNET_API_ERROR_TUNNEL_EXIST;
-
- /* if not set explicitly, default to l2 */
- if (a->decap_next_index == ~0)
- a->decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT;
- if (!vxlan_decap_next_is_valid (vxm, is_ip6, a->decap_next_index))
- return VNET_API_ERROR_INVALID_DECAP_NEXT;
-
- vxlan_tunnel_t *t;
- pool_get_aligned (vxm->tunnels, t, CLIB_CACHE_LINE_BYTES);
- clib_memset (t, 0, sizeof (*t));
- dev_instance = t - vxm->tunnels;
-
- /* copy from arg structure */
-#define _(x) t->x = a->x;
- foreach_copy_field;
-#undef _
-
- vxlan_rewrite (t, is_ip6);
- /*
- * Reconcile the real dev_instance and a possible requested instance.
- */
- user_instance = a->instance;
- if (user_instance == ~0)
- user_instance = dev_instance;
- if (hash_get (vxm->instance_used, user_instance))
- {
- pool_put (vxm->tunnels, t);
- return VNET_API_ERROR_INSTANCE_IN_USE;
- }
-
- hash_set (vxm->instance_used, user_instance, 1);
-
- t->dev_instance = dev_instance; /* actual */
- t->user_instance = user_instance; /* name */
- t->flow_index = ~0;
-
- if (a->is_l3)
- t->hw_if_index =
- vnet_register_interface (vnm, vxlan_device_class.index, dev_instance,
- vxlan_hw_class.index, dev_instance);
- else
- {
- f64 now = vlib_time_now (vm);
- u32 rnd;
- rnd = (u32) (now * 1e6);
- rnd = random_u32 (&rnd);
- memcpy (hw_addr + 2, &rnd, sizeof (rnd));
- hw_addr[0] = 2;
- hw_addr[1] = 0xfe;
- if (ethernet_register_interface (
- vnm, vxlan_device_class.index, dev_instance, hw_addr,
- &t->hw_if_index, vxlan_eth_flag_change))
- {
- hash_unset (vxm->instance_used, t->user_instance);
-
- pool_put (vxm->tunnels, t);
- return VNET_API_ERROR_SYSCALL_ERROR_2;
- }
- }
-
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index);
-
- /* Set vxlan tunnel output node */
- u32 encap_index = !is_ip6 ?
- vxlan4_encap_node.index : vxlan6_encap_node.index;
- vnet_set_interface_output_node (vnm, t->hw_if_index, encap_index);
-
- t->sw_if_index = sw_if_index = hi->sw_if_index;
-
- /* copy the key */
- int add_failed;
- if (is_ip6)
- {
- key6.value = (u64) dev_instance;
- add_failed = clib_bihash_add_del_24_8 (&vxm->vxlan6_tunnel_by_key,
- &key6, 1 /*add */ );
- }
- else
- {
- vxlan_decap_info_t di = {.sw_if_index = t->sw_if_index, };
- if (ip46_address_is_multicast (&t->dst))
- di.local_ip = t->src.ip4;
- else
- di.next_index = t->decap_next_index;
- key4.value = di.as_u64;
- add_failed = clib_bihash_add_del_16_8 (&vxm->vxlan4_tunnel_by_key,
- &key4, 1 /*add */ );
- }
-
- if (add_failed)
- {
- if (a->is_l3)
- vnet_delete_hw_interface (vnm, t->hw_if_index);
- else
- ethernet_delete_interface (vnm, t->hw_if_index);
- hash_unset (vxm->instance_used, t->user_instance);
- pool_put (vxm->tunnels, t);
- return VNET_API_ERROR_INVALID_REGISTRATION;
- }
-
- vec_validate_init_empty (vxm->tunnel_index_by_sw_if_index, sw_if_index,
- ~0);
- vxm->tunnel_index_by_sw_if_index[sw_if_index] = dev_instance;
-
- /* set up l2 input config with the L2 drop feature and bd 0 to drop packets */
- vec_validate (l2im->configs, sw_if_index);
- l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP;
- l2im->configs[sw_if_index].bd_index = 0;
-
- vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
- si->flags &= ~VNET_SW_INTERFACE_FLAG_HIDDEN;
- vnet_sw_interface_set_flags (vnm, sw_if_index,
- VNET_SW_INTERFACE_FLAG_ADMIN_UP);
-
- fib_node_init (&t->node, FIB_NODE_TYPE_VXLAN_TUNNEL);
- fib_prefix_t tun_dst_pfx;
- vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
-
- fib_prefix_from_ip46_addr (&t->dst, &tun_dst_pfx);
- if (!ip46_address_is_multicast (&t->dst))
- {
- /* Unicast tunnel -
- * source the FIB entry for the tunnel's destination
- * and become a child thereof. The tunnel will then get poked
- * when the forwarding for the entry updates, and the tunnel can
- * re-stack accordingly
- */
- vtep_addr_ref (&vxm->vtep_table, t->encap_fib_index, &t->src);
- t->fib_entry_index = fib_entry_track (t->encap_fib_index,
- &tun_dst_pfx,
- FIB_NODE_TYPE_VXLAN_TUNNEL,
- dev_instance,
- &t->sibling_index);
- vxlan_tunnel_restack_dpo (t);
- }
- else
- {
- /* Multicast tunnel -
- * as the same mcast group can be used for multiple mcast tunnels
- * with different VNIs, create the output fib adjacency only if
- * it does not already exist
- */
- fib_protocol_t fp = fib_ip_proto (is_ip6);
-
- if (vtep_addr_ref (&vxm->vtep_table,
- t->encap_fib_index, &t->dst) == 1)
- {
- fib_node_index_t mfei;
- adj_index_t ai;
- fib_route_path_t path = {
- .frp_proto = fib_proto_to_dpo (fp),
- .frp_addr = zero_addr,
- .frp_sw_if_index = 0xffffffff,
- .frp_fib_index = ~0,
- .frp_weight = 1,
- .frp_flags = FIB_ROUTE_PATH_LOCAL,
- .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
- };
- const mfib_prefix_t mpfx = {
- .fp_proto = fp,
- .fp_len = (is_ip6 ? 128 : 32),
- .fp_grp_addr = tun_dst_pfx.fp_addr,
- };
-
- /*
- * Setup the (*,G) to receive traffic on the mcast group
- * - the forwarding interface is for-us
- * - the accepting interface is that from the API
- */
- mfib_table_entry_path_update (t->encap_fib_index, &mpfx,
- MFIB_SOURCE_VXLAN,
- MFIB_ENTRY_FLAG_NONE, &path);
-
- path.frp_sw_if_index = a->mcast_sw_if_index;
- path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE;
- path.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT;
- mfei = mfib_table_entry_path_update (
- t->encap_fib_index, &mpfx, MFIB_SOURCE_VXLAN,
- MFIB_ENTRY_FLAG_NONE, &path);
-
- /*
- * Create the mcast adjacency to send traffic to the group
- */
- ai = adj_mcast_add_or_lock (fp,
- fib_proto_to_link (fp),
- a->mcast_sw_if_index);
-
- /*
- * create a new end-point
- */
- mcast_shared_add (&t->dst, mfei, ai);
- }
-
- dpo_id_t dpo = DPO_INVALID;
- mcast_shared_t ep = mcast_shared_get (&t->dst);
-
- /* Stack shared mcast dst mac addr rewrite on encap */
- dpo_set (&dpo, DPO_ADJACENCY_MCAST,
- fib_proto_to_dpo (fp), ep.mcast_adj_index);
-
- dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
- dpo_reset (&dpo);
- flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER;
- }
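-
-  /*
-   * Illustrative refcount flow for a shared mcast group G (two tunnels
-   * t1 and t2 with the same dst group, different VNIs):
-   *   add t1: vtep_addr_ref(G) == 1 -> create (*,G) mfib entry + adj
-   *   add t2: vtep_addr_ref(G) == 2 -> reuse endpoint from mcast_shared
-   *   del t1: vtep_addr_unref(G) == 1 -> shared state kept
-   *   del t2: vtep_addr_unref(G) == 0 -> mcast_shared_remove(G)
-   */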
-
- vnet_get_sw_interface (vnet_get_main (), sw_if_index)->flood_class =
- flood_class;
- }
- else
- {
- /* deleting a tunnel: tunnel must exist */
- if (!p)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-
- u32 instance = is_ip6 ? key6.value :
- vxm->tunnel_index_by_sw_if_index[p->sw_if_index];
- vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, instance);
-
- sw_if_index = t->sw_if_index;
- vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ );
-
- vxm->tunnel_index_by_sw_if_index[sw_if_index] = ~0;
-
- if (!is_ip6)
- clib_bihash_add_del_16_8 (&vxm->vxlan4_tunnel_by_key, &key4,
- 0 /*del */ );
- else
- clib_bihash_add_del_24_8 (&vxm->vxlan6_tunnel_by_key, &key6,
- 0 /*del */ );
-
- if (!ip46_address_is_multicast (&t->dst))
- {
- if (t->flow_index != ~0)
- vnet_flow_del (vnm, t->flow_index);
-
- vtep_addr_unref (&vxm->vtep_table, t->encap_fib_index, &t->src);
- fib_entry_untrack (t->fib_entry_index, t->sibling_index);
- }
- else if (vtep_addr_unref (&vxm->vtep_table,
- t->encap_fib_index, &t->dst) == 0)
- {
- mcast_shared_remove (&t->dst);
- }
-
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, t->hw_if_index);
- if (hw->dev_class_index == vxlan_device_class.index)
- vnet_delete_hw_interface (vnm, t->hw_if_index);
- else
- ethernet_delete_interface (vnm, t->hw_if_index);
- hash_unset (vxm->instance_used, t->user_instance);
-
- fib_node_deinit (&t->node);
- pool_put (vxm->tunnels, t);
- }
-
- if (sw_if_indexp)
- *sw_if_indexp = sw_if_index;
-
- if (a->is_add)
- {
- /* register udp ports */
- if (!is_ip6 && !udp_is_valid_dst_port (a->src_port, 1))
- udp_register_dst_port (vxm->vlib_main, a->src_port,
- vxlan4_input_node.index, 1);
- if (is_ip6 && !udp_is_valid_dst_port (a->src_port, 0))
- udp_register_dst_port (vxm->vlib_main, a->src_port,
- vxlan6_input_node.index, 0);
- }
-
- return 0;
-}
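-
-/*
- * Minimal usage sketch for the function above (hypothetical caller,
- * not part of this file); zero ports default to 4789, ~0 fields
- * request auto-assignment / the l2 decap default:
- *
- *   vnet_vxlan_add_del_tunnel_args_t a = {
- *     .is_add = 1,
- *     .src.ip4.as_u32 = clib_host_to_net_u32 (0x0a000301),  (10.0.3.1)
- *     .dst.ip4.as_u32 = clib_host_to_net_u32 (0x0a000303),  (10.0.3.3)
- *     .mcast_sw_if_index = ~0,
- *     .encap_fib_index = 0,
- *     .decap_next_index = ~0,
- *     .instance = ~0,
- *     .vni = 13,
- *   };
- *   u32 sw_if_index;
- *   int rv = vnet_vxlan_add_del_tunnel (&a, &sw_if_index);
- */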
-
-static uword
-get_decap_next_for_node (u32 node_index, u32 ipv4_set)
-{
- vxlan_main_t *vxm = &vxlan_main;
- vlib_main_t *vm = vxm->vlib_main;
- uword input_node = (ipv4_set) ? vxlan4_input_node.index :
- vxlan6_input_node.index;
-
- return vlib_node_add_next (vm, input_node, node_index);
-}
-
-static uword
-unformat_decap_next (unformat_input_t * input, va_list * args)
-{
- u32 *result = va_arg (*args, u32 *);
- u32 ipv4_set = va_arg (*args, int);
- vxlan_main_t *vxm = &vxlan_main;
- vlib_main_t *vm = vxm->vlib_main;
- u32 node_index;
- u32 tmp;
-
- if (unformat (input, "l2"))
- *result = VXLAN_INPUT_NEXT_L2_INPUT;
- else if (unformat (input, "node %U", unformat_vlib_node, vm, &node_index))
- *result = get_decap_next_for_node (node_index, ipv4_set);
- else if (unformat (input, "%d", &tmp))
- *result = tmp;
- else
- return 0;
- return 1;
-}
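-
-/*
- * Inputs accepted above (illustrative): "l2" selects
- * VXLAN_INPUT_NEXT_L2_INPUT; "node <name>" resolves the named graph
- * node and adds it as a next of vxlan4-input/vxlan6-input; a bare
- * number is taken as a literal next-index value.
- */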
-
-static clib_error_t *
-vxlan_add_del_tunnel_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- ip46_address_t src = ip46_address_initializer, dst =
- ip46_address_initializer;
- u8 is_add = 1;
- u8 src_set = 0;
- u8 dst_set = 0;
- u8 grp_set = 0;
- u8 ipv4_set = 0;
- u8 ipv6_set = 0;
- u8 is_l3 = 0;
- u32 instance = ~0;
- u32 encap_fib_index = 0;
- u32 mcast_sw_if_index = ~0;
- u32 decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT;
- u32 vni = 0;
- u32 src_port = 0;
- u32 dst_port = 0;
- u32 table_id;
- clib_error_t *parse_error = NULL;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "del"))
- {
- is_add = 0;
- }
- else if (unformat (line_input, "instance %d", &instance))
- ;
- else if (unformat (line_input, "src %U",
- unformat_ip46_address, &src, IP46_TYPE_ANY))
- {
- src_set = 1;
- ip46_address_is_ip4 (&src) ? (ipv4_set = 1) : (ipv6_set = 1);
- }
- else if (unformat (line_input, "dst %U",
- unformat_ip46_address, &dst, IP46_TYPE_ANY))
- {
- dst_set = 1;
- ip46_address_is_ip4 (&dst) ? (ipv4_set = 1) : (ipv6_set = 1);
- }
- else if (unformat (line_input, "group %U %U",
- unformat_ip46_address, &dst, IP46_TYPE_ANY,
- unformat_vnet_sw_interface,
- vnet_get_main (), &mcast_sw_if_index))
- {
- grp_set = dst_set = 1;
- ip46_address_is_ip4 (&dst) ? (ipv4_set = 1) : (ipv6_set = 1);
- }
- else if (unformat (line_input, "encap-vrf-id %d", &table_id))
- {
- encap_fib_index =
- fib_table_find (fib_ip_proto (ipv6_set), table_id);
- }
- else if (unformat (line_input, "l3"))
- is_l3 = 1;
- else if (unformat (line_input, "decap-next %U", unformat_decap_next,
- &decap_next_index, ipv4_set))
- ;
- else if (unformat (line_input, "vni %d", &vni))
- ;
- else if (unformat (line_input, "src_port %d", &src_port))
- ;
- else if (unformat (line_input, "dst_port %d", &dst_port))
- ;
- else
- {
- parse_error = clib_error_return (0, "parse error: '%U'",
- format_unformat_error, line_input);
- break;
- }
- }
-
- unformat_free (line_input);
-
- if (parse_error)
- return parse_error;
-
- if (is_l3 && decap_next_index == VXLAN_INPUT_NEXT_L2_INPUT)
- {
- vlib_node_t *node = vlib_get_node_by_name (
- vm, (u8 *) (ipv4_set ? "ip4-input" : "ip6-input"));
- decap_next_index = get_decap_next_for_node (node->index, ipv4_set);
- }
-
- if (encap_fib_index == ~0)
- return clib_error_return (0, "nonexistent encap-vrf-id %d", table_id);
-
- if (src_set == 0)
- return clib_error_return (0, "tunnel src address not specified");
-
- if (dst_set == 0)
- return clib_error_return (0, "tunnel dst address not specified");
-
- if (grp_set && !ip46_address_is_multicast (&dst))
- return clib_error_return (0, "tunnel group address not multicast");
-
- if (grp_set == 0 && ip46_address_is_multicast (&dst))
- return clib_error_return (0, "dst address must be unicast");
-
- if (grp_set && mcast_sw_if_index == ~0)
- return clib_error_return (0, "tunnel nonexistent multicast device");
-
- if (ipv4_set && ipv6_set)
- return clib_error_return (0, "both IPv4 and IPv6 addresses specified");
-
- if (ip46_address_cmp (&src, &dst) == 0)
- return clib_error_return (0, "src and dst addresses are identical");
-
- if (decap_next_index == ~0)
- return clib_error_return (0, "next node not found");
-
- if (vni == 0)
- return clib_error_return (0, "vni not specified");
-
- if (vni >> 24)
- return clib_error_return (0, "vni %d out of range", vni);
-
- vnet_vxlan_add_del_tunnel_args_t a = { .is_add = is_add,
- .is_ip6 = ipv6_set,
- .is_l3 = is_l3,
- .instance = instance,
-#define _(x) .x = x,
- foreach_copy_field
-#undef _
- };
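-
-  /*
-   * The X-macro above expands foreach_copy_field (defined earlier in
-   * this file) into one designated initializer per shared field, e.g.
-   * .src = src, .dst = dst, .vni = vni, ... so the CLI locals and the
-   * args structure stay in sync by construction.
-   */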
-
- u32 tunnel_sw_if_index;
- int rv = vnet_vxlan_add_del_tunnel (&a, &tunnel_sw_if_index);
-
- switch (rv)
- {
- case 0:
- if (is_add)
- vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
- vnet_get_main (), tunnel_sw_if_index);
- break;
-
- case VNET_API_ERROR_TUNNEL_EXIST:
- return clib_error_return (0, "tunnel already exists...");
-
- case VNET_API_ERROR_NO_SUCH_ENTRY:
- return clib_error_return (0, "tunnel does not exist...");
-
- case VNET_API_ERROR_INSTANCE_IN_USE:
- return clib_error_return (0, "Instance is in use");
-
- default:
- return clib_error_return
- (0, "vnet_vxlan_add_del_tunnel returned %d", rv);
- }
-
- return 0;
-}
-
-/*?
- * Add or delete a VXLAN Tunnel.
- *
- * VXLAN provides the features needed to allow L2 bridge domains (BDs)
- * to span multiple servers. This is done by building an L2 overlay on
- * top of an L3 network underlay using VXLAN tunnels.
- *
- * This makes it possible for servers to be co-located in the same data
- * center or be separated geographically as long as they are reachable
- * through the underlay L3 network.
- *
- * You can refer to this kind of L2 overlay bridge domain as a VXLAN
- * (Virtual eXtensible LAN) segment.
- *
- * @cliexpar
- * Example of how to create a VXLAN Tunnel:
- * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 encap-vrf-id
- 7}
- * Example of how to create a VXLAN Tunnel with a known name, vxlan_tunnel42:
- * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 instance 42}
- * Example of how to create a multicast VXLAN Tunnel with a known name,
- vxlan_tunnel23:
- * @cliexcmd{create vxlan tunnel src 10.0.3.1 group 239.1.1.1
- GigabitEthernet0/8/0 instance 23}
- * Example of how to create a VXLAN Tunnel with custom udp-ports:
- * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 src_port
- 59000 dst_port 59001}
- * Example of how to delete a VXLAN Tunnel:
- * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 del}
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (create_vxlan_tunnel_command, static) = {
- .path = "create vxlan tunnel",
- .short_help =
- "create vxlan tunnel src <local-vtep-addr>"
- " {dst <remote-vtep-addr>|group <mcast-vtep-addr> <intf-name>} vni <nn>"
- " [instance <id>]"
- " [encap-vrf-id <nn>] [decap-next [l2|node <name>]] [del] [l3]"
- " [src_port <local-vtep-udp-port>] [dst_port <remote-vtep-udp-port>]",
- .function = vxlan_add_del_tunnel_command_fn,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-show_vxlan_tunnel_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- vxlan_main_t *vxm = &vxlan_main;
- vxlan_tunnel_t *t;
- int raw = 0;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "raw"))
- raw = 1;
- else
- return clib_error_return (0, "parse error: '%U'",
- format_unformat_error, input);
- }
-
- if (pool_elts (vxm->tunnels) == 0)
- vlib_cli_output (vm, "No vxlan tunnels configured...");
-
-/* *INDENT-OFF* */
- pool_foreach (t, vxm->tunnels)
- {
- vlib_cli_output (vm, "%U", format_vxlan_tunnel, t);
- }
-/* *INDENT-ON* */
-
- if (raw)
- {
- vlib_cli_output (vm, "Raw IPv4 Hash Table:\n%U\n",
- format_bihash_16_8, &vxm->vxlan4_tunnel_by_key,
- 1 /* verbose */ );
- vlib_cli_output (vm, "Raw IPv6 Hash Table:\n%U\n",
- format_bihash_24_8, &vxm->vxlan6_tunnel_by_key,
- 1 /* verbose */ );
- }
-
- return 0;
-}
-
-/*?
- * Display all the VXLAN Tunnel entries.
- *
- * @cliexpar
- * Example of how to display the VXLAN Tunnel entries:
- * @cliexstart{show vxlan tunnel}
- * [0] src 10.0.3.1 dst 10.0.3.3 src_port 4789 dst_port 4789 vni 13
- encap_fib_index 0 sw_if_index 5 decap_next l2
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_vxlan_tunnel_command, static) = {
- .path = "show vxlan tunnel",
- .short_help = "show vxlan tunnel [raw]",
- .function = show_vxlan_tunnel_command_fn,
-};
-/* *INDENT-ON* */
-
-
-void
-vnet_int_vxlan_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable)
-{
- vxlan_main_t *vxm = &vxlan_main;
-
- if (pool_is_free_index (vxm->vnet_main->interface_main.sw_interfaces,
- sw_if_index))
- return;
-
- is_enable = ! !is_enable;
-
- if (is_ip6)
- {
- if (clib_bitmap_get (vxm->bm_ip6_bypass_enabled_by_sw_if, sw_if_index)
- != is_enable)
- {
- vnet_feature_enable_disable ("ip6-unicast", "ip6-vxlan-bypass",
- sw_if_index, is_enable, 0, 0);
- vxm->bm_ip6_bypass_enabled_by_sw_if =
- clib_bitmap_set (vxm->bm_ip6_bypass_enabled_by_sw_if,
- sw_if_index, is_enable);
- }
- }
- else
- {
- if (clib_bitmap_get (vxm->bm_ip4_bypass_enabled_by_sw_if, sw_if_index)
- != is_enable)
- {
- vnet_feature_enable_disable ("ip4-unicast", "ip4-vxlan-bypass",
- sw_if_index, is_enable, 0, 0);
- vxm->bm_ip4_bypass_enabled_by_sw_if =
- clib_bitmap_set (vxm->bm_ip4_bypass_enabled_by_sw_if,
- sw_if_index, is_enable);
- }
- }
-}
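-
-/*
- * Usage sketch (hypothetical sw_if_index 1): repeated calls with the
- * same state are no-ops thanks to the bitmap check above.
- *
- *   vnet_int_vxlan_bypass_mode (1, 0, 1);   enable, is_ip6 = 0
- *   vnet_int_vxlan_bypass_mode (1, 0, 0);   disable again
- */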
-
-
-static clib_error_t *
-set_ip_vxlan_bypass (u32 is_ip6,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- vnet_main_t *vnm = vnet_get_main ();
- clib_error_t *error = 0;
- u32 sw_if_index, is_enable;
-
- sw_if_index = ~0;
- is_enable = 1;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat_user
- (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index))
- ;
- else if (unformat (line_input, "del"))
- is_enable = 0;
- else
- {
- error = unformat_parse_error (line_input);
- goto done;
- }
- }
-
- if (~0 == sw_if_index)
- {
- error = clib_error_return (0, "unknown interface `%U'",
- format_unformat_error, line_input);
- goto done;
- }
-
- vnet_int_vxlan_bypass_mode (sw_if_index, is_ip6, is_enable);
-
-done:
- unformat_free (line_input);
-
- return error;
-}
-
-static clib_error_t *
-set_ip4_vxlan_bypass (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- return set_ip_vxlan_bypass (0, input, cmd);
-}
-
-/*?
- * This command adds the 'ip4-vxlan-bypass' graph node for a given interface.
- * By adding the IPv4 vxlan-bypass graph node to an interface, the node checks
- * for and validates input vxlan packets, bypassing the ip4-lookup, ip4-local
- * and ip4-udp-lookup nodes to speed up vxlan packet forwarding. This node
- * adds a small amount of extra overhead for non-vxlan packets, which is kept
- * to a minimum.
- *
- * @cliexpar
- * @parblock
- * Example of graph node before ip4-vxlan-bypass is enabled:
- * @cliexstart{show vlib graph ip4-vxlan-bypass}
- * Name Next Previous
- * ip4-vxlan-bypass error-drop [0]
- * vxlan4-input [1]
- * ip4-lookup [2]
- * @cliexend
- *
- * Example of how to enable ip4-vxlan-bypass on an interface:
- * @cliexcmd{set interface ip vxlan-bypass GigabitEthernet2/0/0}
- *
- * Example of graph node after ip4-vxlan-bypass is enabled:
- * @cliexstart{show vlib graph ip4-vxlan-bypass}
- * Name Next Previous
- * ip4-vxlan-bypass error-drop [0] ip4-input
- * vxlan4-input [1] ip4-input-no-checksum
- * ip4-lookup [2]
- * @cliexend
- *
- * Example of how to display the feature enabled on an interface:
- * @cliexstart{show ip interface features GigabitEthernet2/0/0}
- * IP feature paths configured on GigabitEthernet2/0/0...
- * ...
- * ipv4 unicast:
- * ip4-vxlan-bypass
- * ip4-lookup
- * ...
- * @cliexend
- *
- * Example of how to disable ip4-vxlan-bypass on an interface:
- * @cliexcmd{set interface ip vxlan-bypass GigabitEthernet2/0/0 del}
- * @endparblock
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_interface_ip_vxlan_bypass_command, static) = {
- .path = "set interface ip vxlan-bypass",
- .function = set_ip4_vxlan_bypass,
- .short_help = "set interface ip vxlan-bypass <interface> [del]",
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-set_ip6_vxlan_bypass (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- return set_ip_vxlan_bypass (1, input, cmd);
-}
-
-/*?
- * This command adds the 'ip6-vxlan-bypass' graph node for a given interface.
- * By adding the IPv6 vxlan-bypass graph node to an interface, the node checks
- * for and validates input vxlan packets, bypassing the ip6-lookup, ip6-local
- * and ip6-udp-lookup nodes to speed up vxlan packet forwarding. This node
- * adds a small amount of extra overhead for non-vxlan packets, which is kept
- * to a minimum.
- *
- * @cliexpar
- * @parblock
- * Example of graph node before ip6-vxlan-bypass is enabled:
- * @cliexstart{show vlib graph ip6-vxlan-bypass}
- * Name Next Previous
- * ip6-vxlan-bypass error-drop [0]
- * vxlan6-input [1]
- * ip6-lookup [2]
- * @cliexend
- *
- * Example of how to enable ip6-vxlan-bypass on an interface:
- * @cliexcmd{set interface ip6 vxlan-bypass GigabitEthernet2/0/0}
- *
- * Example of graph node after ip6-vxlan-bypass is enabled:
- * @cliexstart{show vlib graph ip6-vxlan-bypass}
- * Name Next Previous
- * ip6-vxlan-bypass error-drop [0] ip6-input
- * vxlan6-input [1] ip4-input-no-checksum
- * ip6-lookup [2]
- * @cliexend
- *
- * Example of how to display the feature enabled on an interface:
- * @cliexstart{show ip interface features GigabitEthernet2/0/0}
- * IP feature paths configured on GigabitEthernet2/0/0...
- * ...
- * ipv6 unicast:
- * ip6-vxlan-bypass
- * ip6-lookup
- * ...
- * @cliexend
- *
- * Example of how to disable ip6-vxlan-bypass on an interface:
- * @cliexcmd{set interface ip6 vxlan-bypass GigabitEthernet2/0/0 del}
- * @endparblock
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_interface_ip6_vxlan_bypass_command, static) = {
- .path = "set interface ip6 vxlan-bypass",
- .function = set_ip6_vxlan_bypass,
- .short_help = "set interface ip6 vxlan-bypass <interface> [del]",
-};
-/* *INDENT-ON* */
-
-int
-vnet_vxlan_add_del_rx_flow (u32 hw_if_index, u32 t_index, int is_add)
-{
- vxlan_main_t *vxm = &vxlan_main;
- vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, t_index);
- vnet_main_t *vnm = vnet_get_main ();
- if (is_add)
- {
- if (t->flow_index == ~0)
- {
- vxlan_main_t *vxm = &vxlan_main;
- vnet_flow_t flow = {
- .actions =
- VNET_FLOW_ACTION_REDIRECT_TO_NODE | VNET_FLOW_ACTION_MARK |
- VNET_FLOW_ACTION_BUFFER_ADVANCE,
- .mark_flow_id = t->dev_instance + vxm->flow_id_start,
- .redirect_node_index = vxlan4_flow_input_node.index,
- .buffer_advance = sizeof (ethernet_header_t),
- .type = VNET_FLOW_TYPE_IP4_VXLAN,
- .ip4_vxlan = {
- .protocol.prot = IP_PROTOCOL_UDP,
- .src_addr.addr = t->dst.ip4,
- .dst_addr.addr = t->src.ip4,
- .src_addr.mask.as_u32 = ~0,
- .dst_addr.mask.as_u32 = ~0,
- .dst_port.port = t->src_port,
- .dst_port.mask = 0xFF,
- .vni = t->vni,
- }
- ,
- };
- vnet_flow_add (vnm, &flow, &t->flow_index);
- }
- return vnet_flow_enable (vnm, t->flow_index, hw_if_index);
- }
- /* flow index is removed when the tunnel is deleted */
- return vnet_flow_disable (vnm, t->flow_index, hw_if_index);
-}
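-
-/*
- * Note on the flow spec above: it matches packets as *received*, so
- * the tunnel's dst becomes the flow's src_addr and vice versa. The
- * hardware mark is t->dev_instance + vxm->flow_id_start, which lets
- * vxlan4-flow-input map a marked buffer straight back to its tunnel.
- */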
-
-u32
-vnet_vxlan_get_tunnel_index (u32 sw_if_index)
-{
- vxlan_main_t *vxm = &vxlan_main;
-
- if (sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index))
- return ~0;
- return vxm->tunnel_index_by_sw_if_index[sw_if_index];
-}
-
-static clib_error_t *
-vxlan_offload_command_fn (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- vnet_main_t *vnm = vnet_get_main ();
- u32 rx_sw_if_index = ~0;
- u32 hw_if_index = ~0;
- int is_add = 1;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "hw %U", unformat_vnet_hw_interface, vnm,
- &hw_if_index))
- continue;
- if (unformat (line_input, "rx %U", unformat_vnet_sw_interface, vnm,
- &rx_sw_if_index))
- continue;
- if (unformat (line_input, "del"))
- {
- is_add = 0;
- continue;
- }
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- }
-
- if (rx_sw_if_index == ~0)
- return clib_error_return (0, "missing rx interface");
- if (hw_if_index == ~0)
- return clib_error_return (0, "missing hw interface");
-
- u32 t_index = vnet_vxlan_get_tunnel_index (rx_sw_if_index);
- if (t_index == ~0)
- return clib_error_return (0, "%U is not a vxlan tunnel",
- format_vnet_sw_if_index_name, vnm,
- rx_sw_if_index);
-
- vxlan_main_t *vxm = &vxlan_main;
- vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, t_index);
-
- if (!ip46_address_is_ip4 (&t->dst))
- return clib_error_return (0, "currently only IPV4 tunnels are supported");
-
- vnet_hw_interface_t *hw_if = vnet_get_hw_interface (vnm, hw_if_index);
- ip4_main_t *im = &ip4_main;
- u32 rx_fib_index =
- vec_elt (im->fib_index_by_sw_if_index, hw_if->sw_if_index);
-
- if (t->encap_fib_index != rx_fib_index)
- return clib_error_return (0, "interface/tunnel fib mismatch");
-
- if (vnet_vxlan_add_del_rx_flow (hw_if_index, t_index, is_add))
- return clib_error_return (0, "error %s flow",
- is_add ? "enabling" : "disabling");
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (vxlan_offload_command, static) = {
- .path = "set flow-offload vxlan",
- .short_help =
- "set flow-offload vxlan hw <interface-name> rx <tunnel-name> [del]",
- .function = vxlan_offload_command_fn,
-};
-/* *INDENT-ON* */
-
-#define VXLAN_HASH_NUM_BUCKETS (2 * 1024)
-#define VXLAN_HASH_MEMORY_SIZE (1 << 20)
-
-clib_error_t *
-vxlan_init (vlib_main_t * vm)
-{
- vxlan_main_t *vxm = &vxlan_main;
-
- vxm->vnet_main = vnet_get_main ();
- vxm->vlib_main = vm;
-
- vnet_flow_get_range (vxm->vnet_main, "vxlan", 1024 * 1024,
- &vxm->flow_id_start);
-
- vxm->bm_ip4_bypass_enabled_by_sw_if = 0;
- vxm->bm_ip6_bypass_enabled_by_sw_if = 0;
-
- /* initialize the ip4 and ip6 tunnel lookup hashes */
- clib_bihash_init_16_8 (&vxm->vxlan4_tunnel_by_key, "vxlan4",
- VXLAN_HASH_NUM_BUCKETS, VXLAN_HASH_MEMORY_SIZE);
- clib_bihash_init_24_8 (&vxm->vxlan6_tunnel_by_key, "vxlan6",
- VXLAN_HASH_NUM_BUCKETS, VXLAN_HASH_MEMORY_SIZE);
- vxm->vtep_table = vtep_table_create ();
- vxm->mcast_shared = hash_create_mem (0,
- sizeof (ip46_address_t),
- sizeof (mcast_shared_t));
-
- fib_node_register_type (FIB_NODE_TYPE_VXLAN_TUNNEL, &vxlan_vft);
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (vxlan_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan/vxlan.h b/src/vnet/vxlan/vxlan.h
deleted file mode 100644
index be819ab1069..00000000000
--- a/src/vnet/vxlan/vxlan.h
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef included_vnet_vxlan_h
-#define included_vnet_vxlan_h
-
-#include <vppinfra/error.h>
-#include <vppinfra/hash.h>
-#include <vppinfra/bihash_16_8.h>
-#include <vppinfra/bihash_24_8.h>
-#include <vnet/vnet.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ip/vtep.h>
-#include <vnet/l2/l2_input.h>
-#include <vnet/l2/l2_output.h>
-#include <vnet/l2/l2_bd.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/vxlan/vxlan_packet.h>
-#include <vnet/ip/ip4_packet.h>
-#include <vnet/ip/ip6_packet.h>
-#include <vnet/udp/udp_packet.h>
-#include <vnet/dpo/dpo.h>
-#include <vnet/adj/adj_types.h>
-
-/* *INDENT-OFF* */
-typedef CLIB_PACKED (struct {
- ip4_header_t ip4; /* 20 bytes */
- udp_header_t udp; /* 8 bytes */
- vxlan_header_t vxlan; /* 8 bytes */
-}) ip4_vxlan_header_t;
-
-typedef CLIB_PACKED (struct {
- ip6_header_t ip6; /* 40 bytes */
- udp_header_t udp; /* 8 bytes */
- vxlan_header_t vxlan; /* 8 bytes */
-}) ip6_vxlan_header_t;
-/* *INDENT-ON* */
-
-/*
-* Key fields: remote ip, vni on incoming VXLAN packet
-* all fields in NET byte order
-*/
-typedef clib_bihash_kv_16_8_t vxlan4_tunnel_key_t;
-
-/*
-* Key fields: remote ip, vni and fib index on incoming VXLAN packet
-* ip, vni fields in NET byte order
-* fib index field in host byte order
-*/
-typedef clib_bihash_kv_24_8_t vxlan6_tunnel_key_t;
-
-typedef union
-{
- struct
- {
- u32 sw_if_index; /* unicast - input interface / mcast - stats interface */
- union
- {
- struct /* unicast action */
- {
- u16 next_index;
- u8 error;
- };
- ip4_address_t local_ip; /* used as dst ip for mcast pkts to assign them to unicast tunnel */
- };
- };
- u64 as_u64;
-} vxlan_decap_info_t;
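-
-/*
- * This union is exactly 8 bytes and doubles as the bihash value for
- * the v4 key (key4.value = di.as_u64 in vxlan.c): a hit yields the
- * input sw_if_index plus either the unicast next/error action or, for
- * mcast packets, the local ip used to re-key to a unicast tunnel.
- */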
-
-typedef struct
-{
- /* Required for pool_get_aligned */
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-
- /* FIB DPO for IP forwarding of VXLAN encap packet */
- dpo_id_t next_dpo;
-
- /* vxlan VNI in HOST byte order */
- u32 vni;
-
- /* tunnel src and dst addresses */
- ip46_address_t src;
- ip46_address_t dst;
-
- /* udp-ports */
- u16 src_port;
- u16 dst_port;
-
- /* mcast packet output intfc index (used only if dst is mcast) */
- u32 mcast_sw_if_index;
-
- /* decap next index */
- u16 decap_next_index;
-
- /* The FIB index for src/dst addresses */
- u32 encap_fib_index;
-
- /* vnet intfc index */
- u32 sw_if_index;
- u32 hw_if_index;
-
- /**
- * Linkage into the FIB object graph
- */
- fib_node_t node;
-
- /*
- * The FIB entry for (depending on whether the VXLAN tunnel is unicast
- * or mcast) sending unicast VXLAN encap packets or receiving mcast
- * VXLAN packets
- */
- fib_node_index_t fib_entry_index;
- adj_index_t mcast_adj_index;
-
- /**
- * The tunnel is a child of the FIB entry for its destination. This is
- * so it receives updates when the forwarding information for that entry
- * changes.
- * The tunnel's sibling index on the FIB entry's dependency list.
- */
- u32 sibling_index;
-
- u32 flow_index; /* infra flow index */
- u32 dev_instance; /* Real device instance in tunnel vector */
- u32 user_instance; /* Instance name being shown to user */
-
- VNET_DECLARE_REWRITE;
-} vxlan_tunnel_t;
-
-#define foreach_vxlan_input_next \
-_(DROP, "error-drop") \
-_(L2_INPUT, "l2-input")
-
-typedef enum
-{
-#define _(s,n) VXLAN_INPUT_NEXT_##s,
- foreach_vxlan_input_next
-#undef _
- VXLAN_INPUT_N_NEXT,
-} vxlan_input_next_t;
-
-typedef enum
-{
-#define vxlan_error(n,s) VXLAN_ERROR_##n,
-#include <vnet/vxlan/vxlan_error.def>
-#undef vxlan_error
- VXLAN_N_ERROR,
-} vxlan_input_error_t;
-
-typedef struct
-{
- /* vector of encap tunnel instances */
- vxlan_tunnel_t *tunnels;
-
- /* lookup tunnel by key */
- clib_bihash_16_8_t
- vxlan4_tunnel_by_key; /* keyed on ipv4.dst + src_port + fib + vni */
- clib_bihash_24_8_t
- vxlan6_tunnel_by_key; /* keyed on ipv6.dst + src_port + fib + vni */
-
- /* local VTEP IPs ref count used by vxlan-bypass node to check if
- received VXLAN packet DIP matches any local VTEP address */
- vtep_table_t vtep_table;
-
- /* mcast shared info */
- uword *mcast_shared; /* keyed on mcast ip46 addr */
-
- /* Mapping from sw_if_index to tunnel index */
- u32 *tunnel_index_by_sw_if_index;
-
- /* graph node state */
- uword *bm_ip4_bypass_enabled_by_sw_if;
- uword *bm_ip6_bypass_enabled_by_sw_if;
-
- /* convenience */
- vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
-
- /* Record used instances */
- uword *instance_used;
- u32 flow_id_start;
-
- /* cache for the last 8 vxlan tunnels */
-#ifdef CLIB_HAVE_VEC512
- vtep4_cache_t vtep4_u512;
-#endif
-
-} vxlan_main_t;
-
-extern vxlan_main_t vxlan_main;
-
-extern vlib_node_registration_t vxlan4_input_node;
-extern vlib_node_registration_t vxlan6_input_node;
-extern vlib_node_registration_t vxlan4_encap_node;
-extern vlib_node_registration_t vxlan6_encap_node;
-extern vlib_node_registration_t vxlan4_flow_input_node;
-
-u8 *format_vxlan_encap_trace (u8 * s, va_list * args);
-
-typedef struct
-{
- u8 is_add;
-
- /* we normally use is_ip4, but since this field is an addition to the
- * structure, is_ip6 seems less of a breaking change */
- u8 is_ip6;
- u8 is_l3;
- u32 instance;
- ip46_address_t src, dst;
- u32 mcast_sw_if_index;
- u32 encap_fib_index;
- u32 decap_next_index;
- u32 vni;
- u16 src_port;
- u16 dst_port;
-} vnet_vxlan_add_del_tunnel_args_t;
-
-int vnet_vxlan_add_del_tunnel
- (vnet_vxlan_add_del_tunnel_args_t * a, u32 * sw_if_indexp);
-
-void vnet_int_vxlan_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable);
-
-int vnet_vxlan_add_del_rx_flow (u32 hw_if_index, u32 t_index, int is_add);
-
-u32 vnet_vxlan_get_tunnel_index (u32 sw_if_index);
-#endif /* included_vnet_vxlan_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan/vxlan_api.c b/src/vnet/vxlan/vxlan_api.c
deleted file mode 100644
index c97597a2ef2..00000000000
--- a/src/vnet/vxlan/vxlan_api.c
+++ /dev/null
@@ -1,375 +0,0 @@
-/*
- *------------------------------------------------------------------
- * vxlan_api.c - vxlan api
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vnet/vnet.h>
-#include <vlibmemory/api.h>
-
-#include <vnet/interface.h>
-#include <vnet/api_errno.h>
-#include <vnet/feature/feature.h>
-#include <vnet/vxlan/vxlan.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/ip/ip_types_api.h>
-#include <vnet/udp/udp_local.h>
-#include <vnet/format_fns.h>
-#include <vxlan/vxlan.api_enum.h>
-#include <vxlan/vxlan.api_types.h>
-
-static u16 msg_id_base;
-
-#define REPLY_MSG_ID_BASE msg_id_base
-#include <vlibapi/api_helper_macros.h>
-
-static void
-vl_api_vxlan_offload_rx_t_handler (vl_api_vxlan_offload_rx_t * mp)
-{
- vl_api_vxlan_offload_rx_reply_t *rmp;
- int rv = 0;
- u32 hw_if_index = ntohl (mp->hw_if_index);
- u32 sw_if_index = ntohl (mp->sw_if_index);
-
- if (!vnet_hw_interface_is_valid (vnet_get_main (), hw_if_index))
- {
- rv = VNET_API_ERROR_NO_SUCH_ENTRY;
- goto err;
- }
- VALIDATE_SW_IF_INDEX (mp);
-
- u32 t_index = vnet_vxlan_get_tunnel_index (sw_if_index);
- if (t_index == ~0)
- {
- rv = VNET_API_ERROR_INVALID_SW_IF_INDEX_2;
- goto err;
- }
-
- vxlan_main_t *vxm = &vxlan_main;
- vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, t_index);
- if (!ip46_address_is_ip4 (&t->dst))
- {
- rv = VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
- goto err;
- }
-
- vnet_main_t *vnm = vnet_get_main ();
- vnet_hw_interface_t *hw_if = vnet_get_hw_interface (vnm, hw_if_index);
- ip4_main_t *im = &ip4_main;
- u32 rx_fib_index =
- vec_elt (im->fib_index_by_sw_if_index, hw_if->sw_if_index);
-
- if (t->encap_fib_index != rx_fib_index)
- {
- rv = VNET_API_ERROR_NO_SUCH_FIB;
- goto err;
- }
-
- if (vnet_vxlan_add_del_rx_flow (hw_if_index, t_index, mp->enable))
- {
- rv = VNET_API_ERROR_UNSPECIFIED;
- goto err;
- }
- BAD_SW_IF_INDEX_LABEL;
-err:
-
- REPLY_MACRO (VL_API_VXLAN_OFFLOAD_RX_REPLY);
-}
-
-static void
- vl_api_sw_interface_set_vxlan_bypass_t_handler
- (vl_api_sw_interface_set_vxlan_bypass_t * mp)
-{
- vl_api_sw_interface_set_vxlan_bypass_reply_t *rmp;
- int rv = 0;
- u32 sw_if_index = ntohl (mp->sw_if_index);
-
- VALIDATE_SW_IF_INDEX (mp);
-
- vnet_int_vxlan_bypass_mode (sw_if_index, mp->is_ipv6, mp->enable);
- BAD_SW_IF_INDEX_LABEL;
-
- REPLY_MACRO (VL_API_SW_INTERFACE_SET_VXLAN_BYPASS_REPLY);
-}
-
-static int
-vxlan_add_del_tunnel_clean_input (vnet_vxlan_add_del_tunnel_args_t *a,
- u32 encap_vrf_id)
-{
- a->is_ip6 = !ip46_address_is_ip4 (&a->src);
-
- a->encap_fib_index = fib_table_find (fib_ip_proto (a->is_ip6), encap_vrf_id);
- if (a->encap_fib_index == ~0)
- {
- return VNET_API_ERROR_NO_SUCH_FIB;
- }
-
- if (ip46_address_is_ip4 (&a->src) != ip46_address_is_ip4 (&a->dst))
- {
- return VNET_API_ERROR_INVALID_VALUE;
- }
-
- /* Check src & dst are different */
- if (ip46_address_cmp (&a->dst, &a->src) == 0)
- {
- return VNET_API_ERROR_SAME_SRC_DST;
- }
- if (ip46_address_is_multicast (&a->dst) &&
- !vnet_sw_if_index_is_api_valid (a->mcast_sw_if_index))
- {
- return VNET_API_ERROR_INVALID_SW_IF_INDEX;
- }
- return 0;
-}
-
-static void
-vl_api_vxlan_add_del_tunnel_t_handler (vl_api_vxlan_add_del_tunnel_t *mp)
-{
- vl_api_vxlan_add_del_tunnel_reply_t *rmp;
- u32 sw_if_index = ~0;
- int rv = 0;
-
- vnet_vxlan_add_del_tunnel_args_t a = {
- .is_add = mp->is_add,
- .instance = ntohl (mp->instance),
- .mcast_sw_if_index = ntohl (mp->mcast_sw_if_index),
- .decap_next_index = ntohl (mp->decap_next_index),
- .vni = ntohl (mp->vni),
- };
- ip_address_decode (&mp->src_address, &a.src);
- ip_address_decode (&mp->dst_address, &a.dst);
-
- rv = vxlan_add_del_tunnel_clean_input (&a, ntohl (mp->encap_vrf_id));
- if (rv)
- goto out;
- a.dst_port = a.is_ip6 ? UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan;
- a.src_port = a.is_ip6 ? UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan;
- rv = vnet_vxlan_add_del_tunnel (&a, &sw_if_index);
-
-out:
- REPLY_MACRO2(VL_API_VXLAN_ADD_DEL_TUNNEL_REPLY,
- ({
- rmp->sw_if_index = ntohl (sw_if_index);
- }));
-}
-
-static void
-vl_api_vxlan_add_del_tunnel_v2_t_handler (vl_api_vxlan_add_del_tunnel_v2_t *mp)
-{
- vl_api_vxlan_add_del_tunnel_v2_reply_t *rmp;
- u32 sw_if_index = ~0;
- int rv = 0;
-
- vnet_vxlan_add_del_tunnel_args_t a = {
- .is_add = mp->is_add,
- .instance = ntohl (mp->instance),
- .mcast_sw_if_index = ntohl (mp->mcast_sw_if_index),
- .decap_next_index = ntohl (mp->decap_next_index),
- .vni = ntohl (mp->vni),
- .dst_port = ntohs (mp->dst_port),
- .src_port = ntohs (mp->src_port),
- };
-
- ip_address_decode (&mp->src_address, &a.src);
- ip_address_decode (&mp->dst_address, &a.dst);
-
- rv = vxlan_add_del_tunnel_clean_input (&a, ntohl (mp->encap_vrf_id));
- if (rv)
- goto out;
- rv = vnet_vxlan_add_del_tunnel (&a, &sw_if_index);
-out:
- REPLY_MACRO2 (VL_API_VXLAN_ADD_DEL_TUNNEL_V2_REPLY,
- ({ rmp->sw_if_index = ntohl (sw_if_index); }));
-}
-
-static void
-vl_api_vxlan_add_del_tunnel_v3_t_handler (vl_api_vxlan_add_del_tunnel_v3_t *mp)
-{
- vl_api_vxlan_add_del_tunnel_v3_reply_t *rmp;
- u32 sw_if_index = ~0;
- int rv = 0;
-
- vnet_vxlan_add_del_tunnel_args_t a = {
- .is_add = mp->is_add,
- .instance = ntohl (mp->instance),
- .mcast_sw_if_index = ntohl (mp->mcast_sw_if_index),
- .decap_next_index = ntohl (mp->decap_next_index),
- .vni = ntohl (mp->vni),
- .dst_port = ntohs (mp->dst_port),
- .src_port = ntohs (mp->src_port),
- .is_l3 = mp->is_l3,
- };
-
- ip_address_decode (&mp->src_address, &a.src);
- ip_address_decode (&mp->dst_address, &a.dst);
-
- rv = vxlan_add_del_tunnel_clean_input (&a, ntohl (mp->encap_vrf_id));
- if (rv)
- goto out;
- rv = vnet_vxlan_add_del_tunnel (&a, &sw_if_index);
-out:
- REPLY_MACRO2 (VL_API_VXLAN_ADD_DEL_TUNNEL_V3_REPLY,
- ({ rmp->sw_if_index = ntohl (sw_if_index); }));
-}
-
-static void send_vxlan_tunnel_details
- (vxlan_tunnel_t * t, vl_api_registration_t * reg, u32 context)
-{
- vl_api_vxlan_tunnel_details_t *rmp;
- ip4_main_t *im4 = &ip4_main;
- ip6_main_t *im6 = &ip6_main;
-
- rmp = vl_msg_api_alloc (sizeof (*rmp));
- clib_memset (rmp, 0, sizeof (*rmp));
- rmp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_VXLAN_TUNNEL_DETAILS);
-
- ip_address_encode (&t->src, IP46_TYPE_ANY, &rmp->src_address);
- ip_address_encode (&t->dst, IP46_TYPE_ANY, &rmp->dst_address);
-
- if (ip46_address_is_ip4 (&t->dst))
- rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id);
- else
- rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id);
-
- rmp->instance = htonl (t->user_instance);
- rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index);
- rmp->vni = htonl (t->vni);
- rmp->decap_next_index = htonl (t->decap_next_index);
- rmp->sw_if_index = htonl (t->sw_if_index);
- rmp->context = context;
-
- vl_api_send_msg (reg, (u8 *) rmp);
-}
-
-static void vl_api_vxlan_tunnel_dump_t_handler
- (vl_api_vxlan_tunnel_dump_t * mp)
-{
- vl_api_registration_t *reg;
- vxlan_main_t *vxm = &vxlan_main;
- vxlan_tunnel_t *t;
- u32 sw_if_index;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- sw_if_index = ntohl (mp->sw_if_index);
-
- if (~0 == sw_if_index)
- {
- pool_foreach (t, vxm->tunnels)
- send_vxlan_tunnel_details(t, reg, mp->context);
- }
- else
- {
- if ((sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index)) ||
- (~0 == vxm->tunnel_index_by_sw_if_index[sw_if_index]))
- {
- return;
- }
- t = &vxm->tunnels[vxm->tunnel_index_by_sw_if_index[sw_if_index]];
- send_vxlan_tunnel_details (t, reg, mp->context);
- }
-}
-
-static void
-send_vxlan_tunnel_v2_details (vxlan_tunnel_t *t, vl_api_registration_t *reg,
- u32 context)
-{
- vl_api_vxlan_tunnel_v2_details_t *rmp;
- ip4_main_t *im4 = &ip4_main;
- ip6_main_t *im6 = &ip6_main;
-
- rmp = vl_msg_api_alloc (sizeof (*rmp));
- clib_memset (rmp, 0, sizeof (*rmp));
- rmp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_VXLAN_TUNNEL_V2_DETAILS);
-
- ip_address_encode (&t->src, IP46_TYPE_ANY, &rmp->src_address);
- ip_address_encode (&t->dst, IP46_TYPE_ANY, &rmp->dst_address);
- rmp->src_port = htons (t->src_port);
- rmp->dst_port = htons (t->dst_port);
-
- if (ip46_address_is_ip4 (&t->dst))
- rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id);
- else
- rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id);
-
- rmp->instance = htonl (t->user_instance);
- rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index);
- rmp->vni = htonl (t->vni);
- rmp->decap_next_index = htonl (t->decap_next_index);
- rmp->sw_if_index = htonl (t->sw_if_index);
- rmp->context = context;
-
- vl_api_send_msg (reg, (u8 *) rmp);
-}
-
-static void
-vl_api_vxlan_tunnel_v2_dump_t_handler (vl_api_vxlan_tunnel_v2_dump_t *mp)
-{
- vl_api_registration_t *reg;
- vxlan_main_t *vxm = &vxlan_main;
- vxlan_tunnel_t *t;
- u32 sw_if_index;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- sw_if_index = ntohl (mp->sw_if_index);
-
- if (~0 == sw_if_index)
- {
- pool_foreach (t, vxm->tunnels)
- send_vxlan_tunnel_v2_details (t, reg, mp->context);
- }
- else
- {
- if ((sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index)) ||
- (~0 == vxm->tunnel_index_by_sw_if_index[sw_if_index]))
- {
- return;
- }
- t = &vxm->tunnels[vxm->tunnel_index_by_sw_if_index[sw_if_index]];
- send_vxlan_tunnel_v2_details (t, reg, mp->context);
- }
-}
-
-#include <vxlan/vxlan.api.c>
-static clib_error_t *
-vxlan_api_hookup (vlib_main_t * vm)
-{
- api_main_t *am = vlibapi_get_main ();
-
- am->api_trace_cfg[VL_API_VXLAN_ADD_DEL_TUNNEL].size += 16 * sizeof (u32);
-
- /*
- * Set up the (msg_name, crc, message-id) table
- */
- msg_id_base = setup_message_id_table ();
-
- return 0;
-}
-
-VLIB_API_INIT_FUNCTION (vxlan_api_hookup);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan/vxlan_error.def b/src/vnet/vxlan/vxlan_error.def
deleted file mode 100644
index 17f905950f5..00000000000
--- a/src/vnet/vxlan/vxlan_error.def
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-vxlan_error (DECAPSULATED, "good packets decapsulated")
-vxlan_error (NO_SUCH_TUNNEL, "no such tunnel packets")
-vxlan_error (BAD_FLAGS, "packets with bad flags field in vxlan header")
diff --git a/src/vnet/vxlan/vxlan_packet.h b/src/vnet/vxlan/vxlan_packet.h
deleted file mode 100644
index d1d1ed813e5..00000000000
--- a/src/vnet/vxlan/vxlan_packet.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __included_vxlan_packet_h__
-#define __included_vxlan_packet_h__ 1
-
-/*
- * From RFC-7348
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |R|R|R|R|I|R|R|R| Reserved |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | VXLAN Network Identifier (VNI) | Reserved |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- * VXLAN Header: This is an 8-byte field that has:
- *
- * - Flags (8 bits): where the I flag MUST be set to 1 for a valid
- * VXLAN Network ID (VNI). The other 7 bits (designated "R") are
- * reserved fields and MUST be set to zero on transmission and
- * ignored on receipt.
- *
- * - VXLAN Segment ID/VXLAN Network Identifier (VNI): this is a
- * 24-bit value used to designate the individual VXLAN overlay
- * network on which the communicating VMs are situated. VMs in
- * different VXLAN overlay networks cannot communicate with each
- * other.
- *
- * - Reserved fields (24 bits and 8 bits): MUST be set to zero on
- * transmission and ignored on receipt.
- *
- */
-
-typedef struct
-{
- u8 flags;
- u8 res1;
- u8 res2;
- u8 res3;
- u32 vni_reserved;
-} vxlan_header_t;
-
-#define VXLAN_FLAGS_I 0x08
-
-static inline u32
-vnet_get_vni (vxlan_header_t * h)
-{
- u32 vni_reserved_host_byte_order;
-
- vni_reserved_host_byte_order = clib_net_to_host_u32 (h->vni_reserved);
- return vni_reserved_host_byte_order >> 8;
-}
-
-static inline void
-vnet_set_vni_and_flags (vxlan_header_t * h, u32 vni)
-{
- h->vni_reserved = clib_host_to_net_u32 (vni << 8);
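- /* zero flags and the three reserved bytes in one 4-byte store */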
- *(u32 *) h = 0;
- h->flags = VXLAN_FLAGS_I;
-}
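-
-/*
- * Round-trip sketch (illustrative): for vni = 13,
- * vnet_set_vni_and_flags stores flags = 0x08 (the I bit) and
- * vni_reserved = host->net (13 << 8); vnet_get_vni then returns
- * net->host (vni_reserved) >> 8 == 13.
- */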
-
-#endif /* __included_vxlan_packet_h__ */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */